From 322c3161947cd5c10e3122c097d5a888726d42c1 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Mar 2022 15:40:38 +0800 Subject: [PATCH 01/19] also validate symlink collisions (#301) --- .../fixtures/make_ignorecase_collisions.sh | 4 ++++ git-worktree/tests/index/mod.rs | 17 +++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/git-worktree/tests/fixtures/make_ignorecase_collisions.sh b/git-worktree/tests/fixtures/make_ignorecase_collisions.sh index 9fb772681e0..f93284fbdeb 100644 --- a/git-worktree/tests/fixtures/make_ignorecase_collisions.sh +++ b/git-worktree/tests/fixtures/make_ignorecase_collisions.sh @@ -5,6 +5,7 @@ git init -q git config commit.gpgsign false empty_oid=$(git hash-object -w --stdin (p: impl IntoIterator) -> Vec { + p.into_iter().map(PathBuf::from).collect() + } + #[test] fn symlinks_become_files_if_disabled() -> crate::Result { let opts = opts_with_symlink(false); @@ -76,11 +80,12 @@ mod checkout { #[test] fn collisions_are_detected_on_a_case_sensitive_filesystem() { - if !probe_gitoxide_dir().unwrap().ignore_case { + let fs_caps = probe_gitoxide_dir().unwrap(); + if !fs_caps.ignore_case { eprintln!("Skipping case-insensitive testing on what would be a case-senstive file system"); return; } - let opts = opts_with_symlink(true); + let opts = opts_with_symlink(fs_caps.symlink); let (source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap(); @@ -109,6 +114,10 @@ mod checkout { path: "file_x".into(), error_kind, }, + Collision { + path: "x".into(), + error_kind, + }, ], "these files couldn't be checked out" ); @@ -116,14 +125,14 @@ mod checkout { let source_files = dir_structure(&source_tree); assert_eq!( stripped_prefix(&source_tree, &source_files), - vec![PathBuf::from("d"), PathBuf::from("file_x")], + paths(["d", "file_x", "link-to-X", "x"]), "plenty of collisions prevent a checkout" ); let dest_files = dir_structure(&destination); assert_eq!( stripped_prefix(&destination, &dest_files), - vec![PathBuf::from("D/B"), PathBuf::from("D/C"), PathBuf::from("FILE_X")], + paths(["D/B", "D/C", "FILE_X", "X", "link-to-X"]), "we checkout files in order and generally handle collision detection differently, hence the difference" ); } From 48dc40195fd3d41d1fa5cd6326422ae18266dd7d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Mar 2022 16:22:44 +0800 Subject: [PATCH 02/19] refactor (#301) --- git-worktree/src/index.rs | 245 ----------------------------- git-worktree/src/index/checkout.rs | 73 +++++++++ git-worktree/src/index/entry.rs | 109 +++++++++++++ git-worktree/src/index/mod.rs | 63 ++++++++ git-worktree/tests/index/mod.rs | 42 ++--- 5 files changed, 266 insertions(+), 266 deletions(-) delete mode 100644 git-worktree/src/index.rs create mode 100644 git-worktree/src/index/checkout.rs create mode 100644 git-worktree/src/index/entry.rs create mode 100644 git-worktree/src/index/mod.rs diff --git a/git-worktree/src/index.rs b/git-worktree/src/index.rs deleted file mode 100644 index d8afb59df46..00000000000 --- a/git-worktree/src/index.rs +++ /dev/null @@ -1,245 +0,0 @@ -use git_hash::oid; - -use crate::{index, index::checkout::Collision}; - -pub mod checkout { - use bstr::BString; - use quick_error::quick_error; - - #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] - pub struct Collision { - /// the path that collided with something already present on disk. - pub path: BString, - /// The io error we encountered when checking out `path`. - pub error_kind: std::io::ErrorKind, - } - - pub struct Outcome { - pub collisions: Vec, - } - - #[derive(Clone, Copy)] - pub struct Options { - /// capabilities of the file system - pub fs: crate::fs::Capabilities, - /// If true, we assume no file to exist in the target directory, and want exclusive access to it. - /// This should be enabled when cloning to avoid checks for freshness of files. This also enables - /// detection of collisions based on whether or not exclusive file creation succeeds or fails. - pub destination_is_initially_empty: bool, - /// If true, default false, try to checkout as much as possible and don't abort on first error which isn't - /// due to a conflict. - /// The operation will never fail, but count the encountered errors instead along with their paths. - pub keep_going: bool, - /// If true, a files creation time is taken into consideration when checking if a file changed. - /// Can be set to false in case other tools alter the creation time in ways that interfere with our operation. - /// - /// Default true. - pub trust_ctime: bool, - /// If true, all stat fields will be used when checking for up-to-date'ness of the entry. Otherwise - /// nano-second parts of mtime and ctime,uid, gid, inode and device number won't be used, leaving only - /// the whole-second part of ctime and mtime and the file size to be checked. - /// - /// Default true. - pub check_stat: bool, - } - - impl Default for Options { - fn default() -> Self { - Options { - fs: Default::default(), - destination_is_initially_empty: false, - keep_going: false, - trust_ctime: true, - check_stat: true, - } - } - } - - quick_error! { - #[derive(Debug)] - pub enum Error { - IllformedUtf8{ path: BString } { - display("Could not convert path to UTF8: {}", path) - } - Time(err: std::time::SystemTimeError) { - from() - source(err) - display("The clock was off when reading file related metadata after updating a file on disk") - } - Io(err: std::io::Error) { - from() - source(err) - display("IO error while writing blob or reading file metadata or changing filetype") - } - ObjectNotFound{ oid: git_hash::ObjectId, path: std::path::PathBuf } { - display("object {} for checkout at {} not found in object database", oid.to_hex(), path.display()) - } - } - } -} - -pub fn checkout( - index: &mut git_index::State, - path: impl AsRef, - mut find: Find, - options: checkout::Options, -) -> Result -where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Option>, -{ - use std::io::ErrorKind::AlreadyExists; - let root = path.as_ref(); - let mut buf = Vec::new(); - let mut collisions = Vec::new(); - for (entry, entry_path) in index.entries_mut_with_paths() { - // TODO: write test for that - if entry.flags.contains(git_index::entry::Flags::SKIP_WORKTREE) { - continue; - } - - let res = entry::checkout(entry, entry_path, &mut find, root, options, &mut buf); - match res { - Ok(()) => {} - // TODO: use ::IsDirectory as well when stabilized instead of raw_os_error() - #[cfg(windows)] - Err(index::checkout::Error::Io(err)) - if err.kind() == AlreadyExists || err.kind() == std::io::ErrorKind::PermissionDenied => - { - collisions.push(Collision { - path: entry_path.into(), - error_kind: err.kind(), - }); - } - #[cfg(not(windows))] - Err(index::checkout::Error::Io(err)) if err.kind() == AlreadyExists || err.raw_os_error() == Some(21) => { - // We are here because a file existed or was blocked by a directory which shouldn't be possible unless - // we are on a file insensitive file system. - collisions.push(Collision { - path: entry_path.into(), - error_kind: err.kind(), - }); - } - Err(err) => { - if options.keep_going { - todo!("keep going") - } else { - return Err(err); - } - } - } - } - Ok(checkout::Outcome { collisions }) -} - -pub(crate) mod entry { - use std::{ - convert::TryInto, - fs::{create_dir_all, OpenOptions}, - io::Write, - time::Duration, - }; - - use bstr::BStr; - use git_hash::oid; - use git_index::Entry; - - use crate::index; - - #[cfg_attr(not(unix), allow(unused_variables))] - pub fn checkout( - entry: &mut Entry, - entry_path: &BStr, - find: &mut Find, - root: &std::path::Path, - index::checkout::Options { - fs: - crate::fs::Capabilities { - symlink, - executable_bit, - .. - }, - destination_is_initially_empty, - .. - }: index::checkout::Options, - buf: &mut Vec, - ) -> Result<(), index::checkout::Error> - where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Option>, - { - let dest = root.join(git_features::path::from_byte_slice(entry_path).map_err(|_| { - index::checkout::Error::IllformedUtf8 { - path: entry_path.to_owned(), - } - })?); - create_dir_all(dest.parent().expect("entry paths are never empty"))?; // TODO: can this be avoided to create dirs when needed only? - - match entry.mode { - git_index::entry::Mode::FILE | git_index::entry::Mode::FILE_EXECUTABLE => { - let obj = find(&entry.id, buf).ok_or_else(|| index::checkout::Error::ObjectNotFound { - oid: entry.id, - path: root.to_path_buf(), - })?; - let mut options = OpenOptions::new(); - options - .create_new(destination_is_initially_empty) - .create(!destination_is_initially_empty) - .write(true); - #[cfg(unix)] - if executable_bit && entry.mode == git_index::entry::Mode::FILE_EXECUTABLE { - use std::os::unix::fs::OpenOptionsExt; - options.mode(0o777); - } - - let mut file = options.open(&dest)?; - file.write_all(obj.data)?; - // NOTE: we don't call `file.sync_all()` here knowing that some filesystems don't handle this well. - // revisit this once there is a bug to fix. - update_fstat(entry, file.metadata()?)?; - } - git_index::entry::Mode::SYMLINK => { - let obj = find(&entry.id, buf).ok_or_else(|| index::checkout::Error::ObjectNotFound { - oid: entry.id, - path: root.to_path_buf(), - })?; - let symlink_destination = git_features::path::from_byte_slice(obj.data) - .map_err(|_| index::checkout::Error::IllformedUtf8 { path: obj.data.into() })?; - - // TODO: how to deal with mode changes? Maybe this info can be passed once we check for whether - // a checkout is needed at all. - if symlink { - symlink::symlink_auto(symlink_destination, &dest)?; - } else { - std::fs::write(&dest, obj.data)?; - } - - update_fstat(entry, std::fs::symlink_metadata(&dest)?)?; - } - git_index::entry::Mode::DIR => todo!(), - git_index::entry::Mode::COMMIT => todo!(), - _ => unreachable!(), - } - Ok(()) - } - - fn update_fstat(entry: &mut Entry, meta: std::fs::Metadata) -> Result<(), index::checkout::Error> { - let ctime = meta - .created() - .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?; - let mtime = meta - .modified() - .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?; - - let stat = &mut entry.stat; - stat.mtime.secs = mtime - .as_secs() - .try_into() - .expect("by 2038 we found a solution for this"); - stat.mtime.nsecs = mtime.subsec_nanos(); - stat.ctime.secs = ctime - .as_secs() - .try_into() - .expect("by 2038 we found a solution for this"); - stat.ctime.nsecs = ctime.subsec_nanos(); - Ok(()) - } -} diff --git a/git-worktree/src/index/checkout.rs b/git-worktree/src/index/checkout.rs new file mode 100644 index 00000000000..69ee7737a3d --- /dev/null +++ b/git-worktree/src/index/checkout.rs @@ -0,0 +1,73 @@ +use bstr::BString; +use quick_error::quick_error; + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct Collision { + /// the path that collided with something already present on disk. + pub path: BString, + /// The io error we encountered when checking out `path`. + pub error_kind: std::io::ErrorKind, +} + +pub struct Outcome { + pub collisions: Vec, +} + +#[derive(Clone, Copy)] +pub struct Options { + /// capabilities of the file system + pub fs: crate::fs::Capabilities, + /// If true, we assume no file to exist in the target directory, and want exclusive access to it. + /// This should be enabled when cloning to avoid checks for freshness of files. This also enables + /// detection of collisions based on whether or not exclusive file creation succeeds or fails. + pub destination_is_initially_empty: bool, + /// If true, default false, try to checkout as much as possible and don't abort on first error which isn't + /// due to a conflict. + /// The operation will never fail, but count the encountered errors instead along with their paths. + pub keep_going: bool, + /// If true, a files creation time is taken into consideration when checking if a file changed. + /// Can be set to false in case other tools alter the creation time in ways that interfere with our operation. + /// + /// Default true. + pub trust_ctime: bool, + /// If true, all stat fields will be used when checking for up-to-date'ness of the entry. Otherwise + /// nano-second parts of mtime and ctime,uid, gid, inode and device number won't be used, leaving only + /// the whole-second part of ctime and mtime and the file size to be checked. + /// + /// Default true. + pub check_stat: bool, +} + +impl Default for Options { + fn default() -> Self { + Options { + fs: Default::default(), + destination_is_initially_empty: false, + keep_going: false, + trust_ctime: true, + check_stat: true, + } + } +} + +quick_error! { + #[derive(Debug)] + pub enum Error { + IllformedUtf8{ path: BString } { + display("Could not convert path to UTF8: {}", path) + } + Time(err: std::time::SystemTimeError) { + from() + source(err) + display("The clock was off when reading file related metadata after updating a file on disk") + } + Io(err: std::io::Error) { + from() + source(err) + display("IO error while writing blob or reading file metadata or changing filetype") + } + ObjectNotFound{ oid: git_hash::ObjectId, path: std::path::PathBuf } { + display("object {} for checkout at {} not found in object database", oid.to_hex(), path.display()) + } + } +} diff --git a/git-worktree/src/index/entry.rs b/git-worktree/src/index/entry.rs new file mode 100644 index 00000000000..9acb7b6624c --- /dev/null +++ b/git-worktree/src/index/entry.rs @@ -0,0 +1,109 @@ +use std::{ + convert::TryInto, + fs::{create_dir_all, OpenOptions}, + io::Write, + time::Duration, +}; + +use bstr::BStr; +use git_hash::oid; +use git_index::Entry; + +use crate::index; + +#[cfg_attr(not(unix), allow(unused_variables))] +pub fn checkout( + entry: &mut Entry, + entry_path: &BStr, + find: &mut Find, + root: &std::path::Path, + index::checkout::Options { + fs: crate::fs::Capabilities { + symlink, + executable_bit, + .. + }, + destination_is_initially_empty, + .. + }: index::checkout::Options, + buf: &mut Vec, +) -> Result<(), index::checkout::Error> +where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Option>, +{ + let dest = root.join(git_features::path::from_byte_slice(entry_path).map_err(|_| { + index::checkout::Error::IllformedUtf8 { + path: entry_path.to_owned(), + } + })?); + create_dir_all(dest.parent().expect("entry paths are never empty"))?; // TODO: can this be avoided to create dirs when needed only? + + match entry.mode { + git_index::entry::Mode::FILE | git_index::entry::Mode::FILE_EXECUTABLE => { + let obj = find(&entry.id, buf).ok_or_else(|| index::checkout::Error::ObjectNotFound { + oid: entry.id, + path: root.to_path_buf(), + })?; + let mut options = OpenOptions::new(); + options + .create_new(destination_is_initially_empty) + .create(!destination_is_initially_empty) + .write(true); + #[cfg(unix)] + if executable_bit && entry.mode == git_index::entry::Mode::FILE_EXECUTABLE { + use std::os::unix::fs::OpenOptionsExt; + options.mode(0o777); + } + + let mut file = options.open(&dest)?; + file.write_all(obj.data)?; + // NOTE: we don't call `file.sync_all()` here knowing that some filesystems don't handle this well. + // revisit this once there is a bug to fix. + update_fstat(entry, file.metadata()?)?; + } + git_index::entry::Mode::SYMLINK => { + let obj = find(&entry.id, buf).ok_or_else(|| index::checkout::Error::ObjectNotFound { + oid: entry.id, + path: root.to_path_buf(), + })?; + let symlink_destination = git_features::path::from_byte_slice(obj.data) + .map_err(|_| index::checkout::Error::IllformedUtf8 { path: obj.data.into() })?; + + // TODO: how to deal with mode changes? Maybe this info can be passed once we check for whether + // a checkout is needed at all. + if symlink { + symlink::symlink_auto(symlink_destination, &dest)?; + } else { + std::fs::write(&dest, obj.data)?; + } + + update_fstat(entry, std::fs::symlink_metadata(&dest)?)?; + } + git_index::entry::Mode::DIR => todo!(), + git_index::entry::Mode::COMMIT => todo!(), + _ => unreachable!(), + } + Ok(()) +} + +fn update_fstat(entry: &mut Entry, meta: std::fs::Metadata) -> Result<(), index::checkout::Error> { + let ctime = meta + .created() + .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?; + let mtime = meta + .modified() + .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?; + + let stat = &mut entry.stat; + stat.mtime.secs = mtime + .as_secs() + .try_into() + .expect("by 2038 we found a solution for this"); + stat.mtime.nsecs = mtime.subsec_nanos(); + stat.ctime.secs = ctime + .as_secs() + .try_into() + .expect("by 2038 we found a solution for this"); + stat.ctime.nsecs = ctime.subsec_nanos(); + Ok(()) +} diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs new file mode 100644 index 00000000000..c54ab9ea6ee --- /dev/null +++ b/git-worktree/src/index/mod.rs @@ -0,0 +1,63 @@ +use git_hash::oid; + +use crate::{index, index::checkout::Collision}; + +pub mod checkout; +pub(crate) mod entry; + +pub fn checkout( + index: &mut git_index::State, + path: impl AsRef, + mut find: Find, + options: checkout::Options, +) -> Result +where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Option>, +{ + if !options.destination_is_initially_empty { + todo!("deal with non-clone checkouts") + } + + use std::io::ErrorKind::AlreadyExists; + let root = path.as_ref(); + let mut buf = Vec::new(); + let mut collisions = Vec::new(); + for (entry, entry_path) in index.entries_mut_with_paths() { + // TODO: write test for that + if entry.flags.contains(git_index::entry::Flags::SKIP_WORKTREE) { + continue; + } + + let res = entry::checkout(entry, entry_path, &mut find, root, options, &mut buf); + match res { + Ok(()) => {} + // TODO: use ::IsDirectory as well when stabilized instead of raw_os_error() + #[cfg(windows)] + Err(index::checkout::Error::Io(err)) + if err.kind() == AlreadyExists || err.kind() == std::io::ErrorKind::PermissionDenied => + { + collisions.push(Collision { + path: entry_path.into(), + error_kind: err.kind(), + }); + } + #[cfg(not(windows))] + Err(index::checkout::Error::Io(err)) if err.kind() == AlreadyExists || err.raw_os_error() == Some(21) => { + // We are here because a file existed or was blocked by a directory which shouldn't be possible unless + // we are on a file insensitive file system. + collisions.push(Collision { + path: entry_path.into(), + error_kind: err.kind(), + }); + } + Err(err) => { + if options.keep_going { + todo!("keep going") + } else { + return Err(err); + } + } + } + } + Ok(checkout::Outcome { collisions }) +} diff --git a/git-worktree/tests/index/mod.rs b/git-worktree/tests/index/mod.rs index f7e21481021..460f7117140 100644 --- a/git-worktree/tests/index/mod.rs +++ b/git-worktree/tests/index/mod.rs @@ -14,27 +14,6 @@ mod checkout { use crate::fixture_path; - fn probe_gitoxide_dir() -> crate::Result { - Ok(git_worktree::fs::Capabilities::probe( - std::env::current_dir()?.join("..").join(".git"), - )) - } - - fn opts_with_symlink(symlink: bool) -> index::checkout::Options { - index::checkout::Options { - fs: git_worktree::fs::Capabilities { - symlink, - ..Default::default() - }, - destination_is_initially_empty: true, - ..Default::default() - } - } - - fn paths<'a>(p: impl IntoIterator) -> Vec { - p.into_iter().map(PathBuf::from).collect() - } - #[test] fn symlinks_become_files_if_disabled() -> crate::Result { let opts = opts_with_symlink(false); @@ -205,4 +184,25 @@ mod checkout { fn stripped_prefix(prefix: impl AsRef, source_files: &[PathBuf]) -> Vec<&Path> { source_files.iter().flat_map(|p| p.strip_prefix(&prefix)).collect() } + + fn probe_gitoxide_dir() -> crate::Result { + Ok(git_worktree::fs::Capabilities::probe( + std::env::current_dir()?.join("..").join(".git"), + )) + } + + fn opts_with_symlink(symlink: bool) -> index::checkout::Options { + index::checkout::Options { + fs: git_worktree::fs::Capabilities { + symlink, + ..Default::default() + }, + destination_is_initially_empty: true, + ..Default::default() + } + } + + fn paths<'a>(p: impl IntoIterator) -> Vec { + p.into_iter().map(PathBuf::from).collect() + } } From acb8acd905c4a7ec0fbc831b159f626962c0a37d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Mar 2022 16:52:43 +0800 Subject: [PATCH 03/19] hopefully fix symlink creation on windows (#301) The windows file check needs to handle relative links properly when checking for file type. --- git-worktree/src/fs.rs | 4 ++-- git-worktree/src/index/checkout.rs | 6 ++++++ git-worktree/src/index/entry.rs | 6 ++++-- git-worktree/src/lib.rs | 2 ++ git-worktree/src/os.rs | 28 ++++++++++++++++++++++++++++ 5 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 git-worktree/src/os.rs diff --git a/git-worktree/src/fs.rs b/git-worktree/src/fs.rs index 7c71001160b..e301fb386df 100644 --- a/git-worktree/src/fs.rs +++ b/git-worktree/src/fs.rs @@ -90,14 +90,14 @@ impl Capabilities { .write(true) .open(&src_path)?; let link_path = root.join("__file_link"); - if symlink::symlink_file(&src_path, &link_path).is_err() { + if crate::os::create_symlink(&src_path, &link_path).is_err() { std::fs::remove_file(&src_path)?; return Ok(false); } let res = std::fs::symlink_metadata(&link_path).map(|m| m.is_symlink()); let cleanup = std::fs::remove_file(&src_path); - symlink::remove_symlink_file(&link_path) + crate::os::remove_symlink(&link_path) .or_else(|_| std::fs::remove_file(&link_path)) .and(cleanup)?; res diff --git a/git-worktree/src/index/checkout.rs b/git-worktree/src/index/checkout.rs index 69ee7737a3d..3cb9fb952de 100644 --- a/git-worktree/src/index/checkout.rs +++ b/git-worktree/src/index/checkout.rs @@ -21,6 +21,11 @@ pub struct Options { /// This should be enabled when cloning to avoid checks for freshness of files. This also enables /// detection of collisions based on whether or not exclusive file creation succeeds or fails. pub destination_is_initially_empty: bool, + /// If true, default false, worktree entries on disk will be overwritten with content from the index + /// even if they appear to be changed. When creating directories that clash with existing worktree entries, + /// these will try to delete the existing entry. + /// This is similar in behaviour as `git checkout --force`. + pub overwrite_existing: bool, /// If true, default false, try to checkout as much as possible and don't abort on first error which isn't /// due to a conflict. /// The operation will never fail, but count the encountered errors instead along with their paths. @@ -46,6 +51,7 @@ impl Default for Options { keep_going: false, trust_ctime: true, check_stat: true, + overwrite_existing: false, } } } diff --git a/git-worktree/src/index/entry.rs b/git-worktree/src/index/entry.rs index 9acb7b6624c..94961f17bce 100644 --- a/git-worktree/src/index/entry.rs +++ b/git-worktree/src/index/entry.rs @@ -36,7 +36,9 @@ where path: entry_path.to_owned(), } })?); - create_dir_all(dest.parent().expect("entry paths are never empty"))?; // TODO: can this be avoided to create dirs when needed only? + + let dest_dir = dest.parent().expect("entry paths are never empty"); + create_dir_all(dest_dir)?; // TODO: can this be avoided to create dirs when needed only? match entry.mode { git_index::entry::Mode::FILE | git_index::entry::Mode::FILE_EXECUTABLE => { @@ -72,7 +74,7 @@ where // TODO: how to deal with mode changes? Maybe this info can be passed once we check for whether // a checkout is needed at all. if symlink { - symlink::symlink_auto(symlink_destination, &dest)?; + crate::os::create_symlink(symlink_destination, &dest)?; } else { std::fs::write(&dest, obj.data)?; } diff --git a/git-worktree/src/lib.rs b/git-worktree/src/lib.rs index 587771d9983..0bd3954b85d 100644 --- a/git-worktree/src/lib.rs +++ b/git-worktree/src/lib.rs @@ -10,3 +10,5 @@ pub mod fs; pub mod index; + +pub(crate) mod os; diff --git a/git-worktree/src/os.rs b/git-worktree/src/os.rs new file mode 100644 index 00000000000..2b49b7fcd00 --- /dev/null +++ b/git-worktree/src/os.rs @@ -0,0 +1,28 @@ +use std::io; +use std::path::Path; + +#[cfg(not(windows))] +pub fn create_symlink(original: &Path, link: &Path) -> io::Result<()> { + std::os::unix::fs::symlink(original, link) +} + +#[cfg(not(windows))] +pub fn remove_symlink(path: &Path) -> io::Result<()> { + std::fs::remove_file(path) +} + +#[cfg(windows)] +pub fn remove_symlink(path: &Path) -> io::Result<()> { + symlink::remove_symlink_auto(path) +} + +#[cfg(windows)] +pub fn create_symlink(original: &Path, link: &Path) -> io::Result<()> { + use std::os::windows::fs::{symlink_dir, symlink_file}; + // TODO: figure out if links to links count as files or whatever they point at + if std::fs::metadata(link.parent().expect("dir for link").join(original))?.is_dir() { + symlink_dir(original, link) + } else { + symlink_file(original, original) + } +} From be5e3fb3a19f86e37244b17055bf31cc455e78e8 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Mar 2022 18:14:51 +0800 Subject: [PATCH 04/19] gather more information about test failure on windows (#301) --- git-worktree/src/index/mod.rs | 2 +- git-worktree/tests/index/mod.rs | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs index c54ab9ea6ee..1fc614c5d1f 100644 --- a/git-worktree/src/index/mod.rs +++ b/git-worktree/src/index/mod.rs @@ -31,7 +31,6 @@ where let res = entry::checkout(entry, entry_path, &mut find, root, options, &mut buf); match res { Ok(()) => {} - // TODO: use ::IsDirectory as well when stabilized instead of raw_os_error() #[cfg(windows)] Err(index::checkout::Error::Io(err)) if err.kind() == AlreadyExists || err.kind() == std::io::ErrorKind::PermissionDenied => @@ -41,6 +40,7 @@ where error_kind: err.kind(), }); } + // TODO: use ::IsDirectory as well when stabilized instead of raw_os_error() #[cfg(not(windows))] Err(index::checkout::Error::Io(err)) if err.kind() == AlreadyExists || err.raw_os_error() == Some(21) => { // We are here because a file existed or was blocked by a directory which shouldn't be possible unless diff --git a/git-worktree/tests/index/mod.rs b/git-worktree/tests/index/mod.rs index 460f7117140..917a452a8ed 100644 --- a/git-worktree/tests/index/mod.rs +++ b/git-worktree/tests/index/mod.rs @@ -68,6 +68,20 @@ mod checkout { let (source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap(); + let source_files = dir_structure(&source_tree); + assert_eq!( + stripped_prefix(&source_tree, &source_files), + paths(["d", "file_x", "link-to-X", "x"]), + "plenty of collisions prevent a checkout" + ); + + let dest_files = dir_structure(&destination); + assert_eq!( + stripped_prefix(&destination, &dest_files), + paths(["D/B", "D/C", "FILE_X", "X", "link-to-X"]), + "we checkout files in order and generally handle collision detection differently, hence the difference" + ); + let error_kind = ErrorKind::AlreadyExists; #[cfg(windows)] let error_kind_dir = ErrorKind::PermissionDenied; @@ -100,20 +114,6 @@ mod checkout { ], "these files couldn't be checked out" ); - - let source_files = dir_structure(&source_tree); - assert_eq!( - stripped_prefix(&source_tree, &source_files), - paths(["d", "file_x", "link-to-X", "x"]), - "plenty of collisions prevent a checkout" - ); - - let dest_files = dir_structure(&destination); - assert_eq!( - stripped_prefix(&destination, &dest_files), - paths(["D/B", "D/C", "FILE_X", "X", "link-to-X"]), - "we checkout files in order and generally handle collision detection differently, hence the difference" - ); } fn assert_equality(source_tree: &Path, destination: &TempDir, allow_symlinks: bool) -> crate::Result { From 60bcffc2d2921f01a0c8f42da9b43cd731eaf55d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Mar 2022 18:26:15 +0800 Subject: [PATCH 05/19] performance issue on windows is due to slow process execution speed (#301) --- git-ref/tests/packed/find.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/git-ref/tests/packed/find.rs b/git-ref/tests/packed/find.rs index ea965ebd782..fc8f5f16045 100644 --- a/git-ref/tests/packed/find.rs +++ b/git-ref/tests/packed/find.rs @@ -175,11 +175,7 @@ fn find_speed() -> crate::Result { let packed = store.open_packed_buffer()?.expect("packed-refs present"); let start = std::time::Instant::now(); let mut num_refs = 0; - #[cfg(windows)] - let count = 500; - #[cfg(not(windows))] - let count = 10_000; - for r in packed.iter()?.take(count) { + for r in packed.iter()?.take(10_000) { num_refs += 1; let r = r?; assert_eq!( From 4b1650ba1988f52a7a91ce4f5327eca350f32520 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Mar 2022 18:34:54 +0800 Subject: [PATCH 06/19] fix symlink creation on windows, hopefully (#301) --- git-worktree/src/os.rs | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/git-worktree/src/os.rs b/git-worktree/src/os.rs index 2b49b7fcd00..c8599a0a7b8 100644 --- a/git-worktree/src/os.rs +++ b/git-worktree/src/os.rs @@ -23,6 +23,26 @@ pub fn create_symlink(original: &Path, link: &Path) -> io::Result<()> { if std::fs::metadata(link.parent().expect("dir for link").join(original))?.is_dir() { symlink_dir(original, link) } else { - symlink_file(original, original) + symlink_file(original, link) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn io_err_if_symlink_target_is_a_file() { + let dir = tempfile::tempdir().unwrap(); + let a = dir.path().join("a"); + let b = dir.path().join("b"); + + std::fs::write(&a, &[]).unwrap(); + std::fs::write(&b, &[]).unwrap(); + + assert_eq!( + create_symlink(&a, &b).unwrap_err().kind(), + std::io::ErrorKind::AlreadyExists + ); } } From e90c123675a98ab62fc6bb22019f889cee8b7301 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Mar 2022 19:04:08 +0800 Subject: [PATCH 07/19] support for unicode-precomposition for gix apps (#301) --- Cargo.lock | 1 + crate-status.md | 1 + git-repository/Cargo.toml | 3 +++ git-repository/src/lib.rs | 23 +++++++++++++++++++++++ git-worktree/src/os.rs | 20 -------------------- src/plumbing/main.rs | 2 +- src/porcelain/main.rs | 2 +- 7 files changed, 30 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 352882b0508..d92293e1cc4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1617,6 +1617,7 @@ dependencies = [ "signal-hook", "tempfile", "thiserror", + "unicode-normalization", ] [[package]] diff --git a/crate-status.md b/crate-status.md index f18d48d6402..e9f6d71b751 100644 --- a/crate-status.md +++ b/crate-status.md @@ -323,6 +323,7 @@ See its [README.md](https://github.com/Byron/gitoxide/blob/main/git-lock/README. * [x] initialize * [ ] Proper configuration depending on platform (e.g. ignorecase, filemode, …) * [ ] All mutations are multi-process safe and this is tested and configurable (i.e. abort or wait if lock is encountered) +* support for unicode-precomposition of command-line arguments (needs explicit use in parent application) * **Easy** (_porcelain_) * **Id** * [x] short hashes with detection of ambiguity. diff --git a/git-repository/Cargo.toml b/git-repository/Cargo.toml index c94d13e9eb9..fb4d286a011 100644 --- a/git-repository/Cargo.toml +++ b/git-repository/Cargo.toml @@ -86,6 +86,9 @@ log = "0.4.14" document-features = { version = "0.2.0", optional = true } +[target.'cfg(target_vendor = "apple")'.dependencies] +unicode-normalization = { version = "0.1.19", default-features = false } + [dev-dependencies] git-testtools = { path = "../tests/tools" } anyhow = "1" diff --git a/git-repository/src/lib.rs b/git-repository/src/lib.rs index 64bc082b3b2..98dc9abc2a7 100644 --- a/git-repository/src/lib.rs +++ b/git-repository/src/lib.rs @@ -286,3 +286,26 @@ pub mod discover { } } } + +/// +pub mod env { + use std::ffi::OsString; + + /// Equivalent to `std::env::args_os()`, but with precomposed unicode on MacOS and other apple platforms. + #[cfg(not(target_vendor = "apple"))] + pub fn args_os() -> impl Iterator { + std::env::args_os() + } + + /// Equivalent to `std::env::args_os()`, but with precomposed unicode on MacOS and other apple platforms. + /// + /// Note that this ignores `core.precomposeUnicode` as git-config isn't available yet. It's default enabled in modern git though. + #[cfg(target_vendor = "apple")] + pub fn args_os() -> impl Iterator { + use unicode_normalization::UnicodeNormalization; + std::env::args_os().map(|arg| match arg.to_str() { + Some(arg) => arg.nfc().collect::().into(), + None => arg, + }) + } +} diff --git a/git-worktree/src/os.rs b/git-worktree/src/os.rs index c8599a0a7b8..ba66fbd04c9 100644 --- a/git-worktree/src/os.rs +++ b/git-worktree/src/os.rs @@ -26,23 +26,3 @@ pub fn create_symlink(original: &Path, link: &Path) -> io::Result<()> { symlink_file(original, link) } } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn io_err_if_symlink_target_is_a_file() { - let dir = tempfile::tempdir().unwrap(); - let a = dir.path().join("a"); - let b = dir.path().join("b"); - - std::fs::write(&a, &[]).unwrap(); - std::fs::write(&b, &[]).unwrap(); - - assert_eq!( - create_symlink(&a, &b).unwrap_err().kind(), - std::io::ErrorKind::AlreadyExists - ); - } -} diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index a38102c08eb..7df12a1ec0b 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -46,7 +46,7 @@ pub mod async_util { } pub fn main() -> Result<()> { - let args: Args = Args::parse(); + let args: Args = Args::parse_from(git_repository::env::args_os()); let thread_limit = args.threads; let verbose = args.verbose; let format = args.format; diff --git a/src/porcelain/main.rs b/src/porcelain/main.rs index b59e087eddf..4992801a126 100644 --- a/src/porcelain/main.rs +++ b/src/porcelain/main.rs @@ -13,7 +13,7 @@ use crate::{ }; pub fn main() -> Result<()> { - let args: Args = Args::parse(); + let args: Args = Args::parse_from(git_repository::env::args_os()); let should_interrupt = Arc::new(AtomicBool::new(false)); git_repository::interrupt::init_handler({ let should_interrupt = Arc::clone(&should_interrupt); From 039e822bb4e56e49432db5c53081e0eb39588d66 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 3 Mar 2022 10:48:07 +0800 Subject: [PATCH 08/19] basic progress reporting for checkout (#301) --- git-worktree/src/index/entry.rs | 10 ++++++---- git-worktree/src/index/mod.rs | 12 +++++++++++- git-worktree/tests/index/mod.rs | 3 +++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/git-worktree/src/index/entry.rs b/git-worktree/src/index/entry.rs index 94961f17bce..6a8c51a2c15 100644 --- a/git-worktree/src/index/entry.rs +++ b/git-worktree/src/index/entry.rs @@ -27,7 +27,7 @@ pub fn checkout( .. }: index::checkout::Options, buf: &mut Vec, -) -> Result<(), index::checkout::Error> +) -> Result where Find: for<'a> FnMut(&oid, &'a mut Vec) -> Option>, { @@ -40,7 +40,7 @@ where let dest_dir = dest.parent().expect("entry paths are never empty"); create_dir_all(dest_dir)?; // TODO: can this be avoided to create dirs when needed only? - match entry.mode { + let object_size = match entry.mode { git_index::entry::Mode::FILE | git_index::entry::Mode::FILE_EXECUTABLE => { let obj = find(&entry.id, buf).ok_or_else(|| index::checkout::Error::ObjectNotFound { oid: entry.id, @@ -62,6 +62,7 @@ where // NOTE: we don't call `file.sync_all()` here knowing that some filesystems don't handle this well. // revisit this once there is a bug to fix. update_fstat(entry, file.metadata()?)?; + obj.data.len() } git_index::entry::Mode::SYMLINK => { let obj = find(&entry.id, buf).ok_or_else(|| index::checkout::Error::ObjectNotFound { @@ -80,12 +81,13 @@ where } update_fstat(entry, std::fs::symlink_metadata(&dest)?)?; + obj.data.len() } git_index::entry::Mode::DIR => todo!(), git_index::entry::Mode::COMMIT => todo!(), _ => unreachable!(), - } - Ok(()) + }; + Ok(object_size) } fn update_fstat(entry: &mut Entry, meta: std::fs::Metadata) -> Result<(), index::checkout::Error> { diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs index 1fc614c5d1f..0788bd65481 100644 --- a/git-worktree/src/index/mod.rs +++ b/git-worktree/src/index/mod.rs @@ -1,3 +1,5 @@ +use git_features::progress; +use git_features::progress::Progress; use git_hash::oid; use crate::{index, index::checkout::Collision}; @@ -9,6 +11,8 @@ pub fn checkout( index: &mut git_index::State, path: impl AsRef, mut find: Find, + files: &mut impl Progress, + bytes: &mut impl Progress, options: checkout::Options, ) -> Result where @@ -22,15 +26,21 @@ where let root = path.as_ref(); let mut buf = Vec::new(); let mut collisions = Vec::new(); + + files.init(Some(index.entries().len()), progress::count("files")); + bytes.init(Some(index.entries().len()), progress::bytes()); + for (entry, entry_path) in index.entries_mut_with_paths() { // TODO: write test for that if entry.flags.contains(git_index::entry::Flags::SKIP_WORKTREE) { + files.inc(); continue; } let res = entry::checkout(entry, entry_path, &mut find, root, options, &mut buf); + files.inc(); match res { - Ok(()) => {} + Ok(object_size) => bytes.inc_by(object_size), #[cfg(windows)] Err(index::checkout::Error::Io(err)) if err.kind() == AlreadyExists || err.kind() == std::io::ErrorKind::PermissionDenied => diff --git a/git-worktree/tests/index/mod.rs b/git-worktree/tests/index/mod.rs index 917a452a8ed..ce638a3b056 100644 --- a/git-worktree/tests/index/mod.rs +++ b/git-worktree/tests/index/mod.rs @@ -7,6 +7,7 @@ mod checkout { path::{Path, PathBuf}, }; + use git_features::progress; use git_object::bstr::ByteSlice; use git_odb::FindExt; use git_worktree::{fs::Capabilities, index, index::checkout::Collision}; @@ -176,6 +177,8 @@ mod checkout { &mut index, &destination, move |oid, buf| odb.find_blob(oid, buf).ok(), + &mut progress::Discard, + &mut progress::Discard, opts, )?; Ok((source_tree, destination, index, outcome)) From f23b8d2f1c4b767d337ec51888afaa8b3719798c Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 3 Mar 2022 11:44:26 +0800 Subject: [PATCH 09/19] basic version of index checkout via command-line (#301) For now just with empty files, but that shows that with a single thread it's faster than git at 27k files per second created compared to 14.5k of a single thread of git. We do the least amount of work necessary even without an lstat cache, but that probably changes once we have to access the ODB to obtain actual data. Note that git might also do an additional exists check per file to see if it changed, something we don't do as we may assume exclusive access to the directory and just go with that for now. Long story short: it looks like there is a lot of potential for performance improvements and I think there is a lot of room for being faster especially in multi-threaded mode. --- Cargo.lock | 1 + git-index/src/access.rs | 3 ++ git-repository/Cargo.toml | 3 +- git-repository/src/lib.rs | 3 ++ git-worktree/src/index/mod.rs | 4 -- git-worktree/tests/index/mod.rs | 9 +---- gitoxide-core/src/index/mod.rs | 67 +++++++++++++++++++++++++++++++++ src/plumbing/main.rs | 15 ++++++++ src/plumbing/options.rs | 10 ++--- 9 files changed, 97 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d92293e1cc4..cf0bf0a4a31 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1613,6 +1613,7 @@ dependencies = [ "git-traverse 0.12.0", "git-url 0.3.5", "git-validate 0.5.3", + "git-worktree", "log", "signal-hook", "tempfile", diff --git a/git-index/src/access.rs b/git-index/src/access.rs index 123bdd32e34..92ae7309b20 100644 --- a/git-index/src/access.rs +++ b/git-index/src/access.rs @@ -10,6 +10,9 @@ impl State { pub fn entries(&self) -> &[Entry] { &self.entries } + pub fn entries_mut(&mut self) -> &mut [Entry] { + &mut self.entries + } pub fn entries_mut_with_paths(&mut self) -> impl Iterator { let paths = &self.path_backing; self.entries.iter_mut().map(move |e| { diff --git a/git-repository/Cargo.toml b/git-repository/Cargo.toml index fb4d286a011..c0c9c878572 100644 --- a/git-repository/Cargo.toml +++ b/git-repository/Cargo.toml @@ -49,7 +49,7 @@ max-performance = ["git-features/parallel", "git-features/zlib-ng-compat", "git- local-time-support = ["git-actor/local-time-support"] ## Re-export stability tier 2 crates for convenience and make `Repository` struct fields with types from these crates publicly accessible. ## Doing so is less stable than the stability tier 1 that `git-repository` is a member of. -unstable = ["git-index"] +unstable = ["git-index", "git-worktree"] ## Print debugging information about usage of object database caches, useful for tuning cache sizes. cache-efficiency-debug = ["git-features/cache-efficiency-debug"] @@ -77,6 +77,7 @@ git-features = { version = "^0.19.1", path = "../git-features", features = ["pro # unstable only git-index = { version ="^0.1.0", path = "../git-index", optional = true } +git-worktree = { version ="^0.0.0", path = "../git-worktree", optional = true } signal-hook = { version = "0.3.9", default-features = false } thiserror = "1.0.26" diff --git a/git-repository/src/lib.rs b/git-repository/src/lib.rs index 98dc9abc2a7..5fc2ef6fb51 100644 --- a/git-repository/src/lib.rs +++ b/git-repository/src/lib.rs @@ -88,6 +88,7 @@ //! * [`actor`] //! * [`bstr`][bstr] //! * [`index`] +//! * [`worktree`] //! * [`objs`] //! * [`odb`] //! * [`pack`][odb::pack] @@ -144,6 +145,8 @@ pub use git_url as url; #[doc(inline)] #[cfg(all(feature = "unstable", feature = "git-url"))] pub use git_url::Url; +#[cfg(all(feature = "unstable", feature = "git-worktree"))] +pub use git_worktree as worktree; pub use hash::{oid, ObjectId}; pub mod interrupt; diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs index 0788bd65481..9d78b5e82a4 100644 --- a/git-worktree/src/index/mod.rs +++ b/git-worktree/src/index/mod.rs @@ -1,4 +1,3 @@ -use git_features::progress; use git_features::progress::Progress; use git_hash::oid; @@ -27,9 +26,6 @@ where let mut buf = Vec::new(); let mut collisions = Vec::new(); - files.init(Some(index.entries().len()), progress::count("files")); - bytes.init(Some(index.entries().len()), progress::bytes()); - for (entry, entry_path) in index.entries_mut_with_paths() { // TODO: write test for that if entry.flags.contains(git_index::entry::Flags::SKIP_WORKTREE) { diff --git a/git-worktree/tests/index/mod.rs b/git-worktree/tests/index/mod.rs index ce638a3b056..d365777f332 100644 --- a/git-worktree/tests/index/mod.rs +++ b/git-worktree/tests/index/mod.rs @@ -173,14 +173,7 @@ mod checkout { let odb = git_odb::at(git_dir.join("objects"))?; let destination = tempfile::tempdir()?; - let outcome = index::checkout( - &mut index, - &destination, - move |oid, buf| odb.find_blob(oid, buf).ok(), - &mut progress::Discard, - &mut progress::Discard, - opts, - )?; + let outcome = index::checkout(&mut index)?; Ok((source_tree, destination, index, outcome)) } diff --git a/gitoxide-core/src/index/mod.rs b/gitoxide-core/src/index/mod.rs index 42ea565aa69..d22eabcf650 100644 --- a/gitoxide-core/src/index/mod.rs +++ b/gitoxide-core/src/index/mod.rs @@ -1,6 +1,8 @@ +use anyhow::bail; use std::path::Path; use git_repository as git; +use git_repository::Progress; pub struct Options { pub object_hash: git::hash::Kind, @@ -98,3 +100,68 @@ fn parse_file(index_path: impl AsRef, object_hash: git::hash::Kind) -> any ) .map_err(Into::into) } + +pub fn checkout_exclusive( + index_path: impl AsRef, + dest_directory: impl AsRef, + mut progress: impl Progress, + Options { object_hash, .. }: Options, +) -> anyhow::Result<()> { + let dest_directory = dest_directory.as_ref(); + if dest_directory.exists() { + bail!( + "Refusing to checkout index into existing directory '{}' - remove it and try again", + dest_directory.display() + ) + } + std::fs::create_dir_all(dest_directory)?; + + let mut index = parse_file(index_path, object_hash)?; + + let mut num_skipped = 0; + for entry in index.entries_mut().iter_mut().filter(|e| { + e.mode + .contains(git::index::entry::Mode::DIR | git::index::entry::Mode::SYMLINK | git::index::entry::Mode::COMMIT) + }) { + entry.flags.insert(git::index::entry::Flags::SKIP_WORKTREE); + num_skipped += 1; + } + if num_skipped > 0 { + progress.info(format!("Skipping {} DIR/SYMLINK/COMMIT entries", num_skipped)); + } + + let opts = git::worktree::index::checkout::Options { + fs: git::worktree::fs::Capabilities::probe(dest_directory), + + // TODO: turn the two following flags into an enum + destination_is_initially_empty: true, + overwrite_existing: false, + ..Default::default() + }; + + let mut files = progress.add_child("checkout"); + let mut bytes = progress.add_child("writing"); + + let entries_for_checkout = index.entries().len() - num_skipped; + files.init(Some(entries_for_checkout), git::progress::count("files")); + bytes.init(Some(entries_for_checkout), git::progress::bytes()); + + let start = std::time::Instant::now(); + git::worktree::index::checkout( + &mut index, + dest_directory, + |_, buf| { + buf.clear(); + Some(git::objs::BlobRef { data: buf }) + }, + &mut files, + &mut bytes, + opts, + )?; + + files.show_throughput(start); + bytes.show_throughput(start); + + progress.done(format!("Created {} empty files", entries_for_checkout)); + Ok(()) +} diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 7df12a1ec0b..3d9e7ea37c6 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -78,6 +78,21 @@ pub fn main() -> Result<()> { index_path, cmd, }) => match cmd { + index::Subcommands::CheckoutExclusive { directory } => prepare_and_run( + "index-checkout", + verbose, + progress, + progress_keep_open, + None, + move |progress, _out, _err| { + core::index::checkout_exclusive( + index_path, + directory, + progress, + core::index::Options { object_hash, format }, + ) + }, + ), index::Subcommands::Info { no_details } => prepare_and_run( "index-entries", verbose, diff --git a/src/plumbing/options.rs b/src/plumbing/options.rs index 95f066e2329..68973190ddd 100644 --- a/src/plumbing/options.rs +++ b/src/plumbing/options.rs @@ -206,11 +206,9 @@ pub mod pack { sink_compress: bool, /// The '.pack' or '.idx' file to explode into loose objects - #[clap(parse(from_os_str))] pack_path: PathBuf, /// The path into which all objects should be written. Commonly '.git/objects' - #[clap(parse(from_os_str))] object_path: Option, }, /// Verify the integrity of a pack, index or multi-index file @@ -219,7 +217,6 @@ pub mod pack { args: VerifyOptions, /// The '.pack', '.idx' or 'multi-pack-index' file to validate. - #[clap(parse(from_os_str))] path: PathBuf, }, } @@ -316,7 +313,6 @@ pub mod pack { /// The folder into which to place the pack and the generated index file /// /// If unset, only informational output will be provided to standard output. - #[clap(parse(from_os_str))] directory: Option, }, } @@ -371,6 +367,11 @@ pub mod index { #[clap(long)] no_details: bool, }, + /// Checkout the index into a directory with exclusive write access, similar to what would happen during clone. + CheckoutExclusive { + /// The directory into which to write all index entries. + directory: PathBuf, + }, } } @@ -383,7 +384,6 @@ pub mod commitgraph { /// Verify the integrity of a commit graph Verify { /// The path to '.git/objects/info/', '.git/objects/info/commit-graphs/', or '.git/objects/info/commit-graph' to validate. - #[clap(parse(from_os_str))] path: PathBuf, /// output statistical information about the pack #[clap(long, short = 's')] From 5494fb3e1de1234dde8c47336597283dbd8bcb29 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 3 Mar 2022 15:52:46 +0800 Subject: [PATCH 10/19] support for repo to write actual objects (#301) --- gitoxide-core/src/index/mod.rs | 54 ++++++++++++++++++++++++---------- src/plumbing/main.rs | 3 +- src/plumbing/options.rs | 4 +++ 3 files changed, 45 insertions(+), 16 deletions(-) diff --git a/gitoxide-core/src/index/mod.rs b/gitoxide-core/src/index/mod.rs index d22eabcf650..bd4f946807a 100644 --- a/gitoxide-core/src/index/mod.rs +++ b/gitoxide-core/src/index/mod.rs @@ -1,8 +1,8 @@ use anyhow::bail; -use std::path::Path; +use std::path::{Path, PathBuf}; use git_repository as git; -use git_repository::Progress; +use git_repository::{odb::FindExt, Progress}; pub struct Options { pub object_hash: git::hash::Kind, @@ -104,9 +104,14 @@ fn parse_file(index_path: impl AsRef, object_hash: git::hash::Kind) -> any pub fn checkout_exclusive( index_path: impl AsRef, dest_directory: impl AsRef, + repo: Option, mut progress: impl Progress, Options { object_hash, .. }: Options, ) -> anyhow::Result<()> { + let repo = repo + .map(|dir| git_repository::discover(dir).map(|r| r.apply_environment())) + .transpose()?; + let dest_directory = dest_directory.as_ref(); if dest_directory.exists() { bail!( @@ -119,9 +124,14 @@ pub fn checkout_exclusive( let mut index = parse_file(index_path, object_hash)?; let mut num_skipped = 0; + let maybe_symlink_mode = if repo.is_some() { + git::index::entry::Mode::DIR + } else { + git::index::entry::Mode::SYMLINK + }; for entry in index.entries_mut().iter_mut().filter(|e| { e.mode - .contains(git::index::entry::Mode::DIR | git::index::entry::Mode::SYMLINK | git::index::entry::Mode::COMMIT) + .contains(maybe_symlink_mode | git::index::entry::Mode::DIR | git::index::entry::Mode::COMMIT) }) { entry.flags.insert(git::index::entry::Flags::SKIP_WORKTREE); num_skipped += 1; @@ -147,21 +157,35 @@ pub fn checkout_exclusive( bytes.init(Some(entries_for_checkout), git::progress::bytes()); let start = std::time::Instant::now(); - git::worktree::index::checkout( - &mut index, - dest_directory, - |_, buf| { - buf.clear(); - Some(git::objs::BlobRef { data: buf }) - }, - &mut files, - &mut bytes, - opts, - )?; + match &repo { + Some(repo) => git::worktree::index::checkout( + &mut index, + dest_directory, + |oid, buf| repo.objects.find_blob(oid, buf).ok(), + &mut files, + &mut bytes, + opts, + ), + None => git::worktree::index::checkout( + &mut index, + dest_directory, + |_, buf| { + buf.clear(); + Some(git::objs::BlobRef { data: buf }) + }, + &mut files, + &mut bytes, + opts, + ), + }?; files.show_throughput(start); bytes.show_throughput(start); - progress.done(format!("Created {} empty files", entries_for_checkout)); + progress.done(format!( + "Created {} {} files", + entries_for_checkout, + repo.is_none().then(|| "empty").unwrap_or_default() + )); Ok(()) } diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 3d9e7ea37c6..c2300926628 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -78,7 +78,7 @@ pub fn main() -> Result<()> { index_path, cmd, }) => match cmd { - index::Subcommands::CheckoutExclusive { directory } => prepare_and_run( + index::Subcommands::CheckoutExclusive { directory, repository } => prepare_and_run( "index-checkout", verbose, progress, @@ -88,6 +88,7 @@ pub fn main() -> Result<()> { core::index::checkout_exclusive( index_path, directory, + repository, progress, core::index::Options { object_hash, format }, ) diff --git a/src/plumbing/options.rs b/src/plumbing/options.rs index 68973190ddd..8d01894c3a1 100644 --- a/src/plumbing/options.rs +++ b/src/plumbing/options.rs @@ -369,6 +369,10 @@ pub mod index { }, /// Checkout the index into a directory with exclusive write access, similar to what would happen during clone. CheckoutExclusive { + /// The path to `.git` repository from which objects can be obtained to write the actual files referenced + /// in the index. Use this measure the impact on extracting objects on overall performance. + #[clap(long, short = 'r')] + repository: Option, /// The directory into which to write all index entries. directory: PathBuf, }, From 5388d8091ef02cf927478a1492847ae1666040d4 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 3 Mar 2022 16:40:45 +0800 Subject: [PATCH 11/19] allow writing empty files during checkout but also query the odb (#301) This should tell us how much time it really takes to do the ODB part, and see how much we could save by optimizing disk access, for instance by using an lstat cache. --- etc/check-package-size.sh | 2 +- git-worktree/tests/index/mod.rs | 9 ++++++++- gitoxide-core/src/index/mod.rs | 28 +++++++++++++++++++++++++--- src/plumbing/main.rs | 11 +++++++++-- src/plumbing/options.rs | 4 ++++ 5 files changed, 47 insertions(+), 7 deletions(-) diff --git a/etc/check-package-size.sh b/etc/check-package-size.sh index 0b9c48a5f94..1b9dff874ea 100755 --- a/etc/check-package-size.sh +++ b/etc/check-package-size.sh @@ -41,4 +41,4 @@ echo "in root: gitoxide CLI" (enter git-packetline && indent cargo diet -n --package-size-limit 35KB) (enter git-repository && indent cargo diet -n --package-size-limit 80KB) (enter git-transport && indent cargo diet -n --package-size-limit 50KB) -(enter gitoxide-core && indent cargo diet -n --package-size-limit 50KB) +(enter gitoxide-core && indent cargo diet -n --package-size-limit 60KB) diff --git a/git-worktree/tests/index/mod.rs b/git-worktree/tests/index/mod.rs index d365777f332..ce638a3b056 100644 --- a/git-worktree/tests/index/mod.rs +++ b/git-worktree/tests/index/mod.rs @@ -173,7 +173,14 @@ mod checkout { let odb = git_odb::at(git_dir.join("objects"))?; let destination = tempfile::tempdir()?; - let outcome = index::checkout(&mut index)?; + let outcome = index::checkout( + &mut index, + &destination, + move |oid, buf| odb.find_blob(oid, buf).ok(), + &mut progress::Discard, + &mut progress::Discard, + opts, + )?; Ok((source_tree, destination, index, outcome)) } diff --git a/gitoxide-core/src/index/mod.rs b/gitoxide-core/src/index/mod.rs index bd4f946807a..c7ac6291892 100644 --- a/gitoxide-core/src/index/mod.rs +++ b/gitoxide-core/src/index/mod.rs @@ -101,12 +101,23 @@ fn parse_file(index_path: impl AsRef, object_hash: git::hash::Kind) -> any .map_err(Into::into) } +pub mod checkout_exclusive { + pub struct Options { + pub index: super::Options, + /// If true, all files will be written with zero bytes despite having made an ODB lookup. + pub empty_files: bool, + } +} + pub fn checkout_exclusive( index_path: impl AsRef, dest_directory: impl AsRef, repo: Option, mut progress: impl Progress, - Options { object_hash, .. }: Options, + checkout_exclusive::Options { + index: Options { object_hash, .. }, + empty_files, + }: checkout_exclusive::Options, ) -> anyhow::Result<()> { let repo = repo .map(|dir| git_repository::discover(dir).map(|r| r.apply_environment())) @@ -124,7 +135,7 @@ pub fn checkout_exclusive( let mut index = parse_file(index_path, object_hash)?; let mut num_skipped = 0; - let maybe_symlink_mode = if repo.is_some() { + let maybe_symlink_mode = if !empty_files && repo.is_some() { git::index::entry::Mode::DIR } else { git::index::entry::Mode::SYMLINK @@ -161,7 +172,18 @@ pub fn checkout_exclusive( Some(repo) => git::worktree::index::checkout( &mut index, dest_directory, - |oid, buf| repo.objects.find_blob(oid, buf).ok(), + |oid, buf| { + repo.objects.find_blob(oid, buf).ok(); + if empty_files { + // We always want to query the ODB here… + repo.objects.find_blob(oid, buf).ok(); + buf.clear(); + // …but write nothing + Some(git::objs::BlobRef { data: buf }) + } else { + repo.objects.find_blob(oid, buf).ok() + } + }, &mut files, &mut bytes, opts, diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index c2300926628..22346e55660 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -78,7 +78,11 @@ pub fn main() -> Result<()> { index_path, cmd, }) => match cmd { - index::Subcommands::CheckoutExclusive { directory, repository } => prepare_and_run( + index::Subcommands::CheckoutExclusive { + directory, + empty_files, + repository, + } => prepare_and_run( "index-checkout", verbose, progress, @@ -90,7 +94,10 @@ pub fn main() -> Result<()> { directory, repository, progress, - core::index::Options { object_hash, format }, + core::index::checkout_exclusive::Options { + index: core::index::Options { object_hash, format }, + empty_files, + }, ) }, ), diff --git a/src/plumbing/options.rs b/src/plumbing/options.rs index 8d01894c3a1..43fda5ca779 100644 --- a/src/plumbing/options.rs +++ b/src/plumbing/options.rs @@ -373,6 +373,10 @@ pub mod index { /// in the index. Use this measure the impact on extracting objects on overall performance. #[clap(long, short = 'r')] repository: Option, + /// Enable to query the object database yet write only empty files. This is useful to measure the overhead of ODB query + /// compared to writing the bytes to disk. + #[clap(long, short = 'e', requires = "repository")] + empty_files: bool, /// The directory into which to write all index entries. directory: PathBuf, }, From 537e5aaa80fb21800d8ad856595c09018428f3eb Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 4 Mar 2022 10:36:14 +0800 Subject: [PATCH 12/19] fix progress - there is no max value for bytes written (#301) --- gitoxide-core/src/index/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gitoxide-core/src/index/mod.rs b/gitoxide-core/src/index/mod.rs index c7ac6291892..6d12fa9997f 100644 --- a/gitoxide-core/src/index/mod.rs +++ b/gitoxide-core/src/index/mod.rs @@ -165,7 +165,7 @@ pub fn checkout_exclusive( let entries_for_checkout = index.entries().len() - num_skipped; files.init(Some(entries_for_checkout), git::progress::count("files")); - bytes.init(Some(entries_for_checkout), git::progress::bytes()); + bytes.init(None, git::progress::bytes()); let start = std::time::Instant::now(); match &repo { From a39d4768e36f27aababefd5bd519e51f33ffa7b6 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 4 Mar 2022 10:47:17 +0800 Subject: [PATCH 13/19] Properly use 'max-performance' feature toggle to get pack caches :D (#301) --- Cargo.toml | 2 +- src/shared.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c40facc7036..d253318fa51 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,7 @@ default = ["max"] ## Makes the crate execute as fast as possible by supporting parallel computation of otherwise long-running functions ## as well as fast, hardware accelerated hashing, along with a faster zlib backend. ## If disabled, the binary will be visibly smaller. -fast = ["git-features/parallel", "git-features/fast-sha1", "git-features/zlib-ng-compat"] +fast = ["git-features/parallel", "git-features/fast-sha1", "git-features/zlib-ng-compat", "git-repository/max-performance"] ## Use `clap` 3.0 to build the prettiest, best documented and most user-friendly CLI at the expense of binary size. ## Provides a terminal user interface for detailed and exhaustive progress. diff --git a/src/shared.rs b/src/shared.rs index dba706f1d99..0ffee9bb522 100644 --- a/src/shared.rs +++ b/src/shared.rs @@ -49,7 +49,7 @@ pub mod pretty { use std::io::{stderr, stdout}; use anyhow::Result; - use git_repository::progress; + use git_features::progress; use crate::shared::ProgressRange; From f4621cc4dd48fcd4b1aba294c811bc92f2715981 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 4 Mar 2022 11:41:54 +0800 Subject: [PATCH 14/19] sketch out dir cache and realize that git uses chdir (#301) Git is able to use relative paths by changing the working directory. See https://github.com/git/git/blob/main/builtin.h#L82:L82 We probably shouldn't do that as a library, but will compose the path instead like we do now, but should consider using the cache for file and directory handling to avoid allocations. --- git-worktree/src/index/checkout.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/git-worktree/src/index/checkout.rs b/git-worktree/src/index/checkout.rs index 3cb9fb952de..2e290306b14 100644 --- a/git-worktree/src/index/checkout.rs +++ b/git-worktree/src/index/checkout.rs @@ -1,5 +1,23 @@ use bstr::BString; use quick_error::quick_error; +use std::path::PathBuf; + +/// A cache for directory creation to reduce the amount of stat calls when creating +/// directories safely, that is without following symlinks that might be on the way. +/// +/// As a special case, it offers a 'prefix' which (by itself) is assumed to exist and may contain symlinks. +/// Everything past that prefix boundary must not contain a symlink. +/// +/// For this to work, it remembers the last 'good' path to a directory and assumes that all components of it +/// are still valid, too. +/// As directories are created, the cache will be adjusted to reflect the latest seen directory. +/// +/// The caching is only useful if consecutive calls to create a directory are using a sorted list of entries. +#[allow(unused)] +pub(crate) struct DirCache { + /// the most recent known cached that we know is valid. + valid: PathBuf, +} #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct Collision { From cb36d5691294971e1b0e097ed11908768283731a Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 4 Mar 2022 12:50:20 +0800 Subject: [PATCH 15/19] basic impl of the dir cache which already avoids unnecessary allocations (#301) --- git-worktree/src/index/checkout.rs | 67 +++++++++++++++++++++++++++++- git-worktree/src/index/entry.rs | 26 +++++------- git-worktree/src/index/mod.rs | 8 ++-- git-worktree/tests/index/mod.rs | 10 ++--- 4 files changed, 84 insertions(+), 27 deletions(-) diff --git a/git-worktree/src/index/checkout.rs b/git-worktree/src/index/checkout.rs index 2e290306b14..d54891cf1d3 100644 --- a/git-worktree/src/index/checkout.rs +++ b/git-worktree/src/index/checkout.rs @@ -6,7 +6,10 @@ use std::path::PathBuf; /// directories safely, that is without following symlinks that might be on the way. /// /// As a special case, it offers a 'prefix' which (by itself) is assumed to exist and may contain symlinks. -/// Everything past that prefix boundary must not contain a symlink. +/// Everything past that prefix boundary must not contain a symlink. We do this by allowing any input path. +/// +/// Another added benefit is its ability to store the path of full path of the entry to which leading directories +/// are to be created to avoid allocating memory. /// /// For this to work, it remembers the last 'good' path to a directory and assumes that all components of it /// are still valid, too. @@ -14,9 +17,69 @@ use std::path::PathBuf; /// /// The caching is only useful if consecutive calls to create a directory are using a sorted list of entries. #[allow(unused)] -pub(crate) struct DirCache { +pub struct PathCache { + /// The prefix/root for all paths we handle. + root: PathBuf, /// the most recent known cached that we know is valid. valid: PathBuf, + /// The amount of path components of 'valid' beyond the roots components. If `root` has 2, and this is 2, `valid` has 4 components. + valid_components: usize, +} + +mod cache { + use super::PathCache; + use std::path::{Path, PathBuf}; + + impl PathCache { + /// Create a new instance with `root` being the base for all future paths we handle, assuming it to be valid which includes + /// symbolic links to be included in it as well. + pub fn new(root: impl Into) -> Self { + let root = root.into(); + PathCache { + valid: root.clone(), + valid_components: 0, + root, + } + } + + /// Append the `relative` path to the root directory the cache contains and efficiently create leading directories + /// unless `mode` indicates `relative` points to a directory itself in which case the entire resulting path is created as directory. + /// + /// The full path to `relative` will be returned for use on the file system. + pub fn append_relative_path_assure_leading_dir( + &mut self, + relative: &Path, + mode: git_index::entry::Mode, + ) -> std::io::Result<&Path> { + debug_assert!( + relative.is_relative(), + "only index paths are handled correctly here, must be relative" + ); + assert!( + (git_index::entry::Mode::SYMLINK + | git_index::entry::Mode::FILE + | git_index::entry::Mode::FILE_EXECUTABLE) + .contains(mode), + "can only handle file-like items right now" + ); + + // TODO: handle valid state properly, handle _mode. + for _ in 0..self.valid_components { + self.valid.pop(); + } + + self.valid_components = 0; + for component in relative.iter() { + self.valid.push(component); + self.valid_components += 1; + } + + if self.valid_components > 1 { + std::fs::create_dir_all(self.valid.parent().expect("directory"))?; + } + Ok(&self.valid) + } + } } #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] diff --git a/git-worktree/src/index/entry.rs b/git-worktree/src/index/entry.rs index 6a8c51a2c15..19860c700b1 100644 --- a/git-worktree/src/index/entry.rs +++ b/git-worktree/src/index/entry.rs @@ -1,22 +1,18 @@ -use std::{ - convert::TryInto, - fs::{create_dir_all, OpenOptions}, - io::Write, - time::Duration, -}; +use std::{convert::TryInto, fs::OpenOptions, io::Write, time::Duration}; use bstr::BStr; use git_hash::oid; use git_index::Entry; use crate::index; +use crate::index::checkout::PathCache; #[cfg_attr(not(unix), allow(unused_variables))] pub fn checkout( entry: &mut Entry, entry_path: &BStr, find: &mut Find, - root: &std::path::Path, + cache: &mut PathCache, index::checkout::Options { fs: crate::fs::Capabilities { symlink, @@ -31,20 +27,18 @@ pub fn checkout( where Find: for<'a> FnMut(&oid, &'a mut Vec) -> Option>, { - let dest = root.join(git_features::path::from_byte_slice(entry_path).map_err(|_| { - index::checkout::Error::IllformedUtf8 { + let dest = cache.append_relative_path_assure_leading_dir( + git_features::path::from_byte_slice(entry_path).map_err(|_| index::checkout::Error::IllformedUtf8 { path: entry_path.to_owned(), - } - })?); - - let dest_dir = dest.parent().expect("entry paths are never empty"); - create_dir_all(dest_dir)?; // TODO: can this be avoided to create dirs when needed only? + })?, + entry.mode, + )?; let object_size = match entry.mode { git_index::entry::Mode::FILE | git_index::entry::Mode::FILE_EXECUTABLE => { let obj = find(&entry.id, buf).ok_or_else(|| index::checkout::Error::ObjectNotFound { oid: entry.id, - path: root.to_path_buf(), + path: dest.to_path_buf(), })?; let mut options = OpenOptions::new(); options @@ -67,7 +61,7 @@ where git_index::entry::Mode::SYMLINK => { let obj = find(&entry.id, buf).ok_or_else(|| index::checkout::Error::ObjectNotFound { oid: entry.id, - path: root.to_path_buf(), + path: dest.to_path_buf(), })?; let symlink_destination = git_features::path::from_byte_slice(obj.data) .map_err(|_| index::checkout::Error::IllformedUtf8 { path: obj.data.into() })?; diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs index 9d78b5e82a4..41561124b03 100644 --- a/git-worktree/src/index/mod.rs +++ b/git-worktree/src/index/mod.rs @@ -1,6 +1,7 @@ use git_features::progress::Progress; use git_hash::oid; +use crate::index::checkout::PathCache; use crate::{index, index::checkout::Collision}; pub mod checkout; @@ -8,7 +9,7 @@ pub(crate) mod entry; pub fn checkout( index: &mut git_index::State, - path: impl AsRef, + dir: impl Into, mut find: Find, files: &mut impl Progress, bytes: &mut impl Progress, @@ -22,7 +23,8 @@ where } use std::io::ErrorKind::AlreadyExists; - let root = path.as_ref(); + let dir = dir.into(); + let mut path_cache = PathCache::new(dir.clone()); let mut buf = Vec::new(); let mut collisions = Vec::new(); @@ -33,7 +35,7 @@ where continue; } - let res = entry::checkout(entry, entry_path, &mut find, root, options, &mut buf); + let res = entry::checkout(entry, entry_path, &mut find, &mut path_cache, options, &mut buf); files.inc(); match res { Ok(object_size) => bytes.inc_by(object_size), diff --git a/git-worktree/tests/index/mod.rs b/git-worktree/tests/index/mod.rs index ce638a3b056..a23fb9df2b6 100644 --- a/git-worktree/tests/index/mod.rs +++ b/git-worktree/tests/index/mod.rs @@ -16,15 +16,13 @@ mod checkout { use crate::fixture_path; #[test] - fn symlinks_become_files_if_disabled() -> crate::Result { + fn symlinks_become_files_if_disabled() { let opts = opts_with_symlink(false); let (source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir(opts, "make_mixed_without_submodules")?; + checkout_index_in_tmp_dir(opts, "make_mixed_without_submodules").unwrap(); - assert_equality(&source_tree, &destination, opts.fs.symlink)?; + assert_equality(&source_tree, &destination, opts.fs.symlink).unwrap(); assert!(outcome.collisions.is_empty()); - - Ok(()) } #[test] @@ -175,7 +173,7 @@ mod checkout { let outcome = index::checkout( &mut index, - &destination, + destination.path(), move |oid, buf| odb.find_blob(oid, buf).ok(), &mut progress::Discard, &mut progress::Discard, From a3501df6eb8d2fd3176434c80c443316e91dabb6 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 4 Mar 2022 13:20:51 +0800 Subject: [PATCH 16/19] avoid popping the entire cached path (#301) This comes at a cost, but hopefully this is justified performance wise but most importantly, makes the upcoming implementation of the lstat cache possible or easier. --- git-worktree/src/index/checkout.rs | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/git-worktree/src/index/checkout.rs b/git-worktree/src/index/checkout.rs index d54891cf1d3..01e0b6b361d 100644 --- a/git-worktree/src/index/checkout.rs +++ b/git-worktree/src/index/checkout.rs @@ -22,6 +22,8 @@ pub struct PathCache { root: PathBuf, /// the most recent known cached that we know is valid. valid: PathBuf, + /// The relative portion of `valid` that was added previously. + valid_relative: PathBuf, /// The amount of path components of 'valid' beyond the roots components. If `root` has 2, and this is 2, `valid` has 4 components. valid_components: usize, } @@ -37,6 +39,7 @@ mod cache { let root = root.into(); PathCache { valid: root.clone(), + valid_relative: PathBuf::with_capacity(128), valid_components: 0, root, } @@ -63,14 +66,27 @@ mod cache { "can only handle file-like items right now" ); + let mut components = relative.components().peekable(); + let mut existing_components = self.valid_relative.components(); + let mut matching_components = 0; + while let (Some(existing_comp), Some(new_comp)) = (existing_components.next(), components.peek()) { + if existing_comp == *new_comp { + components.next(); + matching_components += 1; + } else { + break; + } + } + // TODO: handle valid state properly, handle _mode. - for _ in 0..self.valid_components { + for _ in 0..self.valid_components - matching_components { self.valid.pop(); } - self.valid_components = 0; - for component in relative.iter() { - self.valid.push(component); + self.valid_components = matching_components; + for comp in components { + self.valid.push(comp); + self.valid_relative.push(comp); self.valid_components += 1; } From de58f50748bd70e39d29e503a7f4b1e6c9b20093 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 4 Mar 2022 14:06:23 +0800 Subject: [PATCH 17/19] forbid symlinks and files in the path (#301) We now definitely do extra work in some cases, but save a lot in others thanks to the sorted input. --- git-worktree/src/index/checkout.rs | 34 +++++++----- git-worktree/tests/index/mod.rs | 84 ++++++++++++++++++++++++++++-- 2 files changed, 103 insertions(+), 15 deletions(-) diff --git a/git-worktree/src/index/checkout.rs b/git-worktree/src/index/checkout.rs index 01e0b6b361d..ffbfa574260 100644 --- a/git-worktree/src/index/checkout.rs +++ b/git-worktree/src/index/checkout.rs @@ -26,6 +26,10 @@ pub struct PathCache { valid_relative: PathBuf, /// The amount of path components of 'valid' beyond the roots components. If `root` has 2, and this is 2, `valid` has 4 components. valid_components: usize, + + /// just for testing + #[cfg(debug_assertions)] + pub test_mkdir_calls: usize, } mod cache { @@ -42,6 +46,7 @@ mod cache { valid_relative: PathBuf::with_capacity(128), valid_components: 0, root, + test_mkdir_calls: 0, } } @@ -51,20 +56,14 @@ mod cache { /// The full path to `relative` will be returned for use on the file system. pub fn append_relative_path_assure_leading_dir( &mut self, - relative: &Path, + relative: impl AsRef, mode: git_index::entry::Mode, ) -> std::io::Result<&Path> { + let relative = relative.as_ref(); debug_assert!( relative.is_relative(), "only index paths are handled correctly here, must be relative" ); - assert!( - (git_index::entry::Mode::SYMLINK - | git_index::entry::Mode::FILE - | git_index::entry::Mode::FILE_EXECUTABLE) - .contains(mode), - "can only handle file-like items right now" - ); let mut components = relative.components().peekable(); let mut existing_components = self.valid_relative.components(); @@ -84,15 +83,26 @@ mod cache { } self.valid_components = matching_components; - for comp in components { + + let target_is_dir = mode == git_index::entry::Mode::COMMIT || mode == git_index::entry::Mode::DIR; + while let Some(comp) = components.next() { self.valid.push(comp); self.valid_relative.push(comp); self.valid_components += 1; + if components.peek().is_some() || target_is_dir { + self.test_mkdir_calls += 1; + match std::fs::create_dir(&self.valid) { + Ok(()) => {} + Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => { + if !self.valid.symlink_metadata()?.is_dir() { + return Err(err); + } + } + Err(err) => return Err(err), + } + } } - if self.valid_components > 1 { - std::fs::create_dir_all(self.valid.parent().expect("directory"))?; - } Ok(&self.valid) } } diff --git a/git-worktree/tests/index/mod.rs b/git-worktree/tests/index/mod.rs index a23fb9df2b6..a7a51a7f3d0 100644 --- a/git-worktree/tests/index/mod.rs +++ b/git-worktree/tests/index/mod.rs @@ -1,4 +1,81 @@ mod checkout { + mod cache { + use git_index::entry::Mode; + use git_worktree::index::checkout::PathCache; + use std::path::Path; + use tempfile::{tempdir, TempDir}; + + #[test] + fn root_is_assumed_to_exist_and_files_in_root_do_not_create_directory() { + let dir = tempdir().unwrap(); + let mut cache = PathCache::new(dir.path().join("non-existing-root")); + assert_eq!(cache.test_mkdir_calls, 0); + + let path = cache + .append_relative_path_assure_leading_dir("hello", Mode::FILE) + .unwrap(); + assert!(!path.parent().unwrap().exists(), "prefix itself is never created"); + assert_eq!(cache.test_mkdir_calls, 0); + } + + #[test] + fn directory_paths_are_created_in_full() { + let (mut cache, _tmp) = new_cache(); + + for (name, mode) in &[ + ("dir", Mode::DIR), + ("submodule", Mode::COMMIT), + ("file", Mode::FILE), + ("exe", Mode::FILE_EXECUTABLE), + ("link", Mode::SYMLINK), + ] { + let path = cache + .append_relative_path_assure_leading_dir(Path::new("dir").join(name), *mode) + .unwrap(); + assert!(path.parent().unwrap().is_dir(), "dir exists"); + } + + assert_eq!(cache.test_mkdir_calls, 3); + } + + #[test] + fn existing_directories_are_fine() { + let (mut cache, tmp) = new_cache(); + std::fs::create_dir(tmp.path().join("dir")).unwrap(); + + let path = cache + .append_relative_path_assure_leading_dir("dir/file", Mode::FILE) + .unwrap(); + assert!(path.parent().unwrap().is_dir(), "directory is still present"); + assert!(!path.exists(), "it won't create the file"); + } + + #[test] + fn symlinks_or_files_in_path_are_forbidden_or_unlinked_when_forced() { + let (mut cache, tmp) = new_cache(); + let forbidden = tmp.path().join("forbidden"); + std::fs::create_dir(&forbidden).unwrap(); + symlink::symlink_dir(&forbidden, tmp.path().join("link-to-dir")).unwrap(); + std::fs::write(tmp.path().join("file-in-dir"), &[]).unwrap(); + + for dirname in &["link-to-dir", "file-in-dir"] { + assert_eq!( + cache + .append_relative_path_assure_leading_dir(format!("{}/file", dirname), Mode::FILE) + .unwrap_err() + .kind(), + std::io::ErrorKind::AlreadyExists + ); + } + } + + fn new_cache() -> (PathCache, TempDir) { + let dir = tempdir().unwrap(); + let cache = PathCache::new(dir.path()); + (cache, dir) + } + } + #[cfg(unix)] use std::os::unix::prelude::MetadataExt; use std::{ @@ -16,13 +93,14 @@ mod checkout { use crate::fixture_path; #[test] - fn symlinks_become_files_if_disabled() { + fn symlinks_become_files_if_disabled() -> crate::Result { let opts = opts_with_symlink(false); let (source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir(opts, "make_mixed_without_submodules").unwrap(); + checkout_index_in_tmp_dir(opts, "make_mixed_without_submodules")?; - assert_equality(&source_tree, &destination, opts.fs.symlink).unwrap(); + assert_equality(&source_tree, &destination, opts.fs.symlink)?; assert!(outcome.collisions.is_empty()); + Ok(()) } #[test] From 749c3100d785f7ac373bdb109fda21f2ac62d5c0 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 4 Mar 2022 14:16:37 +0800 Subject: [PATCH 18/19] Support for forceful removal of symlinks or files during dir creation (#301) --- git-worktree/src/index/checkout.rs | 26 ++++++++++++++++++++++++-- git-worktree/src/index/mod.rs | 3 +++ git-worktree/tests/index/mod.rs | 13 ++++++++++++- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/git-worktree/src/index/checkout.rs b/git-worktree/src/index/checkout.rs index ffbfa574260..cc7da1001fd 100644 --- a/git-worktree/src/index/checkout.rs +++ b/git-worktree/src/index/checkout.rs @@ -27,6 +27,9 @@ pub struct PathCache { /// The amount of path components of 'valid' beyond the roots components. If `root` has 2, and this is 2, `valid` has 4 components. valid_components: usize, + /// If there is a symlink or a file in our path, try to unlink it before creating the directory. + pub unlink_on_collision: bool, + /// just for testing #[cfg(debug_assertions)] pub test_mkdir_calls: usize, @@ -46,7 +49,9 @@ mod cache { valid_relative: PathBuf::with_capacity(128), valid_components: 0, root, + #[cfg(debug_assertions)] test_mkdir_calls: 0, + unlink_on_collision: false, } } @@ -90,11 +95,28 @@ mod cache { self.valid_relative.push(comp); self.valid_components += 1; if components.peek().is_some() || target_is_dir { - self.test_mkdir_calls += 1; + #[cfg(debug_assertions)] + { + self.test_mkdir_calls += 1; + } match std::fs::create_dir(&self.valid) { Ok(()) => {} Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => { - if !self.valid.symlink_metadata()?.is_dir() { + let meta = self.valid.symlink_metadata()?; + if !meta.is_dir() { + if self.unlink_on_collision { + if meta.is_symlink() { + symlink::remove_symlink_auto(&self.valid)?; + } else { + std::fs::remove_file(&self.valid)?; + } + #[cfg(debug_assertions)] + { + self.test_mkdir_calls += 1; + } + std::fs::create_dir(&self.valid)?; + continue; + } return Err(err); } } diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs index 41561124b03..825505656ac 100644 --- a/git-worktree/src/index/mod.rs +++ b/git-worktree/src/index/mod.rs @@ -24,7 +24,10 @@ where use std::io::ErrorKind::AlreadyExists; let dir = dir.into(); + let mut path_cache = PathCache::new(dir.clone()); + path_cache.unlink_on_collision = options.overwrite_existing; + let mut buf = Vec::new(); let mut collisions = Vec::new(); diff --git a/git-worktree/tests/index/mod.rs b/git-worktree/tests/index/mod.rs index a7a51a7f3d0..f7887ae01db 100644 --- a/git-worktree/tests/index/mod.rs +++ b/git-worktree/tests/index/mod.rs @@ -48,6 +48,7 @@ mod checkout { .unwrap(); assert!(path.parent().unwrap().is_dir(), "directory is still present"); assert!(!path.exists(), "it won't create the file"); + assert_eq!(cache.test_mkdir_calls, 1); } #[test] @@ -59,14 +60,24 @@ mod checkout { std::fs::write(tmp.path().join("file-in-dir"), &[]).unwrap(); for dirname in &["link-to-dir", "file-in-dir"] { + cache.unlink_on_collision = false; + let relative_path = format!("{}/file", dirname); assert_eq!( cache - .append_relative_path_assure_leading_dir(format!("{}/file", dirname), Mode::FILE) + .append_relative_path_assure_leading_dir(&relative_path, Mode::FILE) .unwrap_err() .kind(), std::io::ErrorKind::AlreadyExists ); + cache.unlink_on_collision = true; + + let path = cache + .append_relative_path_assure_leading_dir(&relative_path, Mode::FILE) + .unwrap(); + assert!(path.parent().unwrap().is_dir(), "directory was forcefully created"); + assert!(!path.exists()); } + assert_eq!(cache.test_mkdir_calls, 4); } fn new_cache() -> (PathCache, TempDir) { From 0e2a2438da35c0abb412682b103e5be171b1c3ad Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 4 Mar 2022 14:26:31 +0800 Subject: [PATCH 19/19] thanks clippy --- git-worktree/src/index/entry.rs | 2 +- git-worktree/src/index/mod.rs | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/git-worktree/src/index/entry.rs b/git-worktree/src/index/entry.rs index 19860c700b1..1aad218b075 100644 --- a/git-worktree/src/index/entry.rs +++ b/git-worktree/src/index/entry.rs @@ -69,7 +69,7 @@ where // TODO: how to deal with mode changes? Maybe this info can be passed once we check for whether // a checkout is needed at all. if symlink { - crate::os::create_symlink(symlink_destination, &dest)?; + crate::os::create_symlink(symlink_destination, dest)?; } else { std::fs::write(&dest, obj.data)?; } diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs index 825505656ac..6ffba9960e1 100644 --- a/git-worktree/src/index/mod.rs +++ b/git-worktree/src/index/mod.rs @@ -23,9 +23,7 @@ where } use std::io::ErrorKind::AlreadyExists; - let dir = dir.into(); - - let mut path_cache = PathCache::new(dir.clone()); + let mut path_cache = PathCache::new(dir.into()); path_cache.unlink_on_collision = options.overwrite_existing; let mut buf = Vec::new();