From bacc65481d4ff5ecfbdf3755383b60f354deaf47 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 6 Mar 2022 20:36:42 +0800 Subject: [PATCH 01/37] switch worktree to thiserror (#301) --- Cargo.lock | 2 +- git-worktree/Cargo.toml | 2 +- git-worktree/src/index/checkout.rs | 35 +++++++++++------------------- 3 files changed, 15 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3ad5297deca..722952b753d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1793,10 +1793,10 @@ dependencies = [ "git-object 0.17.1", "git-odb 0.27.0", "git-testtools", - "quick-error", "serde", "symlink", "tempfile", + "thiserror", "walkdir", ] diff --git a/git-worktree/Cargo.toml b/git-worktree/Cargo.toml index 9f93704ff69..fcc45a00f34 100644 --- a/git-worktree/Cargo.toml +++ b/git-worktree/Cargo.toml @@ -24,7 +24,7 @@ git-features = { version = "^0.19.1", path = "../git-features" } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} -quick-error = "2.0.1" +thiserror = "1.0.26" bstr = { version = "0.2.13", default-features = false } document-features = { version = "0.2.0", optional = true } diff --git a/git-worktree/src/index/checkout.rs b/git-worktree/src/index/checkout.rs index 9241326fba8..49e7a95e5c1 100644 --- a/git-worktree/src/index/checkout.rs +++ b/git-worktree/src/index/checkout.rs @@ -1,5 +1,4 @@ use bstr::BString; -use quick_error::quick_error; use std::path::PathBuf; /// A cache for directory creation to reduce the amount of stat calls when creating @@ -192,25 +191,17 @@ impl Default for Options { } } } - -quick_error! { - #[derive(Debug)] - pub enum Error { - IllformedUtf8{ path: BString } { - display("Could not convert path to UTF8: {}", path) - } - Time(err: std::time::SystemTimeError) { - from() - source(err) - display("The clock was off when reading file related metadata after updating a file on disk") - } - Io(err: std::io::Error) { - from() - source(err) - display("IO error while writing blob or reading file metadata or changing filetype") - } - ObjectNotFound{ oid: git_hash::ObjectId, path: std::path::PathBuf } { - display("object {} for checkout at {} not found in object database", oid.to_hex(), path.display()) - } - } +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("Could not convert path to UTF8: {}", .path)] + IllformedUtf8 { path: BString }, + #[error("The clock was off when reading file related metadata after updating a file on disk")] + Time(#[from] std::time::SystemTimeError), + #[error("IO error while writing blob or reading file metadata or changing filetype")] + Io(#[from] std::io::Error), + #[error("object {} for checkout at {} not found in object database", .oid.to_hex(), .path.display())] + ObjectNotFound { + oid: git_hash::ObjectId, + path: std::path::PathBuf, + }, } From f9beac0471a38cb4c3b070ecb576ed1a39456bd6 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 6 Mar 2022 20:46:41 +0800 Subject: [PATCH 02/37] return proper errors during checkout object lookup (#301) --- git-worktree/src/index/checkout.rs | 8 +++++--- git-worktree/src/index/entry.rs | 18 ++++++++++++------ git-worktree/src/index/mod.rs | 7 ++++--- git-worktree/tests/index/mod.rs | 4 ++-- gitoxide-core/src/index/mod.rs | 8 ++++---- 5 files changed, 27 insertions(+), 18 deletions(-) diff --git a/git-worktree/src/index/checkout.rs b/git-worktree/src/index/checkout.rs index 49e7a95e5c1..2c37025933d 100644 --- a/git-worktree/src/index/checkout.rs +++ b/git-worktree/src/index/checkout.rs @@ -192,15 +192,17 @@ impl Default for Options { } } #[derive(Debug, thiserror::Error)] -pub enum Error { +pub enum Error { #[error("Could not convert path to UTF8: {}", .path)] IllformedUtf8 { path: BString }, #[error("The clock was off when reading file related metadata after updating a file on disk")] Time(#[from] std::time::SystemTimeError), #[error("IO error while writing blob or reading file metadata or changing filetype")] Io(#[from] std::io::Error), - #[error("object {} for checkout at {} not found in object database", .oid.to_hex(), .path.display())] - ObjectNotFound { + #[error("object {} for checkout at {} could not be retrieved from object database", .oid.to_hex(), .path.display())] + Find { + #[source] + err: E, oid: git_hash::ObjectId, path: std::path::PathBuf, }, diff --git a/git-worktree/src/index/entry.rs b/git-worktree/src/index/entry.rs index 1aad218b075..2e883733cb4 100644 --- a/git-worktree/src/index/entry.rs +++ b/git-worktree/src/index/entry.rs @@ -8,7 +8,7 @@ use crate::index; use crate::index::checkout::PathCache; #[cfg_attr(not(unix), allow(unused_variables))] -pub fn checkout( +pub fn checkout( entry: &mut Entry, entry_path: &BStr, find: &mut Find, @@ -23,9 +23,10 @@ pub fn checkout( .. }: index::checkout::Options, buf: &mut Vec, -) -> Result +) -> Result> where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Option>, + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + E: std::error::Error + Send + Sync + 'static, { let dest = cache.append_relative_path_assure_leading_dir( git_features::path::from_byte_slice(entry_path).map_err(|_| index::checkout::Error::IllformedUtf8 { @@ -36,7 +37,8 @@ where let object_size = match entry.mode { git_index::entry::Mode::FILE | git_index::entry::Mode::FILE_EXECUTABLE => { - let obj = find(&entry.id, buf).ok_or_else(|| index::checkout::Error::ObjectNotFound { + let obj = find(&entry.id, buf).map_err(|err| index::checkout::Error::Find { + err, oid: entry.id, path: dest.to_path_buf(), })?; @@ -59,7 +61,8 @@ where obj.data.len() } git_index::entry::Mode::SYMLINK => { - let obj = find(&entry.id, buf).ok_or_else(|| index::checkout::Error::ObjectNotFound { + let obj = find(&entry.id, buf).map_err(|err| index::checkout::Error::Find { + err, oid: entry.id, path: dest.to_path_buf(), })?; @@ -84,7 +87,10 @@ where Ok(object_size) } -fn update_fstat(entry: &mut Entry, meta: std::fs::Metadata) -> Result<(), index::checkout::Error> { +fn update_fstat(entry: &mut Entry, meta: std::fs::Metadata) -> Result<(), index::checkout::Error> +where + E: std::error::Error + Send + Sync + 'static, +{ let ctime = meta .created() .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?; diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs index f3c281def9b..6859e2e5a0a 100644 --- a/git-worktree/src/index/mod.rs +++ b/git-worktree/src/index/mod.rs @@ -7,16 +7,17 @@ use crate::index::checkout::{ErrorRecord, PathCache}; pub mod checkout; pub(crate) mod entry; -pub fn checkout( +pub fn checkout( index: &mut git_index::State, dir: impl Into, mut find: Find, files: &mut impl Progress, bytes: &mut impl Progress, options: checkout::Options, -) -> Result +) -> Result> where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Option>, + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + E: std::error::Error + Send + Sync + 'static, { if !options.destination_is_initially_empty { todo!("deal with non-clone checkouts") diff --git a/git-worktree/tests/index/mod.rs b/git-worktree/tests/index/mod.rs index 1bb2e53dc6f..a8e1bbc697f 100644 --- a/git-worktree/tests/index/mod.rs +++ b/git-worktree/tests/index/mod.rs @@ -312,9 +312,9 @@ mod checkout { destination.path(), move |oid, buf| { if allow_return_object(oid) { - odb.find_blob(oid, buf).ok() + odb.find_blob(oid, buf) } else { - None + Err(git_odb::find::existing_object::Error::NotFound { oid: oid.to_owned() }) } }, &mut progress::Discard, diff --git a/gitoxide-core/src/index/mod.rs b/gitoxide-core/src/index/mod.rs index 7f1dc156cd6..5ff85231e21 100644 --- a/gitoxide-core/src/index/mod.rs +++ b/gitoxide-core/src/index/mod.rs @@ -181,12 +181,12 @@ pub fn checkout_exclusive( repo.objects.find_blob(oid, buf).ok(); if empty_files { // We always want to query the ODB here… - repo.objects.find_blob(oid, buf).ok(); + repo.objects.find_blob(oid, buf)?; buf.clear(); // …but write nothing - Some(git::objs::BlobRef { data: buf }) + Ok(git::objs::BlobRef { data: buf }) } else { - repo.objects.find_blob(oid, buf).ok() + repo.objects.find_blob(oid, buf) } }, &mut files, @@ -198,7 +198,7 @@ pub fn checkout_exclusive( dest_directory, |_, buf| { buf.clear(); - Some(git::objs::BlobRef { data: buf }) + Ok(git::objs::BlobRef { data: buf }) }, &mut files, &mut bytes, From 99de1ef494719cb4d46e3414474e619225fe7bd4 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 7 Mar 2022 08:39:50 +0800 Subject: [PATCH 03/37] mior refactor and notes towards parallelization (#301) --- git-worktree/src/index/checkout.rs | 1 - git-worktree/src/index/entry.rs | 24 ++++++++++++++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/git-worktree/src/index/checkout.rs b/git-worktree/src/index/checkout.rs index 2c37025933d..edbfed4ba2c 100644 --- a/git-worktree/src/index/checkout.rs +++ b/git-worktree/src/index/checkout.rs @@ -81,7 +81,6 @@ mod cache { } } - // TODO: handle valid state properly, handle _mode. for _ in 0..self.valid_components - matching_components { self.valid.pop(); } diff --git a/git-worktree/src/index/entry.rs b/git-worktree/src/index/entry.rs index 2e883733cb4..1c92d5830c8 100644 --- a/git-worktree/src/index/entry.rs +++ b/git-worktree/src/index/entry.rs @@ -20,6 +20,7 @@ pub fn checkout( .. }, destination_is_initially_empty, + overwrite_existing, .. }: index::checkout::Options, buf: &mut Vec, @@ -42,11 +43,7 @@ where oid: entry.id, path: dest.to_path_buf(), })?; - let mut options = OpenOptions::new(); - options - .create_new(destination_is_initially_empty) - .create(!destination_is_initially_empty) - .write(true); + let mut options = open_options(destination_is_initially_empty, overwrite_existing); #[cfg(unix)] if executable_bit && entry.mode == git_index::entry::Mode::FILE_EXECUTABLE { use std::os::unix::fs::OpenOptionsExt; @@ -72,9 +69,15 @@ where // TODO: how to deal with mode changes? Maybe this info can be passed once we check for whether // a checkout is needed at all. if symlink { + // TODO: handle 'overwrite_existing' mode, which checks for 'exists' errors and unlinks existing files + // or directories or symlinks. Doing this shouldn't invalidate the cache as it's only valid till + // our parent anyway, but it may invalidate caches in other threads without their knowledge. + // collisions with overwrite mode must be handled with great care in parallel mode. crate::os::create_symlink(symlink_destination, dest)?; } else { - std::fs::write(&dest, obj.data)?; + open_options(destination_is_initially_empty, overwrite_existing) + .open(&dest)? + .write_all(obj.data)?; } update_fstat(entry, std::fs::symlink_metadata(&dest)?)?; @@ -87,6 +90,15 @@ where Ok(object_size) } +fn open_options(destination_is_initially_empty: bool, overwrite_existing: bool) -> OpenOptions { + let mut options = OpenOptions::new(); + options + .create_new(destination_is_initially_empty && !overwrite_existing) + .create(!destination_is_initially_empty || overwrite_existing) + .write(true); + options +} + fn update_fstat(entry: &mut Entry, meta: std::fs::Metadata) -> Result<(), index::checkout::Error> where E: std::error::Error + Send + Sync + 'static, From 805c0da62204b8c4675c9c098e10eb0fe2bc12a9 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 7 Mar 2022 10:25:38 +0800 Subject: [PATCH 04/37] =?UTF-8?q?a=20stab=20at=20making=20file=20writes=20?= =?UTF-8?q?safer=E2=80=A6=20(#301)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …and maybe there are better ways using NO_FOLLOW. it's definitely something that wants more testing. --- Cargo.lock | 1 + git-worktree/Cargo.toml | 3 + git-worktree/src/index/entry.rs | 33 +- .../tests/fixtures/make_dangerous_symlink.sh | 25 ++ git-worktree/tests/index/checkout.rs | 390 ++++++++++++++++++ git-worktree/tests/index/mod.rs | 352 +--------------- 6 files changed, 450 insertions(+), 354 deletions(-) create mode 100644 git-worktree/tests/fixtures/make_dangerous_symlink.sh create mode 100644 git-worktree/tests/index/checkout.rs diff --git a/Cargo.lock b/Cargo.lock index 722952b753d..7ef29d51f00 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1793,6 +1793,7 @@ dependencies = [ "git-object 0.17.1", "git-odb 0.27.0", "git-testtools", + "libc", "serde", "symlink", "tempfile", diff --git a/git-worktree/Cargo.toml b/git-worktree/Cargo.toml index fcc45a00f34..012e17cd9e3 100644 --- a/git-worktree/Cargo.toml +++ b/git-worktree/Cargo.toml @@ -30,6 +30,9 @@ bstr = { version = "0.2.13", default-features = false } document-features = { version = "0.2.0", optional = true } symlink = "0.1.0" +[target.'cfg(unix)'.dependencies] +libc = "0.2.119" + [dev-dependencies] git-testtools = { path = "../tests/tools" } git-odb = { path = "../git-odb" } diff --git a/git-worktree/src/index/entry.rs b/git-worktree/src/index/entry.rs index 1c92d5830c8..f86c2771a3e 100644 --- a/git-worktree/src/index/entry.rs +++ b/git-worktree/src/index/entry.rs @@ -1,3 +1,4 @@ +use std::path::Path; use std::{convert::TryInto, fs::OpenOptions, io::Write, time::Duration}; use bstr::BStr; @@ -43,7 +44,8 @@ where oid: entry.id, path: dest.to_path_buf(), })?; - let mut options = open_options(destination_is_initially_empty, overwrite_existing); + + let mut options = open_options(dest, destination_is_initially_empty, overwrite_existing); #[cfg(unix)] if executable_bit && entry.mode == git_index::entry::Mode::FILE_EXECUTABLE { use std::os::unix::fs::OpenOptionsExt; @@ -75,7 +77,7 @@ where // collisions with overwrite mode must be handled with great care in parallel mode. crate::os::create_symlink(symlink_destination, dest)?; } else { - open_options(destination_is_initially_empty, overwrite_existing) + open_options(dest, destination_is_initially_empty, overwrite_existing) .open(&dest)? .write_all(obj.data)?; } @@ -90,12 +92,37 @@ where Ok(object_size) } -fn open_options(destination_is_initially_empty: bool, overwrite_existing: bool) -> OpenOptions { +/// This is a debug assertion as we expect the machinery calling this to prevent this possibility in the first place +fn debug_assert_dest_is_no_symlink(path: &Path) { + #[cfg(debug_assertions)] + { + if let Ok(meta) = path.metadata() { + debug_assert!( + !meta.is_symlink(), + "BUG: should not ever allow to overwrite/write-into the target of a symbolic link: {}", + path.display() + ); + } + } +} + +fn open_options(path: &Path, destination_is_initially_empty: bool, overwrite_existing: bool) -> OpenOptions { + if overwrite_existing || !destination_is_initially_empty { + debug_assert_dest_is_no_symlink(path); + } let mut options = OpenOptions::new(); options .create_new(destination_is_initially_empty && !overwrite_existing) .create(!destination_is_initially_empty || overwrite_existing) .write(true); + #[cfg(unix)] + { + /// Make sure that it's impossible to follow through to the target of symlinks. + /// Note that this will still follow symlinks in the path, which is what we assume + /// has been checked separately. + use std::os::unix::fs::OpenOptionsExt; + options.custom_flags(libc::O_NOFOLLOW); + } options } diff --git a/git-worktree/tests/fixtures/make_dangerous_symlink.sh b/git-worktree/tests/fixtures/make_dangerous_symlink.sh new file mode 100644 index 00000000000..c538a80dbdf --- /dev/null +++ b/git-worktree/tests/fixtures/make_dangerous_symlink.sh @@ -0,0 +1,25 @@ +#!/bin/bash +set -eu -o pipefail + +git init -q +git config commit.gpgsign false + +# Every symlink is dangerous as it might either link to another directory and thus redirect +# all writes in the path, or it might point to a file and opening the symlink actually opens +# the target. +# We handle this by either validating symlinks specifically or create symlinks +empty_oid=$(git hash-object -w --stdin (PathCache, TempDir) { + let dir = tempdir().unwrap(); + let cache = PathCache::new(dir.path()); + (cache, dir) + } +} + +use bstr::ByteVec; +#[cfg(unix)] +use std::os::unix::prelude::MetadataExt; +use std::{ + fs, + io::ErrorKind, + path::{Path, PathBuf}, +}; + +use git_features::progress; +use git_object::bstr::ByteSlice; +use git_odb::FindExt; +use git_worktree::{fs::Capabilities, index, index::checkout::Collision}; +use std::io::ErrorKind::AlreadyExists; +use tempfile::TempDir; + +use crate::fixture_path; + +#[test] +fn accidental_writes_through_symlinks_are_prevented_if_overwriting_is_forbidden() { + let mut opts = opts_with_symlink(true); + // without overwrite mode, everything is safe. + opts.overwrite_existing = false; + let (source_tree, destination, _index, outcome) = + checkout_index_in_tmp_dir(opts, "make_dangerous_symlink").unwrap(); + + let source_files = dir_structure(&source_tree); + let worktree_files = dir_structure(&destination); + + let fs_caps = probe_gitoxide_dir().unwrap(); + if fs_caps.ignore_case { + assert_eq!( + stripped_prefix(&source_tree, &source_files), + paths(["A-dir/a", "A-file", "fake-dir/b", "fake-file"]) + ); + assert_eq!( + stripped_prefix(&destination, &worktree_files), + paths(["A-dir/a", "A-file", "FAKE-FILE"]) + ); + assert_eq!( + outcome.collisions, + vec![ + Collision { + path: "fake-dir/b".into(), + error_kind: AlreadyExists + }, + Collision { + path: "fake-file".into(), + error_kind: AlreadyExists + } + ] + ); + } else { + let expected = ["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE", "fake-dir/b", "fake-file"]; + assert_eq!(stripped_prefix(&source_tree, &source_files), paths(expected)); + assert_eq!(stripped_prefix(&destination, &worktree_files), paths(expected)); + assert!(outcome.collisions.is_empty()); + }; +} + +#[test] +fn symlinks_become_files_if_disabled() -> crate::Result { + let opts = opts_with_symlink(false); + let (source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir(opts, "make_mixed_without_submodules")?; + + assert_equality(&source_tree, &destination, opts.fs.symlink)?; + assert!(outcome.collisions.is_empty()); + Ok(()) +} + +#[test] +fn allow_symlinks() -> crate::Result { + let opts = opts_with_symlink(true); + if !probe_gitoxide_dir()?.symlink { + eprintln!("IGNORING symlink test on file system without symlink support"); + // skip if symlinks aren't supported anyway. + return Ok(()); + }; + let (source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir(opts, "make_mixed_without_submodules")?; + + assert_equality(&source_tree, &destination, opts.fs.symlink)?; + assert!(outcome.collisions.is_empty()); + Ok(()) +} + +#[test] +fn keep_going_collects_results() { + let mut opts = opts_with_symlink(true); + opts.keep_going = true; + let mut count = 0; + let (_source_tree, destination, _index, outcome) = + checkout_index_in_tmp_dir_prep_dest(opts, "make_mixed_without_submodules", |_id| { + if count < 2 { + count += 1; + false + } else { + true + } + }) + .unwrap(); + + assert_eq!( + stripped_prefix(&destination, &dir_structure(&destination)), + paths(["empty", "executable"]), + "some files could not be created" + ); + + assert!(outcome.collisions.is_empty()); + assert_eq!( + outcome + .errors + .into_iter() + .map(|r| Vec::from(r.path).into_path_buf_lossy()) + .collect::>(), + paths(["dir/content", "dir/sub-dir/symlink"]) + ); +} + +#[test] +fn no_case_related_collisions_on_case_sensitive_filesystem() { + if probe_gitoxide_dir().unwrap().ignore_case { + eprintln!("Skipping case-sensitive testing on what would be a case-insenstive file system"); + return; + } + let opts = opts_with_symlink(true); + let (source_tree, destination, index, outcome) = + checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap(); + + let num_files = assert_equality(&source_tree, &destination, opts.fs.symlink).unwrap(); + assert_eq!(num_files, index.entries().len(), "it checks out all files"); + assert!(outcome.collisions.is_empty()); +} + +#[test] +fn collisions_are_detected_on_a_case_sensitive_filesystem() { + let fs_caps = probe_gitoxide_dir().unwrap(); + if !fs_caps.ignore_case { + eprintln!("Skipping case-insensitive testing on what would be a case-senstive file system"); + return; + } + let opts = opts_with_symlink(fs_caps.symlink); + let (source_tree, destination, _index, outcome) = + checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap(); + + let source_files = dir_structure(&source_tree); + assert_eq!( + stripped_prefix(&source_tree, &source_files), + paths(["d", "file_x", "link-to-X", "x"]), + "plenty of collisions prevent a checkout" + ); + + let dest_files = dir_structure(&destination); + assert_eq!( + stripped_prefix(&destination, &dest_files), + paths(["D/B", "D/C", "FILE_X", "X", "link-to-X"]), + "we checkout files in order and generally handle collision detection differently, hence the difference" + ); + + let error_kind = ErrorKind::AlreadyExists; + #[cfg(windows)] + let error_kind_dir = ErrorKind::PermissionDenied; + #[cfg(not(windows))] + let error_kind_dir = error_kind; + + assert_eq!( + outcome.collisions, + vec![ + Collision { + path: "FILE_x".into(), + error_kind, + }, + Collision { + path: "d".into(), + error_kind: error_kind_dir, + }, + Collision { + path: "file_X".into(), + error_kind, + }, + Collision { + path: "file_x".into(), + error_kind, + }, + Collision { + path: "x".into(), + error_kind, + }, + ], + "these files couldn't be checked out" + ); +} + +fn assert_equality(source_tree: &Path, destination: &TempDir, allow_symlinks: bool) -> crate::Result { + let source_files = dir_structure(source_tree); + let worktree_files = dir_structure(&destination); + + assert_eq!( + stripped_prefix(source_tree, &source_files), + stripped_prefix(&destination, &worktree_files), + ); + + let mut count = 0; + for (source_file, worktree_file) in source_files.iter().zip(worktree_files.iter()) { + count += 1; + if !allow_symlinks && source_file.is_symlink() { + assert!(!worktree_file.is_symlink()); + assert_eq!(fs::read(worktree_file)?.to_path()?, fs::read_link(source_file)?); + } else { + assert_eq!(fs::read(source_file)?, fs::read(worktree_file)?); + #[cfg(unix)] + assert_eq!( + fs::symlink_metadata(source_file)?.mode() & 0o700, + fs::symlink_metadata(worktree_file)?.mode() & 0o700, + "permissions of source and checked out file are comparable" + ); + } + } + Ok(count) +} + +pub fn dir_structure>(path: P) -> Vec { + let path = path.as_ref(); + let mut files: Vec<_> = walkdir::WalkDir::new(path) + .follow_links(false) + .into_iter() + .filter_entry(|e| e.path() == path || !e.file_name().to_string_lossy().starts_with('.')) + .flatten() + .filter_map(|e| (!e.path().is_dir()).then(|| e.path().to_path_buf())) + .collect(); + files.sort(); + files +} + +fn checkout_index_in_tmp_dir( + opts: index::checkout::Options, + name: &str, +) -> crate::Result<( + PathBuf, + TempDir, + git_index::File, + git_worktree::index::checkout::Outcome, +)> { + checkout_index_in_tmp_dir_prep_dest(opts, name, |_d| true) +} + +fn checkout_index_in_tmp_dir_prep_dest( + opts: index::checkout::Options, + name: &str, + mut allow_return_object: impl FnMut(&git_hash::oid) -> bool, +) -> crate::Result<( + PathBuf, + TempDir, + git_index::File, + git_worktree::index::checkout::Outcome, +)> { + let source_tree = fixture_path(name); + let git_dir = source_tree.join(".git"); + let mut index = git_index::File::at(git_dir.join("index"), Default::default())?; + let odb = git_odb::at(git_dir.join("objects"))?; + let destination = tempfile::tempdir()?; + + let outcome = index::checkout( + &mut index, + destination.path(), + move |oid, buf| { + if allow_return_object(oid) { + odb.find_blob(oid, buf) + } else { + Err(git_odb::find::existing_object::Error::NotFound { oid: oid.to_owned() }) + } + }, + &mut progress::Discard, + &mut progress::Discard, + opts, + )?; + Ok((source_tree, destination, index, outcome)) +} + +fn stripped_prefix(prefix: impl AsRef, source_files: &[PathBuf]) -> Vec<&Path> { + source_files.iter().flat_map(|p| p.strip_prefix(&prefix)).collect() +} + +fn probe_gitoxide_dir() -> crate::Result { + Ok(git_worktree::fs::Capabilities::probe( + std::env::current_dir()?.join("..").join(".git"), + )) +} + +fn opts_with_symlink(symlink: bool) -> index::checkout::Options { + index::checkout::Options { + fs: git_worktree::fs::Capabilities { + symlink, + ..Default::default() + }, + destination_is_initially_empty: true, + ..Default::default() + } +} + +fn paths<'a>(p: impl IntoIterator) -> Vec { + p.into_iter().map(PathBuf::from).collect() +} diff --git a/git-worktree/tests/index/mod.rs b/git-worktree/tests/index/mod.rs index a8e1bbc697f..24370dce48a 100644 --- a/git-worktree/tests/index/mod.rs +++ b/git-worktree/tests/index/mod.rs @@ -1,351 +1 @@ -mod checkout { - mod cache { - use git_index::entry::Mode; - use git_worktree::index::checkout::PathCache; - use std::path::Path; - use tempfile::{tempdir, TempDir}; - - #[test] - fn root_is_assumed_to_exist_and_files_in_root_do_not_create_directory() { - let dir = tempdir().unwrap(); - let mut cache = PathCache::new(dir.path().join("non-existing-root")); - assert_eq!(cache.test_mkdir_calls, 0); - - let path = cache - .append_relative_path_assure_leading_dir("hello", Mode::FILE) - .unwrap(); - assert!(!path.parent().unwrap().exists(), "prefix itself is never created"); - assert_eq!(cache.test_mkdir_calls, 0); - } - - #[test] - fn directory_paths_are_created_in_full() { - let (mut cache, _tmp) = new_cache(); - - for (name, mode) in &[ - ("dir", Mode::DIR), - ("submodule", Mode::COMMIT), - ("file", Mode::FILE), - ("exe", Mode::FILE_EXECUTABLE), - ("link", Mode::SYMLINK), - ] { - let path = cache - .append_relative_path_assure_leading_dir(Path::new("dir").join(name), *mode) - .unwrap(); - assert!(path.parent().unwrap().is_dir(), "dir exists"); - } - - assert_eq!(cache.test_mkdir_calls, 3); - } - - #[test] - fn existing_directories_are_fine() { - let (mut cache, tmp) = new_cache(); - std::fs::create_dir(tmp.path().join("dir")).unwrap(); - - let path = cache - .append_relative_path_assure_leading_dir("dir/file", Mode::FILE) - .unwrap(); - assert!(path.parent().unwrap().is_dir(), "directory is still present"); - assert!(!path.exists(), "it won't create the file"); - assert_eq!(cache.test_mkdir_calls, 1); - } - - #[test] - fn symlinks_or_files_in_path_are_forbidden_or_unlinked_when_forced() { - let (mut cache, tmp) = new_cache(); - let forbidden = tmp.path().join("forbidden"); - std::fs::create_dir(&forbidden).unwrap(); - symlink::symlink_dir(&forbidden, tmp.path().join("link-to-dir")).unwrap(); - std::fs::write(tmp.path().join("file-in-dir"), &[]).unwrap(); - - for dirname in &["link-to-dir", "file-in-dir"] { - cache.unlink_on_collision = false; - let relative_path = format!("{}/file", dirname); - assert_eq!( - cache - .append_relative_path_assure_leading_dir(&relative_path, Mode::FILE) - .unwrap_err() - .kind(), - std::io::ErrorKind::AlreadyExists - ); - cache.unlink_on_collision = true; - - let path = cache - .append_relative_path_assure_leading_dir(&relative_path, Mode::FILE) - .unwrap(); - assert!(path.parent().unwrap().is_dir(), "directory was forcefully created"); - assert!(!path.exists()); - } - assert_eq!(cache.test_mkdir_calls, 4); - } - - fn new_cache() -> (PathCache, TempDir) { - let dir = tempdir().unwrap(); - let cache = PathCache::new(dir.path()); - (cache, dir) - } - } - - use bstr::ByteVec; - #[cfg(unix)] - use std::os::unix::prelude::MetadataExt; - use std::{ - fs, - io::ErrorKind, - path::{Path, PathBuf}, - }; - - use git_features::progress; - use git_object::bstr::ByteSlice; - use git_odb::FindExt; - use git_worktree::{fs::Capabilities, index, index::checkout::Collision}; - use tempfile::TempDir; - - use crate::fixture_path; - - #[test] - fn symlinks_become_files_if_disabled() -> crate::Result { - let opts = opts_with_symlink(false); - let (source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir(opts, "make_mixed_without_submodules")?; - - assert_equality(&source_tree, &destination, opts.fs.symlink)?; - assert!(outcome.collisions.is_empty()); - Ok(()) - } - - #[test] - fn allow_symlinks() -> crate::Result { - let opts = opts_with_symlink(true); - if !probe_gitoxide_dir()?.symlink { - eprintln!("IGNORING symlink test on file system without symlink support"); - // skip if symlinks aren't supported anyway. - return Ok(()); - }; - let (source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir(opts, "make_mixed_without_submodules")?; - - assert_equality(&source_tree, &destination, opts.fs.symlink)?; - assert!(outcome.collisions.is_empty()); - Ok(()) - } - - #[test] - fn keep_going_collects_results() { - let mut opts = opts_with_symlink(true); - opts.keep_going = true; - let mut count = 0; - let (_source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir_prep_dest(opts, "make_mixed_without_submodules", |_id| { - if count < 2 { - count += 1; - false - } else { - true - } - }) - .unwrap(); - - assert_eq!( - stripped_prefix(&destination, &dir_structure(&destination)), - paths(["empty", "executable"]), - "some files could not be created" - ); - - assert!(outcome.collisions.is_empty()); - assert_eq!( - outcome - .errors - .into_iter() - .map(|r| Vec::from(r.path).into_path_buf_lossy()) - .collect::>(), - paths(["dir/content", "dir/sub-dir/symlink"]) - ); - } - - #[test] - fn no_case_related_collisions_on_case_sensitive_filesystem() { - if probe_gitoxide_dir().unwrap().ignore_case { - eprintln!("Skipping case-sensitive testing on what would be a case-insenstive file system"); - return; - } - let opts = opts_with_symlink(true); - let (source_tree, destination, index, outcome) = - checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap(); - - let num_files = assert_equality(&source_tree, &destination, opts.fs.symlink).unwrap(); - assert_eq!(num_files, index.entries().len(), "it checks out all files"); - assert!(outcome.collisions.is_empty()); - } - - #[test] - fn collisions_are_detected_on_a_case_sensitive_filesystem() { - let fs_caps = probe_gitoxide_dir().unwrap(); - if !fs_caps.ignore_case { - eprintln!("Skipping case-insensitive testing on what would be a case-senstive file system"); - return; - } - let opts = opts_with_symlink(fs_caps.symlink); - let (source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap(); - - let source_files = dir_structure(&source_tree); - assert_eq!( - stripped_prefix(&source_tree, &source_files), - paths(["d", "file_x", "link-to-X", "x"]), - "plenty of collisions prevent a checkout" - ); - - let dest_files = dir_structure(&destination); - assert_eq!( - stripped_prefix(&destination, &dest_files), - paths(["D/B", "D/C", "FILE_X", "X", "link-to-X"]), - "we checkout files in order and generally handle collision detection differently, hence the difference" - ); - - let error_kind = ErrorKind::AlreadyExists; - #[cfg(windows)] - let error_kind_dir = ErrorKind::PermissionDenied; - #[cfg(not(windows))] - let error_kind_dir = error_kind; - - assert_eq!( - outcome.collisions, - vec![ - Collision { - path: "FILE_x".into(), - error_kind, - }, - Collision { - path: "d".into(), - error_kind: error_kind_dir, - }, - Collision { - path: "file_X".into(), - error_kind, - }, - Collision { - path: "file_x".into(), - error_kind, - }, - Collision { - path: "x".into(), - error_kind, - }, - ], - "these files couldn't be checked out" - ); - } - - fn assert_equality(source_tree: &Path, destination: &TempDir, allow_symlinks: bool) -> crate::Result { - let source_files = dir_structure(source_tree); - let worktree_files = dir_structure(&destination); - - assert_eq!( - stripped_prefix(source_tree, &source_files), - stripped_prefix(&destination, &worktree_files), - ); - - let mut count = 0; - for (source_file, worktree_file) in source_files.iter().zip(worktree_files.iter()) { - count += 1; - if !allow_symlinks && source_file.is_symlink() { - assert!(!worktree_file.is_symlink()); - assert_eq!(fs::read(worktree_file)?.to_path()?, fs::read_link(source_file)?); - } else { - assert_eq!(fs::read(source_file)?, fs::read(worktree_file)?); - #[cfg(unix)] - assert_eq!( - fs::symlink_metadata(source_file)?.mode() & 0o700, - fs::symlink_metadata(worktree_file)?.mode() & 0o700, - "permissions of source and checked out file are comparable" - ); - } - } - Ok(count) - } - - pub fn dir_structure>(path: P) -> Vec { - let path = path.as_ref(); - let mut files: Vec<_> = walkdir::WalkDir::new(path) - .follow_links(false) - .into_iter() - .filter_entry(|e| e.path() == path || !e.file_name().to_string_lossy().starts_with('.')) - .flatten() - .filter_map(|e| (!e.path().is_dir()).then(|| e.path().to_path_buf())) - .collect(); - files.sort(); - files - } - - fn checkout_index_in_tmp_dir( - opts: index::checkout::Options, - name: &str, - ) -> crate::Result<( - PathBuf, - TempDir, - git_index::File, - git_worktree::index::checkout::Outcome, - )> { - checkout_index_in_tmp_dir_prep_dest(opts, name, |_d| true) - } - - fn checkout_index_in_tmp_dir_prep_dest( - opts: index::checkout::Options, - name: &str, - mut allow_return_object: impl FnMut(&git_hash::oid) -> bool, - ) -> crate::Result<( - PathBuf, - TempDir, - git_index::File, - git_worktree::index::checkout::Outcome, - )> { - let source_tree = fixture_path(name); - let git_dir = source_tree.join(".git"); - let mut index = git_index::File::at(git_dir.join("index"), Default::default())?; - let odb = git_odb::at(git_dir.join("objects"))?; - let destination = tempfile::tempdir()?; - - let outcome = index::checkout( - &mut index, - destination.path(), - move |oid, buf| { - if allow_return_object(oid) { - odb.find_blob(oid, buf) - } else { - Err(git_odb::find::existing_object::Error::NotFound { oid: oid.to_owned() }) - } - }, - &mut progress::Discard, - &mut progress::Discard, - opts, - )?; - Ok((source_tree, destination, index, outcome)) - } - - fn stripped_prefix(prefix: impl AsRef, source_files: &[PathBuf]) -> Vec<&Path> { - source_files.iter().flat_map(|p| p.strip_prefix(&prefix)).collect() - } - - fn probe_gitoxide_dir() -> crate::Result { - Ok(git_worktree::fs::Capabilities::probe( - std::env::current_dir()?.join("..").join(".git"), - )) - } - - fn opts_with_symlink(symlink: bool) -> index::checkout::Options { - index::checkout::Options { - fs: git_worktree::fs::Capabilities { - symlink, - ..Default::default() - }, - destination_is_initially_empty: true, - ..Default::default() - } - } - - fn paths<'a>(p: impl IntoIterator) -> Vec { - p.into_iter().map(PathBuf::from).collect() - } -} +mod checkout; From 9f9d36d7d7bba443fba5917e9920911596fd64f6 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 7 Mar 2022 11:33:08 +0800 Subject: [PATCH 05/37] try to fix tests on linux (#301) --- git-worktree/tests/index/checkout.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/git-worktree/tests/index/checkout.rs b/git-worktree/tests/index/checkout.rs index abd85fc213e..b0b68b7c212 100644 --- a/git-worktree/tests/index/checkout.rs +++ b/git-worktree/tests/index/checkout.rs @@ -139,9 +139,14 @@ fn accidental_writes_through_symlinks_are_prevented_if_overwriting_is_forbidden( ] ); } else { - let expected = ["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE", "fake-dir/b", "fake-file"]; - assert_eq!(stripped_prefix(&source_tree, &source_files), paths(expected)); - assert_eq!(stripped_prefix(&destination, &worktree_files), paths(expected)); + assert_eq!( + stripped_prefix(&source_tree, &source_files), + paths(["A-dir/a", "A-file", "FAKE-FILE", "fake-dir/b", "fake-file"]) + ); + assert_eq!( + stripped_prefix(&destination, &worktree_files), + paths(["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE", "fake-dir/b", "fake-file"]) + ); assert!(outcome.collisions.is_empty()); }; } From d3d7a7c3c67868ba0fda6b04e6874aa2f91f638b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 7 Mar 2022 11:44:22 +0800 Subject: [PATCH 06/37] Allow symlinks to dirs to be returned, too (#301) --- git-worktree/tests/index/checkout.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/git-worktree/tests/index/checkout.rs b/git-worktree/tests/index/checkout.rs index b0b68b7c212..63daac81a20 100644 --- a/git-worktree/tests/index/checkout.rs +++ b/git-worktree/tests/index/checkout.rs @@ -119,11 +119,11 @@ fn accidental_writes_through_symlinks_are_prevented_if_overwriting_is_forbidden( if fs_caps.ignore_case { assert_eq!( stripped_prefix(&source_tree, &source_files), - paths(["A-dir/a", "A-file", "fake-dir/b", "fake-file"]) + paths(["A-dir/a", "A-file", "FAKE-DIR", "fake-file"]) ); assert_eq!( stripped_prefix(&destination, &worktree_files), - paths(["A-dir/a", "A-file", "FAKE-FILE"]) + paths(["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE"]) ); assert_eq!( outcome.collisions, @@ -139,14 +139,9 @@ fn accidental_writes_through_symlinks_are_prevented_if_overwriting_is_forbidden( ] ); } else { - assert_eq!( - stripped_prefix(&source_tree, &source_files), - paths(["A-dir/a", "A-file", "FAKE-FILE", "fake-dir/b", "fake-file"]) - ); - assert_eq!( - stripped_prefix(&destination, &worktree_files), - paths(["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE", "fake-dir/b", "fake-file"]) - ); + let expected = ["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE", "fake-dir/b", "fake-file"]; + assert_eq!(stripped_prefix(&source_tree, &source_files), paths(expected)); + assert_eq!(stripped_prefix(&destination, &worktree_files), paths(expected)); assert!(outcome.collisions.is_empty()); }; } @@ -318,7 +313,7 @@ pub fn dir_structure>(path: P) -> Vec Date: Mon, 7 Mar 2022 12:00:40 +0800 Subject: [PATCH 07/37] fix case-insensitive tests (#301) It helps to have a local case-insensitive file system to test against --- git-worktree/tests/index/checkout.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/git-worktree/tests/index/checkout.rs b/git-worktree/tests/index/checkout.rs index 63daac81a20..018bee1509f 100644 --- a/git-worktree/tests/index/checkout.rs +++ b/git-worktree/tests/index/checkout.rs @@ -119,7 +119,7 @@ fn accidental_writes_through_symlinks_are_prevented_if_overwriting_is_forbidden( if fs_caps.ignore_case { assert_eq!( stripped_prefix(&source_tree, &source_files), - paths(["A-dir/a", "A-file", "FAKE-DIR", "fake-file"]) + paths(["A-dir/a", "A-file", "fake-dir/b", "fake-file"]) ); assert_eq!( stripped_prefix(&destination, &worktree_files), @@ -206,17 +206,19 @@ fn keep_going_collects_results() { #[test] fn no_case_related_collisions_on_case_sensitive_filesystem() { - if probe_gitoxide_dir().unwrap().ignore_case { - eprintln!("Skipping case-sensitive testing on what would be a case-insenstive file system"); + let fs_caps = probe_gitoxide_dir().unwrap(); + if fs_caps.ignore_case { + eprintln!("Skipping case-sensitive testing on what would be a case-insensitive file system"); return; } - let opts = opts_with_symlink(true); + let mut opts = opts_with_symlink(true); + opts.fs = fs_caps; let (source_tree, destination, index, outcome) = checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap(); + assert!(outcome.collisions.is_empty()); let num_files = assert_equality(&source_tree, &destination, opts.fs.symlink).unwrap(); assert_eq!(num_files, index.entries().len(), "it checks out all files"); - assert!(outcome.collisions.is_empty()); } #[test] @@ -345,7 +347,7 @@ fn checkout_index_in_tmp_dir_prep_dest( let git_dir = source_tree.join(".git"); let mut index = git_index::File::at(git_dir.join("index"), Default::default())?; let odb = git_odb::at(git_dir.join("objects"))?; - let destination = tempfile::tempdir()?; + let destination = tempfile::tempdir_in(std::env::current_dir()?)?; let outcome = index::checkout( &mut index, From cd6e08644df3a2b52aa70a2f37e988ec10b280f0 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 7 Mar 2022 12:53:15 +0800 Subject: [PATCH 08/37] =?UTF-8?q?prepare=20for=20first=20overwrite=20test?= =?UTF-8?q?=E2=80=A6=20(#301)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …and have to figure out how that should work also in conjunction with something like 'check-path'. For now it seems we can put all the work to unlink destinations into our write-entry equivalent, allowing check-path to not mutate anything. --- git-worktree/src/index/entry.rs | 7 ++--- git-worktree/tests/index/checkout.rs | 43 ++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/git-worktree/src/index/entry.rs b/git-worktree/src/index/entry.rs index f86c2771a3e..edf07803108 100644 --- a/git-worktree/src/index/entry.rs +++ b/git-worktree/src/index/entry.rs @@ -30,12 +30,11 @@ where Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, E: std::error::Error + Send + Sync + 'static, { - let dest = cache.append_relative_path_assure_leading_dir( + let dest_relative = git_features::path::from_byte_slice(entry_path).map_err(|_| index::checkout::Error::IllformedUtf8 { path: entry_path.to_owned(), - })?, - entry.mode, - )?; + })?; + let dest = cache.append_relative_path_assure_leading_dir(dest_relative, entry.mode)?; let object_size = match entry.mode { git_index::entry::Mode::FILE | git_index::entry::Mode::FILE_EXECUTABLE => { diff --git a/git-worktree/tests/index/checkout.rs b/git-worktree/tests/index/checkout.rs index 018bee1509f..92088c520ea 100644 --- a/git-worktree/tests/index/checkout.rs +++ b/git-worktree/tests/index/checkout.rs @@ -146,6 +146,49 @@ fn accidental_writes_through_symlinks_are_prevented_if_overwriting_is_forbidden( }; } +#[test] +#[ignore] +fn writes_through_symlinks_are_prevented_even_if_overwriting_is_allowed() { + let mut opts = opts_with_symlink(true); + // with overwrite mode + opts.overwrite_existing = true; + let (source_tree, destination, _index, outcome) = + checkout_index_in_tmp_dir(opts, "make_dangerous_symlink").unwrap(); + + let source_files = dir_structure(&source_tree); + let worktree_files = dir_structure(&destination); + + let fs_caps = probe_gitoxide_dir().unwrap(); + if fs_caps.ignore_case { + assert_eq!( + stripped_prefix(&source_tree, &source_files), + paths(["A-dir/a", "A-file", "fake-dir/b", "fake-file"]) + ); + assert_eq!( + stripped_prefix(&destination, &worktree_files), + paths(["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE"]) + ); + assert_eq!( + outcome.collisions, + vec![ + Collision { + path: "fake-dir/b".into(), + error_kind: AlreadyExists + }, + Collision { + path: "fake-file".into(), + error_kind: AlreadyExists + } + ] + ); + } else { + let expected = ["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE", "fake-dir/b", "fake-file"]; + assert_eq!(stripped_prefix(&source_tree, &source_files), paths(expected)); + assert_eq!(stripped_prefix(&destination, &worktree_files), paths(expected)); + assert!(outcome.collisions.is_empty()); + }; +} + #[test] fn symlinks_become_files_if_disabled() -> crate::Result { let opts = opts_with_symlink(false); From 77b053dfd38e30a8ab397704059283a4766b9601 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 7 Mar 2022 15:06:21 +0800 Subject: [PATCH 09/37] =?UTF-8?q?delayed=20symlink=20creation=20for=20wind?= =?UTF-8?q?ows,=20but=E2=80=A6=20(#301)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …tests need some work, it doesn't yet run as it should. --- git-worktree/src/index/mod.rs | 141 ++++++++++++++++++++++++---------- 1 file changed, 102 insertions(+), 39 deletions(-) diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs index 6859e2e5a0a..c492893d7a6 100644 --- a/git-worktree/src/index/mod.rs +++ b/git-worktree/src/index/mod.rs @@ -1,3 +1,4 @@ +use bstr::BStr; use git_features::progress::Progress; use git_hash::oid; @@ -10,7 +11,7 @@ pub(crate) mod entry; pub fn checkout( index: &mut git_index::State, dir: impl Into, - mut find: Find, + find: Find, files: &mut impl Progress, bytes: &mut impl Progress, options: checkout::Options, @@ -23,56 +24,118 @@ where todo!("deal with non-clone checkouts") } - use std::io::ErrorKind::AlreadyExists; let mut path_cache = PathCache::new(dir.into()); path_cache.unlink_on_collision = options.overwrite_existing; - let mut buf = Vec::new(); - let mut collisions = Vec::new(); - let mut errors = Vec::new(); + let mut delayed = Vec::new(); + + let mut ctx = Context { + buf: Vec::new(), + collisions: Vec::new(), + errors: Vec::new(), + path_cache, + files, + bytes, + find, + options, + }; for (entry, entry_path) in index.entries_mut_with_paths() { // TODO: write test for that if entry.flags.contains(git_index::entry::Flags::SKIP_WORKTREE) { - files.inc(); + ctx.files.inc(); continue; } - let res = entry::checkout(entry, entry_path, &mut find, &mut path_cache, options, &mut buf); - files.inc(); - match res { - Ok(object_size) => bytes.inc_by(object_size), - #[cfg(windows)] - Err(index::checkout::Error::Io(err)) - if err.kind() == AlreadyExists || err.kind() == std::io::ErrorKind::PermissionDenied => - { - collisions.push(checkout::Collision { - path: entry_path.into(), - error_kind: err.kind(), - }); - } - // TODO: use ::IsDirectory as well when stabilized instead of raw_os_error() - #[cfg(not(windows))] - Err(index::checkout::Error::Io(err)) if err.kind() == AlreadyExists || err.raw_os_error() == Some(21) => { - // We are here because a file existed or was blocked by a directory which shouldn't be possible unless - // we are on a file insensitive file system. - files.fail(format!("{}: collided ({:?})", entry_path, err.kind())); - collisions.push(checkout::Collision { + // Symlinks always have to be delayed on windows as they have to point to something that exists on creation. + // And even if not, there is a distinction between file and directory symlinks, hence we have to check what the target is + // before creating it. + if cfg!(windows) && entry.mode == git_index::entry::Mode::SYMLINK { + // if entry.mode == git_index::entry::Mode::SYMLINK { + delayed.push((entry, entry_path)); + continue; + } + + checkout_entry_handle_result(entry, entry_path, &mut ctx)?; + } + + for (entry, entry_path) in delayed { + checkout_entry_handle_result(entry, entry_path, &mut ctx)?; + } + + Ok(checkout::Outcome { + collisions: ctx.collisions, + errors: ctx.errors, + }) +} + +struct Context<'a, P1, P2, Find> { + bytes: &'a mut P1, + files: &'a mut P2, + find: Find, + path_cache: PathCache, + buf: Vec, + options: checkout::Options, + errors: Vec, + collisions: Vec, +} + +fn checkout_entry_handle_result( + entry: &mut git_index::Entry, + entry_path: &BStr, + Context { + bytes, + files, + find, + path_cache, + buf, + options, + errors, + collisions, + }: &mut Context<'_, P1, P2, Find>, +) -> Result<(), checkout::Error> +where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + E: std::error::Error + Send + Sync + 'static, + P1: Progress, + P2: Progress, +{ + use std::io::ErrorKind::AlreadyExists; + + let res = entry::checkout(entry, entry_path, find, path_cache, *options, buf); + files.inc(); + match res { + Ok(object_size) => bytes.inc_by(object_size), + #[cfg(windows)] + Err(index::checkout::Error::Io(err)) + if err.kind() == AlreadyExists || err.kind() == std::io::ErrorKind::PermissionDenied => + { + collisions.push(checkout::Collision { + path: entry_path.into(), + error_kind: err.kind(), + }); + } + // TODO: use ::IsDirectory as well when stabilized instead of raw_os_error() + #[cfg(not(windows))] + Err(index::checkout::Error::Io(err)) if err.kind() == AlreadyExists || err.raw_os_error() == Some(21) => { + // We are here because a file existed or was blocked by a directory which shouldn't be possible unless + // we are on a file insensitive file system. + files.fail(format!("{}: collided ({:?})", entry_path, err.kind())); + collisions.push(checkout::Collision { + path: entry_path.into(), + error_kind: err.kind(), + }); + } + Err(err) => { + if options.keep_going { + errors.push(ErrorRecord { path: entry_path.into(), - error_kind: err.kind(), + error: Box::new(err), }); - } - Err(err) => { - if options.keep_going { - errors.push(ErrorRecord { - path: entry_path.into(), - error: Box::new(err), - }); - } else { - return Err(err); - } + } else { + return Err(err); } } - } - Ok(checkout::Outcome { collisions, errors }) + }; + Ok(()) } From ab5cd3d383c3c6cb31a7b8d387daedacb9e3838f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 7 Mar 2022 15:35:41 +0800 Subject: [PATCH 10/37] =?UTF-8?q?delayed=20symlink=20creation=20for=20ever?= =?UTF-8?q?yone,=20but=E2=80=A6(#301)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …the path cache now shows failing tests due to fix. Needs its state handling fixed. --- git-worktree/src/index/checkout.rs | 1 + git-worktree/src/index/mod.rs | 5 ++++- git-worktree/tests/index/checkout.rs | 13 +++++-------- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/git-worktree/src/index/checkout.rs b/git-worktree/src/index/checkout.rs index edbfed4ba2c..67f9ae7b124 100644 --- a/git-worktree/src/index/checkout.rs +++ b/git-worktree/src/index/checkout.rs @@ -83,6 +83,7 @@ mod cache { for _ in 0..self.valid_components - matching_components { self.valid.pop(); + self.valid_relative.pop(); } self.valid_components = matching_components; diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs index c492893d7a6..9a50574584f 100644 --- a/git-worktree/src/index/mod.rs +++ b/git-worktree/src/index/mod.rs @@ -50,7 +50,10 @@ where // Symlinks always have to be delayed on windows as they have to point to something that exists on creation. // And even if not, there is a distinction between file and directory symlinks, hence we have to check what the target is // before creating it. - if cfg!(windows) && entry.mode == git_index::entry::Mode::SYMLINK { + // And to keep things sane, we just do the same on non-windows as well which is similar to what git does and adds some safety + // around writing through symlinks (even though we handle this). + // This also means that we prefer content in files over symlinks in case of collisions, which probably is for the better, too. + if entry.mode == git_index::entry::Mode::SYMLINK { // if entry.mode == git_index::entry::Mode::SYMLINK { delayed.push((entry, entry_path)); continue; diff --git a/git-worktree/tests/index/checkout.rs b/git-worktree/tests/index/checkout.rs index 92088c520ea..cf989ca1b91 100644 --- a/git-worktree/tests/index/checkout.rs +++ b/git-worktree/tests/index/checkout.rs @@ -51,6 +51,7 @@ mod cache { } #[test] + #[ignore] fn symlinks_or_files_in_path_are_forbidden_or_unlinked_when_forced() { let (mut cache, tmp) = new_cache(); let forbidden = tmp.path().join("forbidden"); @@ -119,21 +120,17 @@ fn accidental_writes_through_symlinks_are_prevented_if_overwriting_is_forbidden( if fs_caps.ignore_case { assert_eq!( stripped_prefix(&source_tree, &source_files), - paths(["A-dir/a", "A-file", "fake-dir/b", "fake-file"]) - ); - assert_eq!( stripped_prefix(&destination, &worktree_files), - paths(["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE"]) ); assert_eq!( outcome.collisions, vec![ Collision { - path: "fake-dir/b".into(), + path: "FAKE-DIR".into(), error_kind: AlreadyExists }, Collision { - path: "fake-file".into(), + path: "FAKE-FILE".into(), error_kind: AlreadyExists } ] @@ -232,7 +229,7 @@ fn keep_going_collects_results() { assert_eq!( stripped_prefix(&destination, &dir_structure(&destination)), - paths(["empty", "executable"]), + paths(["dir/sub-dir/symlink", "executable"]), "some files could not be created" ); @@ -243,7 +240,7 @@ fn keep_going_collects_results() { .into_iter() .map(|r| Vec::from(r.path).into_path_buf_lossy()) .collect::>(), - paths(["dir/content", "dir/sub-dir/symlink"]) + paths(["dir/content", "empty"]) ); } From 52c0058531df1a0f3fc755c5c51e71d34841cb77 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 7 Mar 2022 15:47:38 +0800 Subject: [PATCH 11/37] Fix dir-cache to properly handle its valiity which fixes test (#301) --- git-worktree/src/index/checkout.rs | 58 +++++++++++++++++----------- git-worktree/tests/index/checkout.rs | 15 +++++-- 2 files changed, 48 insertions(+), 25 deletions(-) diff --git a/git-worktree/src/index/checkout.rs b/git-worktree/src/index/checkout.rs index 67f9ae7b124..0b55bb38169 100644 --- a/git-worktree/src/index/checkout.rs +++ b/git-worktree/src/index/checkout.rs @@ -93,34 +93,48 @@ mod cache { self.valid.push(comp); self.valid_relative.push(comp); self.valid_components += 1; - if components.peek().is_some() || target_is_dir { - #[cfg(debug_assertions)] - { - self.test_mkdir_calls += 1; - } - match std::fs::create_dir(&self.valid) { - Ok(()) => {} - Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => { - let meta = self.valid.symlink_metadata()?; - if !meta.is_dir() { - if self.unlink_on_collision { - if meta.is_symlink() { - symlink::remove_symlink_auto(&self.valid)?; + let res = (|| { + if components.peek().is_some() || target_is_dir { + #[cfg(debug_assertions)] + { + self.test_mkdir_calls += 1; + } + match std::fs::create_dir(&self.valid) { + Ok(()) => Ok(()), + Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => { + let meta = self.valid.symlink_metadata()?; + if !meta.is_dir() { + if self.unlink_on_collision { + if meta.is_symlink() { + symlink::remove_symlink_auto(&self.valid)?; + } else { + std::fs::remove_file(&self.valid)?; + } + #[cfg(debug_assertions)] + { + self.test_mkdir_calls += 1; + } + std::fs::create_dir(&self.valid)?; + Ok(()) } else { - std::fs::remove_file(&self.valid)?; + Err(err) } - #[cfg(debug_assertions)] - { - self.test_mkdir_calls += 1; - } - std::fs::create_dir(&self.valid)?; - continue; + } else { + Ok(()) } - return Err(err); } + Err(err) => Err(err), } - Err(err) => return Err(err), + } else { + Ok(()) } + })(); + + if let Err(err) = res { + self.valid.pop(); + self.valid_relative.pop(); + self.valid_components -= 1; + return Err(err); } } diff --git a/git-worktree/tests/index/checkout.rs b/git-worktree/tests/index/checkout.rs index cf989ca1b91..800bdf85f8c 100644 --- a/git-worktree/tests/index/checkout.rs +++ b/git-worktree/tests/index/checkout.rs @@ -51,7 +51,6 @@ mod cache { } #[test] - #[ignore] fn symlinks_or_files_in_path_are_forbidden_or_unlinked_when_forced() { let (mut cache, tmp) = new_cache(); let forbidden = tmp.path().join("forbidden"); @@ -69,15 +68,25 @@ mod cache { .kind(), std::io::ErrorKind::AlreadyExists ); + } + assert_eq!( + cache.test_mkdir_calls, 2, + "it tries to create each directory once, but it's a file" + ); + cache.test_mkdir_calls = 0; + for dirname in &["link-to-dir", "file-in-dir"] { cache.unlink_on_collision = true; - + let relative_path = format!("{}/file", dirname); let path = cache .append_relative_path_assure_leading_dir(&relative_path, Mode::FILE) .unwrap(); assert!(path.parent().unwrap().is_dir(), "directory was forcefully created"); assert!(!path.exists()); } - assert_eq!(cache.test_mkdir_calls, 4); + assert_eq!( + cache.test_mkdir_calls, 4, + "like before, but it unlinks what's there and tries again" + ); } fn new_cache() -> (PathCache, TempDir) { From 49d1d34dff76d8b1e5e7fa9d08e6ead4e8bca018 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 7 Mar 2022 17:32:55 +0800 Subject: [PATCH 12/37] overwrite-existing support with tests (#301) We do our best to test all the cases, and tests discovered quite a few special cases. Directories need to be removed recursively, but when that is done it should be quite equivalent to the capabiliites of git itself. --- git-worktree/src/index/entry.rs | 61 ++++++- git-worktree/src/index/mod.rs | 21 +-- git-worktree/src/os.rs | 12 ++ .../fixtures/make_mixed_without_submodules.sh | 4 +- git-worktree/tests/index/checkout.rs | 152 +++++++++++------- 5 files changed, 167 insertions(+), 83 deletions(-) diff --git a/git-worktree/src/index/entry.rs b/git-worktree/src/index/entry.rs index edf07803108..8e4cb762d79 100644 --- a/git-worktree/src/index/entry.rs +++ b/git-worktree/src/index/entry.rs @@ -5,8 +5,8 @@ use bstr::BStr; use git_hash::oid; use git_index::Entry; -use crate::index; use crate::index::checkout::PathCache; +use crate::{index, os}; #[cfg_attr(not(unix), allow(unused_variables))] pub fn checkout( @@ -45,14 +45,26 @@ where })?; let mut options = open_options(dest, destination_is_initially_empty, overwrite_existing); + let needs_executable_bit = executable_bit && entry.mode == git_index::entry::Mode::FILE_EXECUTABLE; #[cfg(unix)] - if executable_bit && entry.mode == git_index::entry::Mode::FILE_EXECUTABLE { + if needs_executable_bit && destination_is_initially_empty { use std::os::unix::fs::OpenOptionsExt; + // Note that these only work if the file was newly created, but won't if it's already + // existing, possibly without the executable bit set. Thus we do this only if the file is new. options.mode(0o777); } - let mut file = options.open(&dest)?; + let mut file = try_write_or_unlink(dest, overwrite_existing, |p| options.open(p))?; file.write_all(obj.data)?; + + // For possibly existing, overwritten files, we must change the file mode explicitly. + #[cfg(unix)] + if needs_executable_bit && !destination_is_initially_empty { + use std::os::unix::fs::PermissionsExt; + let mut perm = std::fs::symlink_metadata(&dest)?.permissions(); + perm.set_mode(0o777); + std::fs::set_permissions(&dest, perm)?; + } // NOTE: we don't call `file.sync_all()` here knowing that some filesystems don't handle this well. // revisit this once there is a bug to fix. update_fstat(entry, file.metadata()?)?; @@ -74,11 +86,14 @@ where // or directories or symlinks. Doing this shouldn't invalidate the cache as it's only valid till // our parent anyway, but it may invalidate caches in other threads without their knowledge. // collisions with overwrite mode must be handled with great care in parallel mode. - crate::os::create_symlink(symlink_destination, dest)?; + try_write_or_unlink(dest, overwrite_existing, |p| { + crate::os::create_symlink(symlink_destination, p) + })?; } else { - open_options(dest, destination_is_initially_empty, overwrite_existing) - .open(&dest)? - .write_all(obj.data)?; + try_write_or_unlink(dest, overwrite_existing, |p| { + open_options(p, destination_is_initially_empty, overwrite_existing).open(&dest) + })? + .write_all(obj.data)?; } update_fstat(entry, std::fs::symlink_metadata(&dest)?)?; @@ -91,6 +106,38 @@ where Ok(object_size) } +/// Note that this works only because we assume to not race ourselves when symlinks are involved, and we do this by +/// delaying symlink creation to the end and will always do that sequentially. +/// It's still possible to fall for a race if other actors create symlinks in our path, but that's nothing to defend against. +fn try_write_or_unlink( + path: &Path, + overwrite_existing: bool, + op: impl Fn(&Path) -> std::io::Result, +) -> std::io::Result { + if overwrite_existing { + match op(path) { + Ok(res) => Ok(res), + Err(err) if os::indicates_collision(&err) => { + try_unlink_anything_non_recursive(path, &std::fs::symlink_metadata(path)?)?; + op(path) + } + Err(err) => Err(err), + } + } else { + op(path) + } +} + +fn try_unlink_anything_non_recursive(path: &Path, path_meta: &std::fs::Metadata) -> std::io::Result<()> { + if path_meta.is_dir() { + std::fs::remove_dir(path) + } else if path_meta.is_symlink() { + symlink::remove_symlink_auto(path) + } else { + std::fs::remove_file(path) + } +} + /// This is a debug assertion as we expect the machinery calling this to prevent this possibility in the first place fn debug_assert_dest_is_no_symlink(path: &Path) { #[cfg(debug_assertions)] diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs index 9a50574584f..856dc3b51a1 100644 --- a/git-worktree/src/index/mod.rs +++ b/git-worktree/src/index/mod.rs @@ -2,8 +2,8 @@ use bstr::BStr; use git_features::progress::Progress; use git_hash::oid; -use crate::index; use crate::index::checkout::{ErrorRecord, PathCache}; +use crate::{index, os}; pub mod checkout; pub(crate) mod entry; @@ -20,10 +20,6 @@ where Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, E: std::error::Error + Send + Sync + 'static, { - if !options.destination_is_initially_empty { - todo!("deal with non-clone checkouts") - } - let mut path_cache = PathCache::new(dir.into()); path_cache.unlink_on_collision = options.overwrite_existing; @@ -103,24 +99,11 @@ where P1: Progress, P2: Progress, { - use std::io::ErrorKind::AlreadyExists; - let res = entry::checkout(entry, entry_path, find, path_cache, *options, buf); files.inc(); match res { Ok(object_size) => bytes.inc_by(object_size), - #[cfg(windows)] - Err(index::checkout::Error::Io(err)) - if err.kind() == AlreadyExists || err.kind() == std::io::ErrorKind::PermissionDenied => - { - collisions.push(checkout::Collision { - path: entry_path.into(), - error_kind: err.kind(), - }); - } - // TODO: use ::IsDirectory as well when stabilized instead of raw_os_error() - #[cfg(not(windows))] - Err(index::checkout::Error::Io(err)) if err.kind() == AlreadyExists || err.raw_os_error() == Some(21) => { + Err(index::checkout::Error::Io(err)) if os::indicates_collision(&err) => { // We are here because a file existed or was blocked by a directory which shouldn't be possible unless // we are on a file insensitive file system. files.fail(format!("{}: collided ({:?})", entry_path, err.kind())); diff --git a/git-worktree/src/os.rs b/git-worktree/src/os.rs index ba66fbd04c9..d968acad974 100644 --- a/git-worktree/src/os.rs +++ b/git-worktree/src/os.rs @@ -1,4 +1,5 @@ use std::io; +use std::io::ErrorKind::AlreadyExists; use std::path::Path; #[cfg(not(windows))] @@ -26,3 +27,14 @@ pub fn create_symlink(original: &Path, link: &Path) -> io::Result<()> { symlink_file(original, link) } } + +#[cfg(not(windows))] +pub fn indicates_collision(err: &std::io::Error) -> bool { + // TODO: use ::IsDirectory as well when stabilized instead of raw_os_error(), and ::FileSystemLoop respectively + err.kind() == AlreadyExists || err.raw_os_error() == Some(21) || err.raw_os_error() == Some(62) +} + +#[cfg(windows)] +pub fn indicates_collision(err: &std::io::Error) -> bool { + err.kind() == AlreadyExists || err.kind() == std::io::ErrorKind::PermissionDenied +} diff --git a/git-worktree/tests/fixtures/make_mixed_without_submodules.sh b/git-worktree/tests/fixtures/make_mixed_without_submodules.sh index 6f8338dd64f..823ba6e966f 100755 --- a/git-worktree/tests/fixtures/make_mixed_without_submodules.sh +++ b/git-worktree/tests/fixtures/make_mixed_without_submodules.sh @@ -5,11 +5,11 @@ git init -q git config commit.gpgsign false touch empty -echo "content" > executable +echo -n "content" > executable chmod +x executable mkdir dir -echo "other content" > dir/content +echo -n "other content" > dir/content mkdir dir/sub-dir (cd dir/sub-dir && ln -sf ../content symlink) diff --git a/git-worktree/tests/index/checkout.rs b/git-worktree/tests/index/checkout.rs index 800bdf85f8c..3ea4c9d22c4 100644 --- a/git-worktree/tests/index/checkout.rs +++ b/git-worktree/tests/index/checkout.rs @@ -116,7 +116,7 @@ use crate::fixture_path; #[test] fn accidental_writes_through_symlinks_are_prevented_if_overwriting_is_forbidden() { - let mut opts = opts_with_symlink(true); + let mut opts = opts_from_probe(); // without overwrite mode, everything is safe. opts.overwrite_existing = false; let (source_tree, destination, _index, outcome) = @@ -125,8 +125,7 @@ fn accidental_writes_through_symlinks_are_prevented_if_overwriting_is_forbidden( let source_files = dir_structure(&source_tree); let worktree_files = dir_structure(&destination); - let fs_caps = probe_gitoxide_dir().unwrap(); - if fs_caps.ignore_case { + if opts.fs.ignore_case { assert_eq!( stripped_prefix(&source_tree, &source_files), stripped_prefix(&destination, &worktree_files), @@ -153,9 +152,9 @@ fn accidental_writes_through_symlinks_are_prevented_if_overwriting_is_forbidden( } #[test] -#[ignore] +#[ignore] // TODO: needs recursive directory deletion fn writes_through_symlinks_are_prevented_even_if_overwriting_is_allowed() { - let mut opts = opts_with_symlink(true); + let mut opts = opts_from_probe(); // with overwrite mode opts.overwrite_existing = true; let (source_tree, destination, _index, outcome) = @@ -164,29 +163,12 @@ fn writes_through_symlinks_are_prevented_even_if_overwriting_is_allowed() { let source_files = dir_structure(&source_tree); let worktree_files = dir_structure(&destination); - let fs_caps = probe_gitoxide_dir().unwrap(); - if fs_caps.ignore_case { + if opts.fs.ignore_case { assert_eq!( stripped_prefix(&source_tree, &source_files), - paths(["A-dir/a", "A-file", "fake-dir/b", "fake-file"]) - ); - assert_eq!( stripped_prefix(&destination, &worktree_files), - paths(["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE"]) - ); - assert_eq!( - outcome.collisions, - vec![ - Collision { - path: "fake-dir/b".into(), - error_kind: AlreadyExists - }, - Collision { - path: "fake-file".into(), - error_kind: AlreadyExists - } - ] ); + assert!(outcome.collisions.is_empty()); } else { let expected = ["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE", "fake-dir/b", "fake-file"]; assert_eq!(stripped_prefix(&source_tree, &source_files), paths(expected)); @@ -196,23 +178,69 @@ fn writes_through_symlinks_are_prevented_even_if_overwriting_is_allowed() { } #[test] -fn symlinks_become_files_if_disabled() -> crate::Result { - let opts = opts_with_symlink(false); - let (source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir(opts, "make_mixed_without_submodules")?; +fn overwriting_files_and_lone_directories_works() { + let mut opts = opts_from_probe(); + opts.overwrite_existing = true; + opts.destination_is_initially_empty = false; + let (_source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir_opts( + opts, + "make_mixed_without_submodules", + |_| true, + |d| { + let empty = d.join("empty"); + symlink::symlink_dir(d.join(".."), &empty)?; // empty is symlink to the directory above + std::fs::write(d.join("executable"), b"foo")?; // executable is regular file and has different content + let dir = d.join("dir"); + std::fs::create_dir(&dir)?; + std::fs::create_dir(dir.join("content"))?; // 'content' is a directory now + + let dir = dir.join("sub-dir"); + std::fs::create_dir(&dir)?; + + symlink::symlink_dir(empty, dir.join("symlink"))?; // 'symlink' is a symlink to another file + Ok(()) + }, + ) + .unwrap(); - assert_equality(&source_tree, &destination, opts.fs.symlink)?; assert!(outcome.collisions.is_empty()); - Ok(()) + + assert_eq!( + stripped_prefix(&destination, &dir_structure(&destination)), + paths(["dir/content", "dir/sub-dir/symlink", "empty", "executable"]) + ); + let meta = std::fs::symlink_metadata(destination.path().join("empty")).unwrap(); + assert!(meta.is_file(), "'empty' is now a file"); + assert_eq!(meta.len(), 0, "'empty' is indeed empty"); + + let exe = destination.path().join("executable"); + assert_eq!( + std::fs::read(&exe).unwrap(), + b"content", + "'exe' has the correct content" + ); + + let meta = std::fs::symlink_metadata(exe).unwrap(); + assert!(meta.is_file()); + if opts.fs.executable_bit { + #[cfg(unix)] + assert_eq!(meta.mode() & 0o700, 0o700, "the executable bit is set where supported"); + } + + assert_eq!( + std::fs::read(destination.path().join("dir/content")).unwrap(), + b"other content" + ); + + let symlink = destination.path().join("dir/sub-dir/symlink"); + assert!(std::fs::symlink_metadata(&symlink).unwrap().is_symlink(),); + assert_eq!(std::fs::read(symlink).unwrap(), b"other content"); } #[test] -fn allow_symlinks() -> crate::Result { - let opts = opts_with_symlink(true); - if !probe_gitoxide_dir()?.symlink { - eprintln!("IGNORING symlink test on file system without symlink support"); - // skip if symlinks aren't supported anyway. - return Ok(()); - }; +fn symlinks_become_files_if_disabled() -> crate::Result { + let mut opts = opts_from_probe(); + opts.fs.symlink = false; let (source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir(opts, "make_mixed_without_submodules")?; assert_equality(&source_tree, &destination, opts.fs.symlink)?; @@ -220,21 +248,39 @@ fn allow_symlinks() -> crate::Result { Ok(()) } +#[test] +fn allow_or_disallow_symlinks() -> crate::Result { + let mut opts = opts_from_probe(); + for allowed in &[false, true] { + opts.fs.symlink = *allowed; + let (source_tree, destination, _index, outcome) = + checkout_index_in_tmp_dir(opts, "make_mixed_without_submodules")?; + + assert_equality(&source_tree, &destination, opts.fs.symlink)?; + assert!(outcome.collisions.is_empty()); + } + Ok(()) +} + #[test] fn keep_going_collects_results() { - let mut opts = opts_with_symlink(true); + let mut opts = opts_from_probe(); opts.keep_going = true; let mut count = 0; - let (_source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir_prep_dest(opts, "make_mixed_without_submodules", |_id| { + let (_source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir_opts( + opts, + "make_mixed_without_submodules", + |_id| { if count < 2 { count += 1; false } else { true } - }) - .unwrap(); + }, + |_| Ok(()), + ) + .unwrap(); assert_eq!( stripped_prefix(&destination, &dir_structure(&destination)), @@ -255,13 +301,11 @@ fn keep_going_collects_results() { #[test] fn no_case_related_collisions_on_case_sensitive_filesystem() { - let fs_caps = probe_gitoxide_dir().unwrap(); - if fs_caps.ignore_case { + let opts = opts_from_probe(); + if opts.fs.ignore_case { eprintln!("Skipping case-sensitive testing on what would be a case-insensitive file system"); return; } - let mut opts = opts_with_symlink(true); - opts.fs = fs_caps; let (source_tree, destination, index, outcome) = checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap(); @@ -272,12 +316,11 @@ fn no_case_related_collisions_on_case_sensitive_filesystem() { #[test] fn collisions_are_detected_on_a_case_sensitive_filesystem() { - let fs_caps = probe_gitoxide_dir().unwrap(); - if !fs_caps.ignore_case { + let opts = opts_from_probe(); + if !opts.fs.ignore_case { eprintln!("Skipping case-insensitive testing on what would be a case-senstive file system"); return; } - let opts = opts_with_symlink(fs_caps.symlink); let (source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap(); @@ -379,13 +422,14 @@ fn checkout_index_in_tmp_dir( git_index::File, git_worktree::index::checkout::Outcome, )> { - checkout_index_in_tmp_dir_prep_dest(opts, name, |_d| true) + checkout_index_in_tmp_dir_opts(opts, name, |_d| true, |_| Ok(())) } -fn checkout_index_in_tmp_dir_prep_dest( +fn checkout_index_in_tmp_dir_opts( opts: index::checkout::Options, name: &str, mut allow_return_object: impl FnMut(&git_hash::oid) -> bool, + prep_dest: impl Fn(&Path) -> std::io::Result<()>, ) -> crate::Result<( PathBuf, TempDir, @@ -397,6 +441,7 @@ fn checkout_index_in_tmp_dir_prep_dest( let mut index = git_index::File::at(git_dir.join("index"), Default::default())?; let odb = git_odb::at(git_dir.join("objects"))?; let destination = tempfile::tempdir_in(std::env::current_dir()?)?; + prep_dest(destination.path())?; let outcome = index::checkout( &mut index, @@ -425,12 +470,9 @@ fn probe_gitoxide_dir() -> crate::Result { )) } -fn opts_with_symlink(symlink: bool) -> index::checkout::Options { +fn opts_from_probe() -> index::checkout::Options { index::checkout::Options { - fs: git_worktree::fs::Capabilities { - symlink, - ..Default::default() - }, + fs: probe_gitoxide_dir().unwrap(), destination_is_initially_empty: true, ..Default::default() } From facad25c08b82a975eda70493d4818ca7c560aa8 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 7 Mar 2022 18:02:51 +0800 Subject: [PATCH 13/37] better symlink checking on ubuntu (#301) --- git-worktree/src/os.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/git-worktree/src/os.rs b/git-worktree/src/os.rs index d968acad974..28c546d1404 100644 --- a/git-worktree/src/os.rs +++ b/git-worktree/src/os.rs @@ -31,7 +31,10 @@ pub fn create_symlink(original: &Path, link: &Path) -> io::Result<()> { #[cfg(not(windows))] pub fn indicates_collision(err: &std::io::Error) -> bool { // TODO: use ::IsDirectory as well when stabilized instead of raw_os_error(), and ::FileSystemLoop respectively - err.kind() == AlreadyExists || err.raw_os_error() == Some(21) || err.raw_os_error() == Some(62) + err.kind() == AlreadyExists + || err.raw_os_error() == Some(21) + || err.raw_os_error() == Some(62) // no-follow on symlnk on mac-os + || err.raw_os_error() == Some(40) // no-follow on symlnk on ubuntu } #[cfg(windows)] From ea561e6f7d398991f214957dbd92e1b6a81e9ab0 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 7 Mar 2022 18:15:51 +0800 Subject: [PATCH 14/37] delete directories recursively on overwrite-existing (#301) --- git-worktree/src/index/entry.rs | 6 +++--- git-worktree/tests/index/checkout.rs | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/git-worktree/src/index/entry.rs b/git-worktree/src/index/entry.rs index 8e4cb762d79..ffe58f41e27 100644 --- a/git-worktree/src/index/entry.rs +++ b/git-worktree/src/index/entry.rs @@ -118,7 +118,7 @@ fn try_write_or_unlink( match op(path) { Ok(res) => Ok(res), Err(err) if os::indicates_collision(&err) => { - try_unlink_anything_non_recursive(path, &std::fs::symlink_metadata(path)?)?; + try_unlink_path_recursively(path, &std::fs::symlink_metadata(path)?)?; op(path) } Err(err) => Err(err), @@ -128,9 +128,9 @@ fn try_write_or_unlink( } } -fn try_unlink_anything_non_recursive(path: &Path, path_meta: &std::fs::Metadata) -> std::io::Result<()> { +fn try_unlink_path_recursively(path: &Path, path_meta: &std::fs::Metadata) -> std::io::Result<()> { if path_meta.is_dir() { - std::fs::remove_dir(path) + std::fs::remove_dir_all(path) } else if path_meta.is_symlink() { symlink::remove_symlink_auto(path) } else { diff --git a/git-worktree/tests/index/checkout.rs b/git-worktree/tests/index/checkout.rs index 3ea4c9d22c4..356cfbea79a 100644 --- a/git-worktree/tests/index/checkout.rs +++ b/git-worktree/tests/index/checkout.rs @@ -152,7 +152,6 @@ fn accidental_writes_through_symlinks_are_prevented_if_overwriting_is_forbidden( } #[test] -#[ignore] // TODO: needs recursive directory deletion fn writes_through_symlinks_are_prevented_even_if_overwriting_is_allowed() { let mut opts = opts_from_probe(); // with overwrite mode @@ -166,7 +165,11 @@ fn writes_through_symlinks_are_prevented_even_if_overwriting_is_allowed() { if opts.fs.ignore_case { assert_eq!( stripped_prefix(&source_tree, &source_files), + paths(["A-dir/a", "A-file", "fake-dir/b", "fake-file"]), + ); + assert_eq!( stripped_prefix(&destination, &worktree_files), + paths(["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE"]), ); assert!(outcome.collisions.is_empty()); } else { From 0bc94891c92f324d3940e064e8918b117db4641d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 7 Mar 2022 18:36:07 +0800 Subject: [PATCH 15/37] See if we can remove symlinks this way on windows (#301) --- git-worktree/src/index/checkout.rs | 3 ++- git-worktree/src/index/entry.rs | 2 +- git-worktree/src/os.rs | 7 ++++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/git-worktree/src/index/checkout.rs b/git-worktree/src/index/checkout.rs index 0b55bb38169..9cac920b74b 100644 --- a/git-worktree/src/index/checkout.rs +++ b/git-worktree/src/index/checkout.rs @@ -36,6 +36,7 @@ pub struct PathCache { mod cache { use super::PathCache; + use crate::os; use std::path::{Path, PathBuf}; impl PathCache { @@ -106,7 +107,7 @@ mod cache { if !meta.is_dir() { if self.unlink_on_collision { if meta.is_symlink() { - symlink::remove_symlink_auto(&self.valid)?; + os::remove_symlink(&self.valid)?; } else { std::fs::remove_file(&self.valid)?; } diff --git a/git-worktree/src/index/entry.rs b/git-worktree/src/index/entry.rs index ffe58f41e27..f09324f1add 100644 --- a/git-worktree/src/index/entry.rs +++ b/git-worktree/src/index/entry.rs @@ -132,7 +132,7 @@ fn try_unlink_path_recursively(path: &Path, path_meta: &std::fs::Metadata) -> st if path_meta.is_dir() { std::fs::remove_dir_all(path) } else if path_meta.is_symlink() { - symlink::remove_symlink_auto(path) + os::remove_symlink(path) } else { std::fs::remove_file(path) } diff --git a/git-worktree/src/os.rs b/git-worktree/src/os.rs index 28c546d1404..3e319c0a6f6 100644 --- a/git-worktree/src/os.rs +++ b/git-worktree/src/os.rs @@ -14,7 +14,12 @@ pub fn remove_symlink(path: &Path) -> io::Result<()> { #[cfg(windows)] pub fn remove_symlink(path: &Path) -> io::Result<()> { - symlink::remove_symlink_auto(path) + let meta = std::fs::metadata(path)?; + if meta.is_dir() { + std::fs::remove_dir(path) + } else { + std::fs::remove_file(path) + } } #[cfg(windows)] From 8f3bc5a3195770753b0b6445259ce20ab609b393 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 7 Mar 2022 18:51:59 +0800 Subject: [PATCH 16/37] debug mode for windows (#301) --- git-worktree/src/os.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/git-worktree/src/os.rs b/git-worktree/src/os.rs index 3e319c0a6f6..72a2730d003 100644 --- a/git-worktree/src/os.rs +++ b/git-worktree/src/os.rs @@ -14,12 +14,8 @@ pub fn remove_symlink(path: &Path) -> io::Result<()> { #[cfg(windows)] pub fn remove_symlink(path: &Path) -> io::Result<()> { - let meta = std::fs::metadata(path)?; - if meta.is_dir() { - std::fs::remove_dir(path) - } else { - std::fs::remove_file(path) - } + dbg!(path, std::fs::symlink_metadata(path), std::fs::metadata(path)); + symlink::remove_symlink_auto(path) } #[cfg(windows)] From 0c18443f5195e10c99504c4f527c1882fcf84e45 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 7 Mar 2022 19:19:35 +0800 Subject: [PATCH 17/37] some more debugging on windows (#301) --- git-worktree/src/fs.rs | 8 +++---- git-worktree/src/index/entry.rs | 1 + git-worktree/tests/index/checkout.rs | 33 +++++++++++++++++++++------- 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/git-worktree/src/fs.rs b/git-worktree/src/fs.rs index e301fb386df..c9baf6e185b 100644 --- a/git-worktree/src/fs.rs +++ b/git-worktree/src/fs.rs @@ -96,10 +96,10 @@ impl Capabilities { } let res = std::fs::symlink_metadata(&link_path).map(|m| m.is_symlink()); - let cleanup = std::fs::remove_file(&src_path); - crate::os::remove_symlink(&link_path) - .or_else(|_| std::fs::remove_file(&link_path)) - .and(cleanup)?; + + let cleanup = crate::os::remove_symlink(&link_path).or_else(|_| std::fs::remove_file(&link_path)); + std::fs::remove_file(&src_path).and(cleanup)?; + res } } diff --git a/git-worktree/src/index/entry.rs b/git-worktree/src/index/entry.rs index f09324f1add..7156e272ccb 100644 --- a/git-worktree/src/index/entry.rs +++ b/git-worktree/src/index/entry.rs @@ -44,6 +44,7 @@ where path: dest.to_path_buf(), })?; + #[cfg_attr(not(unix), allow(unused_mut))] let mut options = open_options(dest, destination_is_initially_empty, overwrite_existing); let needs_executable_bit = executable_bit && entry.mode == git_index::entry::Mode::FILE_EXECUTABLE; #[cfg(unix)] diff --git a/git-worktree/tests/index/checkout.rs b/git-worktree/tests/index/checkout.rs index 356cfbea79a..f5d5a19e3c3 100644 --- a/git-worktree/tests/index/checkout.rs +++ b/git-worktree/tests/index/checkout.rs @@ -96,7 +96,6 @@ mod cache { } } -use bstr::ByteVec; #[cfg(unix)] use std::os::unix::prelude::MetadataExt; use std::{ @@ -286,20 +285,38 @@ fn keep_going_collects_results() { .unwrap(); assert_eq!( - stripped_prefix(&destination, &dir_structure(&destination)), - paths(["dir/sub-dir/symlink", "executable"]), - "some files could not be created" + outcome + .errors + .iter() + .map( + |r| r.error.to_string()[.."object 4f41554f6e0045ef53848fc0c3f33b6a9abc24a9 for checkout at ".len()] + .to_owned() + ) + .collect::>(), + [ + "4f41554f6e0045ef53848fc0c3f33b6a9abc24a9", + "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391" + ] + .iter() + .map(|id| format!("object {} for checkout at ", id)) + .collect::>() ); - - assert!(outcome.collisions.is_empty()); assert_eq!( outcome .errors - .into_iter() - .map(|r| Vec::from(r.path).into_path_buf_lossy()) + .iter() + .map(|r| r.path.to_path_lossy().into_owned()) .collect::>(), paths(["dir/content", "empty"]) ); + + assert_eq!( + stripped_prefix(&destination, &dir_structure(&destination)), + paths(["dir/sub-dir/symlink", "executable"]), + "some files could not be created" + ); + + assert!(outcome.collisions.is_empty()); } #[test] From 9ea1e4474a3ce803da7a56e1fc1748f65c11a876 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 7 Mar 2022 20:07:12 +0800 Subject: [PATCH 18/37] refactor (#301) --- git-repository/tests/easy/id.rs | 6 +----- tests/tools/src/lib.rs | 7 +++++++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/git-repository/tests/easy/id.rs b/git-repository/tests/easy/id.rs index 7330784630e..88f46e4c628 100644 --- a/git-repository/tests/easy/id.rs +++ b/git-repository/tests/easy/id.rs @@ -12,11 +12,7 @@ fn prefix() -> crate::Result { // TODO: do this in-memory (with or without writing to disk) assert!( - std::process::Command::new("git") - .current_dir(worktree_dir.path()) - .args(["config", "--int", "core.abbrev", "5"]) - .status()? - .success(), + git_testtools::run_git(worktree_dir.path(), &["config", "--int", "core.abbrev", "5"])?.success(), "set core abbrev value successfully" ); diff --git a/tests/tools/src/lib.rs b/tests/tools/src/lib.rs index 30573cb241a..c3123ce2faa 100644 --- a/tests/tools/src/lib.rs +++ b/tests/tools/src/lib.rs @@ -12,6 +12,13 @@ pub use tempfile; static SCRIPT_IDENTITY: Lazy>> = Lazy::new(|| Mutex::new(BTreeMap::new())); +pub fn run_git(working_dir: &Path, args: &[&str]) -> std::io::Result { + std::process::Command::new("git") + .current_dir(working_dir) + .args(args) + .status() +} + pub fn hex_to_id(hex: &str) -> git_hash::ObjectId { git_hash::ObjectId::from_hex(hex.as_bytes()).expect("40 bytes hex") } From ff95265a35fb9f340c3a9fa78f8beba24d6734ff Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 7 Mar 2022 20:17:57 +0800 Subject: [PATCH 19/37] try to fix windows once again (#301) --- git-worktree/src/os.rs | 11 +++++++++- git-worktree/tests/index/checkout.rs | 31 ++++++++++------------------ 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/git-worktree/src/os.rs b/git-worktree/src/os.rs index 72a2730d003..fcbebf0b467 100644 --- a/git-worktree/src/os.rs +++ b/git-worktree/src/os.rs @@ -12,10 +12,19 @@ pub fn remove_symlink(path: &Path) -> io::Result<()> { std::fs::remove_file(path) } +// TODO: use the `symlink` crate once it can delete directory symlinks #[cfg(windows)] pub fn remove_symlink(path: &Path) -> io::Result<()> { dbg!(path, std::fs::symlink_metadata(path), std::fs::metadata(path)); - symlink::remove_symlink_auto(path) + if let Ok(meta) = std::fs::metadata(path) { + if meta.is_file() { + std::fs::remove_file(path) // this removes the link itself + } else { + std::fs::remove_dir(path) // however, this sees the destination directory, which isn't the right thing actually + } + } else { + std::fs::remove_file(path).or_else(|_| std::fs::remove_dir(path)) + } } #[cfg(windows)] diff --git a/git-worktree/tests/index/checkout.rs b/git-worktree/tests/index/checkout.rs index f5d5a19e3c3..b4239a66de9 100644 --- a/git-worktree/tests/index/checkout.rs +++ b/git-worktree/tests/index/checkout.rs @@ -58,7 +58,7 @@ mod cache { symlink::symlink_dir(&forbidden, tmp.path().join("link-to-dir")).unwrap(); std::fs::write(tmp.path().join("file-in-dir"), &[]).unwrap(); - for dirname in &["link-to-dir", "file-in-dir"] { + for dirname in &["file-in-dir", "link-to-dir"] { cache.unlink_on_collision = false; let relative_path = format!("{}/file", dirname); assert_eq!( @@ -284,35 +284,26 @@ fn keep_going_collects_results() { ) .unwrap(); - assert_eq!( - outcome - .errors - .iter() - .map( - |r| r.error.to_string()[.."object 4f41554f6e0045ef53848fc0c3f33b6a9abc24a9 for checkout at ".len()] - .to_owned() - ) - .collect::>(), - [ - "4f41554f6e0045ef53848fc0c3f33b6a9abc24a9", - "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391" - ] - .iter() - .map(|id| format!("object {} for checkout at ", id)) - .collect::>() - ); assert_eq!( outcome .errors .iter() .map(|r| r.path.to_path_lossy().into_owned()) .collect::>(), - paths(["dir/content", "empty"]) + paths(if opts.fs.symlink { + ["dir/content", "empty"] + } else { + ["dir/content", "dir/sub-dir/symlink"] // not actually a symlink anymore + }) ); assert_eq!( stripped_prefix(&destination, &dir_structure(&destination)), - paths(["dir/sub-dir/symlink", "executable"]), + paths(if opts.fs.symlink { + ["dir/sub-dir/symlink", "executable"] + } else { + ["empty", "executable"] + }), "some files could not be created" ); From 81bcb8d281099e952a5e3c075d9578f15f2f2a0d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 7 Mar 2022 20:42:55 +0800 Subject: [PATCH 20/37] fix windows test expecations for good (#301) On windows, there are no symlinks, and we change our expectations accordingly. --- git-worktree/Cargo.toml | 2 +- git-worktree/src/os.rs | 1 - git-worktree/tests/index/checkout.rs | 10 ++++++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/git-worktree/Cargo.toml b/git-worktree/Cargo.toml index 012e17cd9e3..7249ef619ee 100644 --- a/git-worktree/Cargo.toml +++ b/git-worktree/Cargo.toml @@ -28,7 +28,6 @@ thiserror = "1.0.26" bstr = { version = "0.2.13", default-features = false } document-features = { version = "0.2.0", optional = true } -symlink = "0.1.0" [target.'cfg(unix)'.dependencies] libc = "0.2.119" @@ -36,6 +35,7 @@ libc = "0.2.119" [dev-dependencies] git-testtools = { path = "../tests/tools" } git-odb = { path = "../git-odb" } +symlink = "0.1.0" walkdir = "2.3.2" tempfile = "3.2.0" diff --git a/git-worktree/src/os.rs b/git-worktree/src/os.rs index fcbebf0b467..620162e57e0 100644 --- a/git-worktree/src/os.rs +++ b/git-worktree/src/os.rs @@ -15,7 +15,6 @@ pub fn remove_symlink(path: &Path) -> io::Result<()> { // TODO: use the `symlink` crate once it can delete directory symlinks #[cfg(windows)] pub fn remove_symlink(path: &Path) -> io::Result<()> { - dbg!(path, std::fs::symlink_metadata(path), std::fs::metadata(path)); if let Ok(meta) = std::fs::metadata(path) { if meta.is_file() { std::fs::remove_file(path) // this removes the link itself diff --git a/git-worktree/tests/index/checkout.rs b/git-worktree/tests/index/checkout.rs index b4239a66de9..e5595497577 100644 --- a/git-worktree/tests/index/checkout.rs +++ b/git-worktree/tests/index/checkout.rs @@ -235,7 +235,8 @@ fn overwriting_files_and_lone_directories_works() { ); let symlink = destination.path().join("dir/sub-dir/symlink"); - assert!(std::fs::symlink_metadata(&symlink).unwrap().is_symlink(),); + // on windows, git won't create symlinks as its probe won't detect the capability, even though we do. + assert_eq!(std::fs::symlink_metadata(&symlink).unwrap().is_symlink(), cfg!(unix)); assert_eq!(std::fs::read(symlink).unwrap(), b"other content"); } @@ -290,16 +291,17 @@ fn keep_going_collects_results() { .iter() .map(|r| r.path.to_path_lossy().into_owned()) .collect::>(), - paths(if opts.fs.symlink { + paths(if cfg!(unix) { ["dir/content", "empty"] } else { - ["dir/content", "dir/sub-dir/symlink"] // not actually a symlink anymore + // not actually a symlink anymore, even though symlinks are supported but git think differently. + ["dir/content", "dir/sub-dir/symlink"] }) ); assert_eq!( stripped_prefix(&destination, &dir_structure(&destination)), - paths(if opts.fs.symlink { + paths(if cfg!(unix) { ["dir/sub-dir/symlink", "executable"] } else { ["empty", "executable"] From c3c31afb9dee5040abef7a8d6f8e1e2cba29e2d7 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 8 Mar 2022 10:33:57 +0800 Subject: [PATCH 21/37] refactor (#301) --- git-worktree/src/index/checkout.rs | 12 ++++-------- git-worktree/src/index/entry.rs | 19 ++++++++++--------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/git-worktree/src/index/checkout.rs b/git-worktree/src/index/checkout.rs index 9cac920b74b..cfcbbe46b98 100644 --- a/git-worktree/src/index/checkout.rs +++ b/git-worktree/src/index/checkout.rs @@ -101,7 +101,7 @@ mod cache { self.test_mkdir_calls += 1; } match std::fs::create_dir(&self.valid) { - Ok(()) => Ok(()), + Ok(()) => {} Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => { let meta = self.valid.symlink_metadata()?; if !meta.is_dir() { @@ -116,19 +116,15 @@ mod cache { self.test_mkdir_calls += 1; } std::fs::create_dir(&self.valid)?; - Ok(()) } else { - Err(err) + return Err(err); } - } else { - Ok(()) } } - Err(err) => Err(err), + Err(err) => return Err(err), } - } else { - Ok(()) } + Ok(()) })(); if let Err(err) = res { diff --git a/git-worktree/src/index/entry.rs b/git-worktree/src/index/entry.rs index 7156e272ccb..975964f63fa 100644 --- a/git-worktree/src/index/entry.rs +++ b/git-worktree/src/index/entry.rs @@ -139,17 +139,18 @@ fn try_unlink_path_recursively(path: &Path, path_meta: &std::fs::Metadata) -> st } } +#[cfg(not(debug_assertions))] +fn debug_assert_dest_is_no_symlink(_path: &Path) {} + /// This is a debug assertion as we expect the machinery calling this to prevent this possibility in the first place +#[cfg(debug_assertions)] fn debug_assert_dest_is_no_symlink(path: &Path) { - #[cfg(debug_assertions)] - { - if let Ok(meta) = path.metadata() { - debug_assert!( - !meta.is_symlink(), - "BUG: should not ever allow to overwrite/write-into the target of a symbolic link: {}", - path.display() - ); - } + if let Ok(meta) = path.metadata() { + debug_assert!( + !meta.is_symlink(), + "BUG: should not ever allow to overwrite/write-into the target of a symbolic link: {}", + path.display() + ); } } From 542f49beb811f7f9bf9dff3cd19694498f6cf9e2 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 8 Mar 2022 10:57:33 +0800 Subject: [PATCH 22/37] refactor (#301) --- git-worktree/src/index/entry.rs | 6 - gitoxide-core/src/index/checkout.rs | 131 ++++++++++++++++++ gitoxide-core/src/index/entries.rs | 31 +++++ gitoxide-core/src/index/mod.rs | 204 ++++------------------------ 4 files changed, 187 insertions(+), 185 deletions(-) create mode 100644 gitoxide-core/src/index/checkout.rs diff --git a/git-worktree/src/index/entry.rs b/git-worktree/src/index/entry.rs index 975964f63fa..ddce5579b37 100644 --- a/git-worktree/src/index/entry.rs +++ b/git-worktree/src/index/entry.rs @@ -80,13 +80,7 @@ where let symlink_destination = git_features::path::from_byte_slice(obj.data) .map_err(|_| index::checkout::Error::IllformedUtf8 { path: obj.data.into() })?; - // TODO: how to deal with mode changes? Maybe this info can be passed once we check for whether - // a checkout is needed at all. if symlink { - // TODO: handle 'overwrite_existing' mode, which checks for 'exists' errors and unlinks existing files - // or directories or symlinks. Doing this shouldn't invalidate the cache as it's only valid till - // our parent anyway, but it may invalidate caches in other threads without their knowledge. - // collisions with overwrite mode must be handled with great care in parallel mode. try_write_or_unlink(dest, overwrite_existing, |p| { crate::os::create_symlink(symlink_destination, p) })?; diff --git a/gitoxide-core/src/index/checkout.rs b/gitoxide-core/src/index/checkout.rs new file mode 100644 index 00000000000..eb6074dea31 --- /dev/null +++ b/gitoxide-core/src/index/checkout.rs @@ -0,0 +1,131 @@ +use crate::index; +use crate::index::{parse_file, Options}; +use anyhow::bail; +use git::{odb::FindExt, worktree::index::checkout, Progress}; +use git_repository as git; +use std::path::{Path, PathBuf}; + +pub fn checkout_exclusive( + index_path: impl AsRef, + dest_directory: impl AsRef, + repo: Option, + mut err: impl std::io::Write, + mut progress: impl Progress, + index::checkout_exclusive::Options { + index: Options { object_hash, .. }, + empty_files, + keep_going, + }: index::checkout_exclusive::Options, +) -> anyhow::Result<()> { + let repo = repo + .map(|dir| git_repository::discover(dir).map(|r| r.apply_environment())) + .transpose()?; + + let dest_directory = dest_directory.as_ref(); + if dest_directory.exists() { + bail!( + "Refusing to checkout index into existing directory '{}' - remove it and try again", + dest_directory.display() + ) + } + std::fs::create_dir_all(dest_directory)?; + + let mut index = parse_file(index_path, object_hash)?; + + let mut num_skipped = 0; + let maybe_symlink_mode = if !empty_files && repo.is_some() { + git::index::entry::Mode::DIR + } else { + git::index::entry::Mode::SYMLINK + }; + for entry in index.entries_mut().iter_mut().filter(|e| { + e.mode + .contains(maybe_symlink_mode | git::index::entry::Mode::DIR | git::index::entry::Mode::COMMIT) + }) { + entry.flags.insert(git::index::entry::Flags::SKIP_WORKTREE); + num_skipped += 1; + } + if num_skipped > 0 { + progress.info(format!("Skipping {} DIR/SYMLINK/COMMIT entries", num_skipped)); + } + + let opts = git::worktree::index::checkout::Options { + fs: git::worktree::fs::Capabilities::probe(dest_directory), + + destination_is_initially_empty: true, + overwrite_existing: false, + keep_going, + ..Default::default() + }; + + let mut files = progress.add_child("checkout"); + let mut bytes = progress.add_child("writing"); + + let entries_for_checkout = index.entries().len() - num_skipped; + files.init(Some(entries_for_checkout), git::progress::count("files")); + bytes.init(None, git::progress::bytes()); + + let start = std::time::Instant::now(); + let checkout::Outcome { errors, collisions } = match &repo { + Some(repo) => git::worktree::index::checkout( + &mut index, + dest_directory, + |oid, buf| { + repo.objects.find_blob(oid, buf).ok(); + if empty_files { + // We always want to query the ODB here… + repo.objects.find_blob(oid, buf)?; + buf.clear(); + // …but write nothing + Ok(git::objs::BlobRef { data: buf }) + } else { + repo.objects.find_blob(oid, buf) + } + }, + &mut files, + &mut bytes, + opts, + ), + None => git::worktree::index::checkout( + &mut index, + dest_directory, + |_, buf| { + buf.clear(); + Ok(git::objs::BlobRef { data: buf }) + }, + &mut files, + &mut bytes, + opts, + ), + }?; + + files.show_throughput(start); + bytes.show_throughput(start); + + progress.done(format!( + "Created {} {} files", + entries_for_checkout.saturating_sub(errors.len() + collisions.len()), + repo.is_none().then(|| "empty").unwrap_or_default() + )); + + if !(collisions.is_empty() && errors.is_empty()) { + let mut messages = Vec::new(); + if !errors.is_empty() { + messages.push(format!("kept going through {} errors(s)", errors.len())); + for record in errors { + writeln!(err, "{}: {}", record.path, record.error).ok(); + } + } + if !collisions.is_empty() { + messages.push(format!("encountered {} collision(s)", collisions.len())); + for col in collisions { + writeln!(err, "{}: collision ({:?})", col.path, col.error_kind).ok(); + } + } + bail!( + "One or more errors occurred - checkout is incomplete: {}", + messages.join(", ") + ); + } + Ok(()) +} diff --git a/gitoxide-core/src/index/entries.rs b/gitoxide-core/src/index/entries.rs index 01e718824c5..b6133866103 100644 --- a/gitoxide-core/src/index/entries.rs +++ b/gitoxide-core/src/index/entries.rs @@ -1,4 +1,35 @@ +use crate::index::{parse_file, Options}; use git_repository as git; +use std::path::Path; + +pub fn entries( + index_path: impl AsRef, + mut out: impl std::io::Write, + Options { object_hash, format }: Options, +) -> anyhow::Result<()> { + use crate::OutputFormat::*; + let file = parse_file(index_path, object_hash)?; + + #[cfg(feature = "serde1")] + if let Json = format { + out.write_all(b"[\n")?; + } + + let mut entries = file.entries().iter().peekable(); + while let Some(entry) = entries.next() { + match format { + Human => to_human(&mut out, &file, entry)?, + #[cfg(feature = "serde1")] + Json => to_json(&mut out, &file, entry, entries.peek().is_none())?, + } + } + + #[cfg(feature = "serde1")] + if let Json = format { + out.write_all(b"]\n")?; + } + Ok(()) +} #[cfg(feature = "serde1")] pub(crate) fn to_json( diff --git a/gitoxide-core/src/index/mod.rs b/gitoxide-core/src/index/mod.rs index 5ff85231e21..5ef24b94266 100644 --- a/gitoxide-core/src/index/mod.rs +++ b/gitoxide-core/src/index/mod.rs @@ -1,9 +1,6 @@ -use anyhow::bail; -use std::path::{Path, PathBuf}; +use std::path::Path; use git_repository as git; -use git_repository::worktree::index::checkout; -use git_repository::{odb::FindExt, Progress}; pub struct Options { pub object_hash: git::hash::Kind, @@ -11,9 +8,33 @@ pub struct Options { } mod entries; +pub use entries::entries; pub mod information; +fn parse_file(index_path: impl AsRef, object_hash: git::hash::Kind) -> anyhow::Result { + git::index::File::at( + index_path.as_ref(), + git::index::decode::Options { + object_hash, + ..Default::default() + }, + ) + .map_err(Into::into) +} + +pub mod checkout_exclusive { + pub struct Options { + pub index: super::Options, + /// If true, all files will be written with zero bytes despite having made an ODB lookup. + pub empty_files: bool, + pub keep_going: bool, + } +} + +mod checkout; +pub use checkout::checkout_exclusive; + pub fn verify( index_path: impl AsRef, mut out: impl std::io::Write, @@ -61,178 +82,3 @@ pub fn information( } } } - -pub fn entries( - index_path: impl AsRef, - mut out: impl std::io::Write, - Options { object_hash, format }: Options, -) -> anyhow::Result<()> { - use crate::OutputFormat::*; - let file = parse_file(index_path, object_hash)?; - - #[cfg(feature = "serde1")] - if let Json = format { - out.write_all(b"[\n")?; - } - - let mut entries = file.entries().iter().peekable(); - while let Some(entry) = entries.next() { - match format { - Human => entries::to_human(&mut out, &file, entry)?, - #[cfg(feature = "serde1")] - Json => entries::to_json(&mut out, &file, entry, entries.peek().is_none())?, - } - } - - #[cfg(feature = "serde1")] - if let Json = format { - out.write_all(b"]\n")?; - } - Ok(()) -} - -fn parse_file(index_path: impl AsRef, object_hash: git::hash::Kind) -> anyhow::Result { - git::index::File::at( - index_path.as_ref(), - git::index::decode::Options { - object_hash, - ..Default::default() - }, - ) - .map_err(Into::into) -} - -pub mod checkout_exclusive { - pub struct Options { - pub index: super::Options, - /// If true, all files will be written with zero bytes despite having made an ODB lookup. - pub empty_files: bool, - pub keep_going: bool, - } -} - -pub fn checkout_exclusive( - index_path: impl AsRef, - dest_directory: impl AsRef, - repo: Option, - mut err: impl std::io::Write, - mut progress: impl Progress, - checkout_exclusive::Options { - index: Options { object_hash, .. }, - empty_files, - keep_going, - }: checkout_exclusive::Options, -) -> anyhow::Result<()> { - let repo = repo - .map(|dir| git_repository::discover(dir).map(|r| r.apply_environment())) - .transpose()?; - - let dest_directory = dest_directory.as_ref(); - if dest_directory.exists() { - bail!( - "Refusing to checkout index into existing directory '{}' - remove it and try again", - dest_directory.display() - ) - } - std::fs::create_dir_all(dest_directory)?; - - let mut index = parse_file(index_path, object_hash)?; - - let mut num_skipped = 0; - let maybe_symlink_mode = if !empty_files && repo.is_some() { - git::index::entry::Mode::DIR - } else { - git::index::entry::Mode::SYMLINK - }; - for entry in index.entries_mut().iter_mut().filter(|e| { - e.mode - .contains(maybe_symlink_mode | git::index::entry::Mode::DIR | git::index::entry::Mode::COMMIT) - }) { - entry.flags.insert(git::index::entry::Flags::SKIP_WORKTREE); - num_skipped += 1; - } - if num_skipped > 0 { - progress.info(format!("Skipping {} DIR/SYMLINK/COMMIT entries", num_skipped)); - } - - let opts = git::worktree::index::checkout::Options { - fs: git::worktree::fs::Capabilities::probe(dest_directory), - - // TODO: turn the two following flags into an enum - destination_is_initially_empty: true, - overwrite_existing: false, - keep_going, - ..Default::default() - }; - - let mut files = progress.add_child("checkout"); - let mut bytes = progress.add_child("writing"); - - let entries_for_checkout = index.entries().len() - num_skipped; - files.init(Some(entries_for_checkout), git::progress::count("files")); - bytes.init(None, git::progress::bytes()); - - let start = std::time::Instant::now(); - let checkout::Outcome { errors, collisions } = match &repo { - Some(repo) => git::worktree::index::checkout( - &mut index, - dest_directory, - |oid, buf| { - repo.objects.find_blob(oid, buf).ok(); - if empty_files { - // We always want to query the ODB here… - repo.objects.find_blob(oid, buf)?; - buf.clear(); - // …but write nothing - Ok(git::objs::BlobRef { data: buf }) - } else { - repo.objects.find_blob(oid, buf) - } - }, - &mut files, - &mut bytes, - opts, - ), - None => git::worktree::index::checkout( - &mut index, - dest_directory, - |_, buf| { - buf.clear(); - Ok(git::objs::BlobRef { data: buf }) - }, - &mut files, - &mut bytes, - opts, - ), - }?; - - files.show_throughput(start); - bytes.show_throughput(start); - - progress.done(format!( - "Created {} {} files", - entries_for_checkout.saturating_sub(errors.len() + collisions.len()), - repo.is_none().then(|| "empty").unwrap_or_default() - )); - - if !(collisions.is_empty() && errors.is_empty()) { - let mut messages = Vec::new(); - if !errors.is_empty() { - messages.push(format!("kept going through {} errors(s)", errors.len())); - for record in errors { - writeln!(err, "{}: {}", record.path, record.error).ok(); - } - } - if !collisions.is_empty() { - messages.push(format!("encountered {} collision(s)", collisions.len())); - for col in collisions { - writeln!(err, "{}: collision ({:?})", col.path, col.error_kind).ok(); - } - } - bail!( - "One or more errors occurred - checkout is incomplete: {}", - messages.join(", ") - ); - } - Ok(()) -} From 0f0d390c475044a75e5db4dcd831d755e74aa3e9 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 8 Mar 2022 13:06:15 +0800 Subject: [PATCH 23/37] fix `interrupt::Iter` (#301) --- git-features/src/interrupt.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/git-features/src/interrupt.rs b/git-features/src/interrupt.rs index d689e3915ea..cd07207fec6 100644 --- a/git-features/src/interrupt.rs +++ b/git-features/src/interrupt.rs @@ -38,7 +38,10 @@ where fn next(&mut self) -> Option { self.make_err.as_ref()?; if self.should_interrupt.load(Ordering::Relaxed) { - return Some(Err(self.make_err.take().expect("no bug")())); + return match self.make_err.take() { + Some(f) => Some(Err(f())), + None => None, + }; } match self.inner.next() { Some(next) => Some(Ok(next)), From 8945d95f7fa88562d37ff67ac6e38bead73dd2df Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 8 Mar 2022 13:12:04 +0800 Subject: [PATCH 24/37] feat!: `interrupt::Iter`, rename `interrupt::Iter` -> `interrupt::IterWithError` (#301) --- git-features/src/interrupt.rs | 47 ++++++++++++++++++++++-- git-features/src/parallel/in_parallel.rs | 3 +- git-repository/src/interrupt.rs | 4 +- 3 files changed, 47 insertions(+), 7 deletions(-) diff --git a/git-features/src/interrupt.rs b/git-features/src/interrupt.rs index cd07207fec6..4cdbd373b4c 100644 --- a/git-features/src/interrupt.rs +++ b/git-features/src/interrupt.rs @@ -4,15 +4,54 @@ use std::{ sync::atomic::{AtomicBool, Ordering}, }; +/// A wrapper for an inner iterator which will check for interruptions on each iteration, stopping the iteration when +/// that is requested. +pub struct Iter<'a, I> { + /// The actual iterator to yield elements from. + pub inner: I, + should_interrupt: &'a AtomicBool, +} + +impl<'a, I> Iter<'a, I> +where + I: Iterator, +{ + /// Create a new iterator over `inner` which checks for interruptions on each iteration on `should_interrupt`. + /// + /// Note that this means the consumer of the iterator data should also be able to access `should_interrupt` and + /// consider it when producing the final result to avoid claiming success even though the operation is only partially + /// complete. + pub fn new(inner: I, should_interrupt: &'a AtomicBool) -> Self { + Iter { + inner, + should_interrupt, + } + } +} + +impl<'a, I> Iterator for Iter<'a, I> +where + I: Iterator, +{ + type Item = I::Item; + + fn next(&mut self) -> Option { + if self.should_interrupt.load(Ordering::Relaxed) { + return None; + } + self.inner.next() + } +} + /// A wrapper for an inner iterator which will check for interruptions on each iteration. -pub struct Iter<'a, I, EFN> { +pub struct IterWithErr<'a, I, EFN> { /// The actual iterator to yield elements from. pub inner: I, make_err: Option, should_interrupt: &'a AtomicBool, } -impl<'a, I, EFN, E> Iter<'a, I, EFN> +impl<'a, I, EFN, E> IterWithErr<'a, I, EFN> where I: Iterator, EFN: FnOnce() -> E, @@ -20,7 +59,7 @@ where /// Create a new iterator over `inner` which checks for interruptions on each iteration and cals `make_err()` to /// signal an interruption happened, causing no further items to be iterated from that point on. pub fn new(inner: I, make_err: EFN, should_interrupt: &'a AtomicBool) -> Self { - Iter { + IterWithErr { inner, make_err: Some(make_err), should_interrupt, @@ -28,7 +67,7 @@ where } } -impl<'a, I, EFN, E> Iterator for Iter<'a, I, EFN> +impl<'a, I, EFN, E> Iterator for IterWithErr<'a, I, EFN> where I: Iterator, EFN: FnOnce() -> E, diff --git a/git-features/src/parallel/in_parallel.rs b/git-features/src/parallel/in_parallel.rs index 7eab2087662..8925a780e44 100644 --- a/git-features/src/parallel/in_parallel.rs +++ b/git-features/src/parallel/in_parallel.rs @@ -13,7 +13,8 @@ pub fn join(left: impl FnOnce() -> O1 + Send, right: impl Fn /// Runs `f` with a scope to be used for spawning threads that will not outlive the function call. /// That way it's possible to handle threads without needing the 'static lifetime for data they interact with. /// -/// Note that the threads should not rely on actual parallelism as threading might be turned off entirely. +/// Note that the threads should not rely on actual parallelism as threading might be turned off entirely, hence should not +/// connect each other with channels as deadlock would occour in single-threaded mode. pub fn threads<'env, F, R>(f: F) -> std::thread::Result where F: FnOnce(&crossbeam_utils::thread::Scope<'env>) -> R, diff --git a/git-repository/src/interrupt.rs b/git-repository/src/interrupt.rs index 95957fc06d2..39fd259f7fa 100644 --- a/git-repository/src/interrupt.rs +++ b/git-repository/src/interrupt.rs @@ -62,7 +62,7 @@ pub use init::init_handler; /// A wrapper for an inner iterator which will check for interruptions on each iteration. pub struct Iter { /// The actual iterator to yield elements from. - inner: git_features::interrupt::Iter<'static, I, EFN>, + inner: git_features::interrupt::IterWithErr<'static, I, EFN>, } impl Iter @@ -74,7 +74,7 @@ where /// signal an interruption happened, causing no further items to be iterated from that point on. pub fn new(inner: I, make_err: EFN) -> Self { Iter { - inner: git_features::interrupt::Iter::new(inner, make_err, &IS_INTERRUPTED), + inner: git_features::interrupt::IterWithErr::new(inner, make_err, &IS_INTERRUPTED), } } From 8cbe85d135898826a91939726465a9e295c1e24b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 8 Mar 2022 16:06:43 +0800 Subject: [PATCH 25/37] add thread-count and chunk-size computation; interrupt capability (#301) --- git-features/src/parallel/reduce.rs | 6 +++--- git-worktree/src/index/checkout.rs | 5 +++++ git-worktree/src/index/mod.rs | 11 ++++++++++- git-worktree/tests/index/checkout.rs | 2 ++ gitoxide-core/src/index/checkout.rs | 4 ++++ src/plumbing/main.rs | 1 + 6 files changed, 25 insertions(+), 4 deletions(-) diff --git a/git-features/src/parallel/reduce.rs b/git-features/src/parallel/reduce.rs index 3a700317a32..f9992cfd23d 100644 --- a/git-features/src/parallel/reduce.rs +++ b/git-features/src/parallel/reduce.rs @@ -10,7 +10,7 @@ mod stepped { receive_result: std::sync::mpsc::Receiver, /// `join()` will be called on these guards to assure every thread tries to send through a closed channel. When /// that happens, they break out of their loops. - _threads: Vec>, + threads: Vec>, /// The reducer is called only in the thread using the iterator, dropping it has no side effects. reducer: Option, } @@ -21,7 +21,7 @@ mod stepped { drop(std::mem::replace(&mut self.receive_result, sink)); let mut last_err = None; - for handle in std::mem::take(&mut self._threads) { + for handle in std::mem::take(&mut self.threads) { if let Err(err) = handle.join() { last_err = Some(err); }; @@ -82,7 +82,7 @@ mod stepped { receive_result }; Stepwise { - _threads: threads, + threads, receive_result, reducer: Some(reducer), } diff --git a/git-worktree/src/index/checkout.rs b/git-worktree/src/index/checkout.rs index cfcbbe46b98..54776a69399 100644 --- a/git-worktree/src/index/checkout.rs +++ b/git-worktree/src/index/checkout.rs @@ -164,6 +164,10 @@ pub struct Outcome { pub struct Options { /// capabilities of the file system pub fs: crate::fs::Capabilities, + /// If set, don't use more than this amount of threads. + /// Otherwise, usually use as many threads as there are logical cores. + /// A value of 0 is interpreted as no-limit + pub thread_limit: Option, /// If true, we assume no file to exist in the target directory, and want exclusive access to it. /// This should be enabled when cloning to avoid checks for freshness of files. This also enables /// detection of collisions based on whether or not exclusive file creation succeeds or fails. @@ -194,6 +198,7 @@ impl Default for Options { fn default() -> Self { Options { fs: Default::default(), + thread_limit: None, destination_is_initially_empty: false, keep_going: false, trust_ctime: true, diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs index 856dc3b51a1..f262e370cb3 100644 --- a/git-worktree/src/index/mod.rs +++ b/git-worktree/src/index/mod.rs @@ -1,6 +1,8 @@ use bstr::BStr; +use git_features::interrupt; use git_features::progress::Progress; use git_hash::oid; +use std::sync::atomic::AtomicBool; use crate::index::checkout::{ErrorRecord, PathCache}; use crate::{index, os}; @@ -14,6 +16,7 @@ pub fn checkout( find: Find, files: &mut impl Progress, bytes: &mut impl Progress, + should_interrupt: &AtomicBool, options: checkout::Options, ) -> Result> where @@ -35,8 +38,14 @@ where find, options, }; + let (chunk_size, _, num_threads) = git_features::parallel::optimize_chunk_size_and_thread_limit( + 100, + index.entries().len().into(), + options.thread_limit, + None, + ); - for (entry, entry_path) in index.entries_mut_with_paths() { + for (entry, entry_path) in interrupt::Iter::new(index.entries_mut_with_paths(), should_interrupt) { // TODO: write test for that if entry.flags.contains(git_index::entry::Flags::SKIP_WORKTREE) { ctx.files.inc(); diff --git a/git-worktree/tests/index/checkout.rs b/git-worktree/tests/index/checkout.rs index e5595497577..08c850bc4da 100644 --- a/git-worktree/tests/index/checkout.rs +++ b/git-worktree/tests/index/checkout.rs @@ -109,6 +109,7 @@ use git_object::bstr::ByteSlice; use git_odb::FindExt; use git_worktree::{fs::Capabilities, index, index::checkout::Collision}; use std::io::ErrorKind::AlreadyExists; +use std::sync::atomic::AtomicBool; use tempfile::TempDir; use crate::fixture_path; @@ -468,6 +469,7 @@ fn checkout_index_in_tmp_dir_opts( }, &mut progress::Discard, &mut progress::Discard, + &AtomicBool::default(), opts, )?; Ok((source_tree, destination, index, outcome)) diff --git a/gitoxide-core/src/index/checkout.rs b/gitoxide-core/src/index/checkout.rs index eb6074dea31..cf58441b1b3 100644 --- a/gitoxide-core/src/index/checkout.rs +++ b/gitoxide-core/src/index/checkout.rs @@ -4,6 +4,7 @@ use anyhow::bail; use git::{odb::FindExt, worktree::index::checkout, Progress}; use git_repository as git; use std::path::{Path, PathBuf}; +use std::sync::atomic::AtomicBool; pub fn checkout_exclusive( index_path: impl AsRef, @@ -11,6 +12,7 @@ pub fn checkout_exclusive( repo: Option, mut err: impl std::io::Write, mut progress: impl Progress, + should_interrupt: &AtomicBool, index::checkout_exclusive::Options { index: Options { object_hash, .. }, empty_files, @@ -84,6 +86,7 @@ pub fn checkout_exclusive( }, &mut files, &mut bytes, + should_interrupt, opts, ), None => git::worktree::index::checkout( @@ -95,6 +98,7 @@ pub fn checkout_exclusive( }, &mut files, &mut bytes, + should_interrupt, opts, ), }?; diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 1300a9b5575..c08076f95e4 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -97,6 +97,7 @@ pub fn main() -> Result<()> { repository, err, progress, + &should_interrupt, core::index::checkout_exclusive::Options { index: core::index::Options { object_hash, format }, empty_files, From 7575a5854ebe61a5941177efb470143192223ef3 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 8 Mar 2022 16:16:21 +0800 Subject: [PATCH 26/37] proper handling of interruptions during checkout (#301) --- git-worktree/src/index/checkout.rs | 2 ++ git-worktree/src/index/mod.rs | 10 ++++++++-- gitoxide-core/src/index/checkout.rs | 23 ++++++++++++++++++----- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/git-worktree/src/index/checkout.rs b/git-worktree/src/index/checkout.rs index 54776a69399..3931ae3f634 100644 --- a/git-worktree/src/index/checkout.rs +++ b/git-worktree/src/index/checkout.rs @@ -156,6 +156,8 @@ pub struct ErrorRecord { } pub struct Outcome { + /// The amount of files updated, or created. + pub files_updated: usize, pub collisions: Vec, pub errors: Vec, } diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs index f262e370cb3..e05fe74f417 100644 --- a/git-worktree/src/index/mod.rs +++ b/git-worktree/src/index/mod.rs @@ -2,7 +2,7 @@ use bstr::BStr; use git_features::interrupt; use git_features::progress::Progress; use git_hash::oid; -use std::sync::atomic::AtomicBool; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use crate::index::checkout::{ErrorRecord, PathCache}; use crate::{index, os}; @@ -28,6 +28,7 @@ where let mut delayed = Vec::new(); + let num_files = AtomicUsize::default(); let mut ctx = Context { buf: Vec::new(), collisions: Vec::new(), @@ -37,8 +38,9 @@ where bytes, find, options, + num_files: &num_files, }; - let (chunk_size, _, num_threads) = git_features::parallel::optimize_chunk_size_and_thread_limit( + let (_chunk_size, _, _num_threads) = git_features::parallel::optimize_chunk_size_and_thread_limit( 100, index.entries().len().into(), options.thread_limit, @@ -72,6 +74,7 @@ where } Ok(checkout::Outcome { + files_updated: ctx.num_files.load(Ordering::Relaxed), collisions: ctx.collisions, errors: ctx.errors, }) @@ -86,6 +89,7 @@ struct Context<'a, P1, P2, Find> { options: checkout::Options, errors: Vec, collisions: Vec, + num_files: &'a AtomicUsize, } fn checkout_entry_handle_result( @@ -100,6 +104,7 @@ fn checkout_entry_handle_result( options, errors, collisions, + num_files, }: &mut Context<'_, P1, P2, Find>, ) -> Result<(), checkout::Error> where @@ -110,6 +115,7 @@ where { let res = entry::checkout(entry, entry_path, find, path_cache, *options, buf); files.inc(); + num_files.fetch_add(1, Ordering::SeqCst); match res { Ok(object_size) => bytes.inc_by(object_size), Err(index::checkout::Error::Io(err)) if os::indicates_collision(&err) => { diff --git a/gitoxide-core/src/index/checkout.rs b/gitoxide-core/src/index/checkout.rs index cf58441b1b3..1f65f402bce 100644 --- a/gitoxide-core/src/index/checkout.rs +++ b/gitoxide-core/src/index/checkout.rs @@ -4,7 +4,7 @@ use anyhow::bail; use git::{odb::FindExt, worktree::index::checkout, Progress}; use git_repository as git; use std::path::{Path, PathBuf}; -use std::sync::atomic::AtomicBool; +use std::sync::atomic::{AtomicBool, Ordering}; pub fn checkout_exclusive( index_path: impl AsRef, @@ -68,7 +68,11 @@ pub fn checkout_exclusive( bytes.init(None, git::progress::bytes()); let start = std::time::Instant::now(); - let checkout::Outcome { errors, collisions } = match &repo { + let checkout::Outcome { + errors, + collisions, + files_updated, + } = match &repo { Some(repo) => git::worktree::index::checkout( &mut index, dest_directory, @@ -107,9 +111,18 @@ pub fn checkout_exclusive( bytes.show_throughput(start); progress.done(format!( - "Created {} {} files", - entries_for_checkout.saturating_sub(errors.len() + collisions.len()), - repo.is_none().then(|| "empty").unwrap_or_default() + "Created {} {} files{}", + files_updated, + repo.is_none().then(|| "empty").unwrap_or_default(), + should_interrupt + .load(Ordering::Relaxed) + .then(|| { + format!( + " of {}", + entries_for_checkout.saturating_sub(errors.len() + collisions.len()) + ) + }) + .unwrap_or_default(), )); if !(collisions.is_empty() && errors.is_empty()) { From e5f69433e4a6cc7866b666e0baccfa32efb92a7f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 8 Mar 2022 16:59:27 +0800 Subject: [PATCH 27/37] switch index checkout to chunk-based operation (#301) Right now we have a single-threaded optimization that we might as well keep, and what follows is a multi-threaded one using our standard parallelization approach which pays a little extra for chunking. --- git-features/src/lib.rs | 32 +++ git-pack/src/data/output/count/objects/mod.rs | 4 +- .../src/data/output/count/objects/util.rs | 25 -- git-worktree/src/index/mod.rs | 222 +++++++++++------- git-worktree/tests/index/checkout.rs | 1 + 5 files changed, 167 insertions(+), 117 deletions(-) diff --git a/git-features/src/lib.rs b/git-features/src/lib.rs index 12563ceda5e..37220b29b70 100644 --- a/git-features/src/lib.rs +++ b/git-features/src/lib.rs @@ -35,3 +35,35 @@ pub mod zlib; /// #[cfg(feature = "time")] pub mod time; + +/// +pub mod util { + /// An iterator over chunks of input, producing `Vec` with a size of `size`, with the last chunk being the remainder and thus + /// potentially smaller than `size`. + pub struct Chunks { + /// The inner iterator to ask for items. + pub inner: I, + /// The size of chunks to produce + pub size: usize, + } + + impl Iterator for Chunks + where + I: Iterator, + { + type Item = Vec; + + fn next(&mut self) -> Option { + let mut res = Vec::with_capacity(self.size); + let mut items_left = self.size; + for item in &mut self.inner { + res.push(item); + items_left -= 1; + if items_left == 0 { + break; + } + } + (!res.is_empty()).then(|| res) + } + } +} diff --git a/git-pack/src/data/output/count/objects/mod.rs b/git-pack/src/data/output/count/objects/mod.rs index 91a83f0598f..e5e3cfcac16 100644 --- a/git-pack/src/data/output/count/objects/mod.rs +++ b/git-pack/src/data/output/count/objects/mod.rs @@ -61,8 +61,8 @@ where thread_limit, None, ); - let chunks = util::Chunks { - iter: objects_ids, + let chunks = git_features::util::Chunks { + inner: objects_ids, size: chunk_size, }; let seen_objs = dashmap::DashSet::::default(); diff --git a/git-pack/src/data/output/count/objects/util.rs b/git-pack/src/data/output/count/objects/util.rs index a6d85a32c0d..ea8a519fd10 100644 --- a/git-pack/src/data/output/count/objects/util.rs +++ b/git-pack/src/data/output/count/objects/util.rs @@ -22,28 +22,3 @@ mod trait_impls { } } } - -pub struct Chunks { - pub size: usize, - pub iter: I, -} - -impl Iterator for Chunks -where - I: Iterator, -{ - type Item = Vec; - - fn next(&mut self) -> Option { - let mut res = Vec::with_capacity(self.size); - let mut items_left = self.size; - for item in &mut self.iter { - res.push(item); - items_left -= 1; - if items_left == 0 { - break; - } - } - (!res.is_empty()).then(|| res) - } -} diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs index e05fe74f417..18510a4f973 100644 --- a/git-worktree/src/index/mod.rs +++ b/git-worktree/src/index/mod.rs @@ -1,15 +1,14 @@ -use bstr::BStr; use git_features::interrupt; use git_features::progress::Progress; use git_hash::oid; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; -use crate::index::checkout::{ErrorRecord, PathCache}; -use crate::{index, os}; +use crate::index::checkout::PathCache; pub mod checkout; pub(crate) mod entry; +/// Note that interruption still produce an `Ok(…)` value, so the caller should look at `should_interrupt` to communicate the outcome. pub fn checkout( index: &mut git_index::State, dir: impl Into, @@ -23,17 +22,15 @@ where Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, E: std::error::Error + Send + Sync + 'static, { - let mut path_cache = PathCache::new(dir.into()); - path_cache.unlink_on_collision = options.overwrite_existing; - - let mut delayed = Vec::new(); - let num_files = AtomicUsize::default(); - let mut ctx = Context { + + let mut ctx = chunk::Context { buf: Vec::new(), - collisions: Vec::new(), - errors: Vec::new(), - path_cache, + path_cache: { + let mut cache = PathCache::new(dir.into()); + cache.unlink_on_collision = options.overwrite_existing; + cache + }, files, bytes, find, @@ -47,96 +44,141 @@ where None, ); - for (entry, entry_path) in interrupt::Iter::new(index.entries_mut_with_paths(), should_interrupt) { - // TODO: write test for that - if entry.flags.contains(git_index::entry::Flags::SKIP_WORKTREE) { - ctx.files.inc(); - continue; - } - - // Symlinks always have to be delayed on windows as they have to point to something that exists on creation. - // And even if not, there is a distinction between file and directory symlinks, hence we have to check what the target is - // before creating it. - // And to keep things sane, we just do the same on non-windows as well which is similar to what git does and adds some safety - // around writing through symlinks (even though we handle this). - // This also means that we prefer content in files over symlinks in case of collisions, which probably is for the better, too. - if entry.mode == git_index::entry::Mode::SYMLINK { - // if entry.mode == git_index::entry::Mode::SYMLINK { - delayed.push((entry, entry_path)); - continue; - } - - checkout_entry_handle_result(entry, entry_path, &mut ctx)?; - } + let chunk::Outcome { + mut collisions, + mut errors, + delayed, + } = chunk::process( + interrupt::Iter::new(index.entries_mut_with_paths(), should_interrupt), + &mut ctx, + )?; for (entry, entry_path) in delayed { - checkout_entry_handle_result(entry, entry_path, &mut ctx)?; + chunk::checkout_entry_handle_result(entry, entry_path, &mut errors, &mut collisions, &mut ctx)?; } Ok(checkout::Outcome { files_updated: ctx.num_files.load(Ordering::Relaxed), - collisions: ctx.collisions, - errors: ctx.errors, + collisions, + errors, }) } -struct Context<'a, P1, P2, Find> { - bytes: &'a mut P1, - files: &'a mut P2, - find: Find, - path_cache: PathCache, - buf: Vec, - options: checkout::Options, - errors: Vec, - collisions: Vec, - num_files: &'a AtomicUsize, -} +mod chunk { + use bstr::BStr; + use git_features::progress::Progress; + use git_hash::oid; + use std::sync::atomic::{AtomicUsize, Ordering}; -fn checkout_entry_handle_result( - entry: &mut git_index::Entry, - entry_path: &BStr, - Context { - bytes, - files, - find, - path_cache, - buf, - options, - errors, - collisions, - num_files, - }: &mut Context<'_, P1, P2, Find>, -) -> Result<(), checkout::Error> -where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, - E: std::error::Error + Send + Sync + 'static, - P1: Progress, - P2: Progress, -{ - let res = entry::checkout(entry, entry_path, find, path_cache, *options, buf); - files.inc(); - num_files.fetch_add(1, Ordering::SeqCst); - match res { - Ok(object_size) => bytes.inc_by(object_size), - Err(index::checkout::Error::Io(err)) if os::indicates_collision(&err) => { - // We are here because a file existed or was blocked by a directory which shouldn't be possible unless - // we are on a file insensitive file system. - files.fail(format!("{}: collided ({:?})", entry_path, err.kind())); - collisions.push(checkout::Collision { - path: entry_path.into(), - error_kind: err.kind(), - }); + use crate::index::{checkout, checkout::PathCache, entry}; + use crate::{index, os}; + + pub struct Outcome<'a> { + pub collisions: Vec, + pub errors: Vec, + pub delayed: Vec<(&'a mut git_index::Entry, &'a BStr)>, + } + + pub struct Context<'a, P1, P2, Find> { + pub bytes: &'a mut P1, + pub files: &'a mut P2, + pub find: Find, + pub path_cache: PathCache, + pub buf: Vec, + pub options: checkout::Options, + /// We keep these shared so that there is the chance for printing numbers that aren't looking like + /// multiple of chunk sizes. Purely cosmetic. Otherwise it's the same as `files`. + pub num_files: &'a AtomicUsize, + } + + pub fn process<'entry, Find, E, P1, P2>( + entries_with_paths: impl Iterator, + ctx: &mut Context<'_, P1, P2, Find>, + ) -> Result, checkout::Error> + where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + E: std::error::Error + Send + Sync + 'static, + P1: Progress, + P2: Progress, + { + let mut delayed = Vec::new(); + + let mut collisions = Vec::new(); + let mut errors = Vec::new(); + + for (entry, entry_path) in entries_with_paths { + // TODO: write test for that + if entry.flags.contains(git_index::entry::Flags::SKIP_WORKTREE) { + ctx.files.inc(); + continue; + } + + // Symlinks always have to be delayed on windows as they have to point to something that exists on creation. + // And even if not, there is a distinction between file and directory symlinks, hence we have to check what the target is + // before creating it. + // And to keep things sane, we just do the same on non-windows as well which is similar to what git does and adds some safety + // around writing through symlinks (even though we handle this). + // This also means that we prefer content in files over symlinks in case of collisions, which probably is for the better, too. + if entry.mode == git_index::entry::Mode::SYMLINK { + delayed.push((entry, entry_path)); + continue; + } + + checkout_entry_handle_result(entry, entry_path, &mut errors, &mut collisions, ctx)?; } - Err(err) => { - if options.keep_going { - errors.push(ErrorRecord { + Ok(Outcome { + errors, + collisions, + delayed, + }) + } + + pub fn checkout_entry_handle_result( + entry: &mut git_index::Entry, + entry_path: &BStr, + errors: &mut Vec, + collisions: &mut Vec, + Context { + bytes, + files, + find, + path_cache, + buf, + options, + num_files, + }: &mut Context<'_, P1, P2, Find>, + ) -> Result<(), checkout::Error> + where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + E: std::error::Error + Send + Sync + 'static, + P1: Progress, + P2: Progress, + { + let res = entry::checkout(entry, entry_path, find, path_cache, *options, buf); + files.inc(); + num_files.fetch_add(1, Ordering::SeqCst); + match res { + Ok(object_size) => bytes.inc_by(object_size), + Err(index::checkout::Error::Io(err)) if os::indicates_collision(&err) => { + // We are here because a file existed or was blocked by a directory which shouldn't be possible unless + // we are on a file insensitive file system. + files.fail(format!("{}: collided ({:?})", entry_path, err.kind())); + collisions.push(checkout::Collision { path: entry_path.into(), - error: Box::new(err), + error_kind: err.kind(), }); - } else { - return Err(err); } - } - }; - Ok(()) + Err(err) => { + if options.keep_going { + errors.push(checkout::ErrorRecord { + path: entry_path.into(), + error: Box::new(err), + }); + } else { + return Err(err); + } + } + }; + Ok(()) + } } diff --git a/git-worktree/tests/index/checkout.rs b/git-worktree/tests/index/checkout.rs index 08c850bc4da..3bc9e7f474f 100644 --- a/git-worktree/tests/index/checkout.rs +++ b/git-worktree/tests/index/checkout.rs @@ -489,6 +489,7 @@ fn opts_from_probe() -> index::checkout::Options { index::checkout::Options { fs: probe_gitoxide_dir().unwrap(), destination_is_initially_empty: true, + thread_limit: Some(1), ..Default::default() } } From 1cd7eb3f720e8b66792c942a99d7d9d85069ec03 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 8 Mar 2022 17:30:14 +0800 Subject: [PATCH 28/37] parallel and non-parallel tests (#301) That should validate that the parallel version is implemented correctly, order shouldn't change nor the outcome on disk. --- git-worktree/Cargo.toml | 14 +++++++++++++- git-worktree/src/index/mod.rs | 14 +++++++++----- git-worktree/tests/worktree-multi-threaded.rs | 2 ++ git-worktree/tests/worktree-single-threaded.rs | 4 ++++ git-worktree/tests/{ => worktree}/fs/mod.rs | 0 .../tests/{ => worktree}/index/checkout.rs | 2 +- git-worktree/tests/{ => worktree}/index/mod.rs | 0 .../tests/{worktree.rs => worktree/mod.rs} | 5 ++--- 8 files changed, 31 insertions(+), 10 deletions(-) create mode 100644 git-worktree/tests/worktree-multi-threaded.rs create mode 100644 git-worktree/tests/worktree-single-threaded.rs rename git-worktree/tests/{ => worktree}/fs/mod.rs (100%) rename git-worktree/tests/{ => worktree}/index/checkout.rs (99%) rename git-worktree/tests/{ => worktree}/index/mod.rs (100%) rename git-worktree/tests/{worktree.rs => worktree/mod.rs} (75%) diff --git a/git-worktree/Cargo.toml b/git-worktree/Cargo.toml index 7249ef619ee..f2a9d49573e 100644 --- a/git-worktree/Cargo.toml +++ b/git-worktree/Cargo.toml @@ -10,12 +10,24 @@ edition = "2018" [lib] doctest = false -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[[test]] +name = "multi-threaded" +path = "tests/worktree-multi-threaded.rs" +required-features = ["internal-testing-git-features-parallel"] + +[[test]] +name = "single-threaded" +path = "tests/worktree-single-threaded.rs" +required-features = ["internal-testing-to-avoid-being-run-by-cargo-test-all"] + [features] ## Data structures implement `serde::Serialize` and `serde::Deserialize`. serde1 = [ "serde", "bstr/serde1", "git-index/serde1", "git-hash/serde1", "git-object/serde1" ] +internal-testing-git-features-parallel = ["git-features/parallel"] +internal-testing-to-avoid-being-run-by-cargo-test-all = [] + [dependencies] git-index = { version = "^0.1.0", path = "../git-index" } git-hash = { version = "^0.9.0", path = "../git-hash" } diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs index 18510a4f973..882c9561dae 100644 --- a/git-worktree/src/index/mod.rs +++ b/git-worktree/src/index/mod.rs @@ -37,7 +37,7 @@ where options, num_files: &num_files, }; - let (_chunk_size, _, _num_threads) = git_features::parallel::optimize_chunk_size_and_thread_limit( + let (_chunk_size, _, num_threads) = git_features::parallel::optimize_chunk_size_and_thread_limit( 100, index.entries().len().into(), options.thread_limit, @@ -48,10 +48,14 @@ where mut collisions, mut errors, delayed, - } = chunk::process( - interrupt::Iter::new(index.entries_mut_with_paths(), should_interrupt), - &mut ctx, - )?; + } = if num_threads == 1 { + chunk::process( + interrupt::Iter::new(index.entries_mut_with_paths(), should_interrupt), + &mut ctx, + )? + } else { + todo!("in parallel") + }; for (entry, entry_path) in delayed { chunk::checkout_entry_handle_result(entry, entry_path, &mut errors, &mut collisions, &mut ctx)?; diff --git a/git-worktree/tests/worktree-multi-threaded.rs b/git-worktree/tests/worktree-multi-threaded.rs new file mode 100644 index 00000000000..5a53f9c26f6 --- /dev/null +++ b/git-worktree/tests/worktree-multi-threaded.rs @@ -0,0 +1,2 @@ +mod worktree; +use worktree::*; diff --git a/git-worktree/tests/worktree-single-threaded.rs b/git-worktree/tests/worktree-single-threaded.rs new file mode 100644 index 00000000000..39b8dea85be --- /dev/null +++ b/git-worktree/tests/worktree-single-threaded.rs @@ -0,0 +1,4 @@ +#[cfg(not(feature = "internal-testing-git-features-parallel"))] +mod worktree; +#[cfg(not(feature = "internal-testing-git-features-parallel"))] +use worktree::*; diff --git a/git-worktree/tests/fs/mod.rs b/git-worktree/tests/worktree/fs/mod.rs similarity index 100% rename from git-worktree/tests/fs/mod.rs rename to git-worktree/tests/worktree/fs/mod.rs diff --git a/git-worktree/tests/index/checkout.rs b/git-worktree/tests/worktree/index/checkout.rs similarity index 99% rename from git-worktree/tests/index/checkout.rs rename to git-worktree/tests/worktree/index/checkout.rs index 3bc9e7f474f..3b8d10e1438 100644 --- a/git-worktree/tests/index/checkout.rs +++ b/git-worktree/tests/worktree/index/checkout.rs @@ -489,7 +489,7 @@ fn opts_from_probe() -> index::checkout::Options { index::checkout::Options { fs: probe_gitoxide_dir().unwrap(), destination_is_initially_empty: true, - thread_limit: Some(1), + thread_limit: git_features::parallel::num_threads(None).into(), ..Default::default() } } diff --git a/git-worktree/tests/index/mod.rs b/git-worktree/tests/worktree/index/mod.rs similarity index 100% rename from git-worktree/tests/index/mod.rs rename to git-worktree/tests/worktree/index/mod.rs diff --git a/git-worktree/tests/worktree.rs b/git-worktree/tests/worktree/mod.rs similarity index 75% rename from git-worktree/tests/worktree.rs rename to git-worktree/tests/worktree/mod.rs index ad199f619be..2a9fb1aecc0 100644 --- a/git-worktree/tests/worktree.rs +++ b/git-worktree/tests/worktree/mod.rs @@ -1,9 +1,8 @@ -use std::path::{Path, PathBuf}; - mod fs; mod index; -type Result = std::result::Result>; +use std::path::{Path, PathBuf}; +pub type Result = std::result::Result>; pub fn fixture_path(name: &str) -> PathBuf { let dir = From 9ecdade0f117b966c98f48d1879bdba21ccaafd7 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 8 Mar 2022 17:45:42 +0800 Subject: [PATCH 29/37] decouple amount of bytes written from progress (#301) --- git-worktree/src/index/checkout.rs | 2 ++ git-worktree/src/index/mod.rs | 24 +++++++++++++++++------- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/git-worktree/src/index/checkout.rs b/git-worktree/src/index/checkout.rs index 3931ae3f634..81098247040 100644 --- a/git-worktree/src/index/checkout.rs +++ b/git-worktree/src/index/checkout.rs @@ -158,6 +158,8 @@ pub struct ErrorRecord { pub struct Outcome { /// The amount of files updated, or created. pub files_updated: usize, + /// The amount of bytes written to disk, + pub bytes_written: u64, pub collisions: Vec, pub errors: Vec, } diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs index 882c9561dae..513f57fa55f 100644 --- a/git-worktree/src/index/mod.rs +++ b/git-worktree/src/index/mod.rs @@ -47,6 +47,7 @@ where let chunk::Outcome { mut collisions, mut errors, + mut bytes_written, delayed, } = if num_threads == 1 { chunk::process( @@ -58,13 +59,15 @@ where }; for (entry, entry_path) in delayed { - chunk::checkout_entry_handle_result(entry, entry_path, &mut errors, &mut collisions, &mut ctx)?; + bytes_written += + chunk::checkout_entry_handle_result(entry, entry_path, &mut errors, &mut collisions, &mut ctx)? as u64; } Ok(checkout::Outcome { files_updated: ctx.num_files.load(Ordering::Relaxed), collisions, errors, + bytes_written, }) } @@ -81,6 +84,7 @@ mod chunk { pub collisions: Vec, pub errors: Vec, pub delayed: Vec<(&'a mut git_index::Entry, &'a BStr)>, + pub bytes_written: u64, } pub struct Context<'a, P1, P2, Find> { @@ -106,9 +110,9 @@ mod chunk { P2: Progress, { let mut delayed = Vec::new(); - let mut collisions = Vec::new(); let mut errors = Vec::new(); + let mut bytes_written = 0; for (entry, entry_path) in entries_with_paths { // TODO: write test for that @@ -128,9 +132,11 @@ mod chunk { continue; } - checkout_entry_handle_result(entry, entry_path, &mut errors, &mut collisions, ctx)?; + bytes_written += checkout_entry_handle_result(entry, entry_path, &mut errors, &mut collisions, ctx)? as u64; } + Ok(Outcome { + bytes_written, errors, collisions, delayed, @@ -151,7 +157,7 @@ mod chunk { options, num_files, }: &mut Context<'_, P1, P2, Find>, - ) -> Result<(), checkout::Error> + ) -> Result> where Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, E: std::error::Error + Send + Sync + 'static, @@ -162,7 +168,10 @@ mod chunk { files.inc(); num_files.fetch_add(1, Ordering::SeqCst); match res { - Ok(object_size) => bytes.inc_by(object_size), + Ok(object_size) => { + bytes.inc_by(object_size); + Ok(object_size) + } Err(index::checkout::Error::Io(err)) if os::indicates_collision(&err) => { // We are here because a file existed or was blocked by a directory which shouldn't be possible unless // we are on a file insensitive file system. @@ -171,6 +180,7 @@ mod chunk { path: entry_path.into(), error_kind: err.kind(), }); + Ok(0) } Err(err) => { if options.keep_going { @@ -178,11 +188,11 @@ mod chunk { path: entry_path.into(), error: Box::new(err), }); + Ok(0) } else { return Err(err); } } - }; - Ok(()) + } } } From 5f29c0f66d0aa6c045bfdf6f39a806ce8c4a5100 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 8 Mar 2022 18:57:58 +0800 Subject: [PATCH 30/37] basic parallelization, without proper reducer, just so it compiles (#301) --- git-worktree/src/index/mod.rs | 43 +++++++++++++++---- git-worktree/tests/worktree/index/checkout.rs | 16 ++++--- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs index 513f57fa55f..c6d33345090 100644 --- a/git-worktree/src/index/mod.rs +++ b/git-worktree/src/index/mod.rs @@ -1,4 +1,5 @@ use git_features::interrupt; +use git_features::parallel::in_parallel; use git_features::progress::Progress; use git_hash::oid; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; @@ -19,43 +20,67 @@ pub fn checkout( options: checkout::Options, ) -> Result> where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E> + Send + Clone, E: std::error::Error + Send + Sync + 'static, { let num_files = AtomicUsize::default(); + let dir = dir.into(); let mut ctx = chunk::Context { buf: Vec::new(), path_cache: { - let mut cache = PathCache::new(dir.into()); + let mut cache = PathCache::new(dir.clone()); cache.unlink_on_collision = options.overwrite_existing; cache }, files, bytes, - find, + find: find.clone(), options, num_files: &num_files, }; - let (_chunk_size, _, num_threads) = git_features::parallel::optimize_chunk_size_and_thread_limit( + let (chunk_size, thread_limit, num_threads) = git_features::parallel::optimize_chunk_size_and_thread_limit( 100, index.entries().len().into(), options.thread_limit, None, ); + let entries_with_paths = interrupt::Iter::new(index.entries_mut_with_paths(), should_interrupt); let chunk::Outcome { mut collisions, mut errors, mut bytes_written, delayed, } = if num_threads == 1 { - chunk::process( - interrupt::Iter::new(index.entries_mut_with_paths(), should_interrupt), - &mut ctx, - )? + chunk::process(entries_with_paths, &mut ctx)? } else { - todo!("in parallel") + dbg!(thread_limit, num_threads, chunk_size); + in_parallel( + git_features::util::Chunks { + inner: entries_with_paths, + size: chunk_size, + }, + thread_limit, + move |_| { + ( + Vec::::new(), // object buffer + { + let mut cache = PathCache::new(dir.clone()); + cache.unlink_on_collision = options.overwrite_existing; + cache + }, + find.clone(), + ) + }, + |_chunk, (_buf, _path_cache, _find)| { + // chunk::process(chunk, ) + Ok::<_, ()>(()) + }, + git_features::parallel::reduce::IdentityWithResult::default(), + ) + .ok(); + todo!() }; for (entry, entry_path) in delayed { diff --git a/git-worktree/tests/worktree/index/checkout.rs b/git-worktree/tests/worktree/index/checkout.rs index 3b8d10e1438..bc7ecd19a54 100644 --- a/git-worktree/tests/worktree/index/checkout.rs +++ b/git-worktree/tests/worktree/index/checkout.rs @@ -109,7 +109,7 @@ use git_object::bstr::ByteSlice; use git_odb::FindExt; use git_worktree::{fs::Capabilities, index, index::checkout::Collision}; use std::io::ErrorKind::AlreadyExists; -use std::sync::atomic::AtomicBool; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use tempfile::TempDir; use crate::fixture_path; @@ -270,13 +270,14 @@ fn allow_or_disallow_symlinks() -> crate::Result { fn keep_going_collects_results() { let mut opts = opts_from_probe(); opts.keep_going = true; - let mut count = 0; + let mut count = AtomicUsize::default(); let (_source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir_opts( opts, "make_mixed_without_submodules", |_id| { - if count < 2 { - count += 1; + if let Ok(_) = count.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |current| { + (current < 2).then(|| current + 1) + }) { false } else { true @@ -442,7 +443,7 @@ fn checkout_index_in_tmp_dir( fn checkout_index_in_tmp_dir_opts( opts: index::checkout::Options, name: &str, - mut allow_return_object: impl FnMut(&git_hash::oid) -> bool, + mut allow_return_object: impl FnMut(&git_hash::oid) -> bool + Send + Clone, prep_dest: impl Fn(&Path) -> std::io::Result<()>, ) -> crate::Result<( PathBuf, @@ -453,7 +454,10 @@ fn checkout_index_in_tmp_dir_opts( let source_tree = fixture_path(name); let git_dir = source_tree.join(".git"); let mut index = git_index::File::at(git_dir.join("index"), Default::default())?; - let odb = git_odb::at(git_dir.join("objects"))?; + let odb = git_odb::at(git_dir.join("objects"))? + .into_inner() + .into_arc() + .to_cache_arc(); let destination = tempfile::tempdir_in(std::env::current_dir()?)?; prep_dest(destination.path())?; From c19331e001e587e4fca74f3e9fec28a7df922c0a Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 8 Mar 2022 20:34:18 +0800 Subject: [PATCH 31/37] conversions from Rc to arc for Handle (#301) --- git-odb/src/cache.rs | 22 ++++++++++++ git-odb/src/store_impls/dynamic/handle.rs | 35 +++++++++++++++++++ git-worktree/tests/worktree/index/checkout.rs | 7 ++-- gitoxide-core/src/index/checkout.rs | 34 +++++++++++------- 4 files changed, 80 insertions(+), 18 deletions(-) diff --git a/git-odb/src/cache.rs b/git-odb/src/cache.rs index b5f3162e379..a61fa6763ca 100644 --- a/git-odb/src/cache.rs +++ b/git-odb/src/cache.rs @@ -1,3 +1,4 @@ +use std::rc::Rc; use std::{ cell::RefCell, ops::{Deref, DerefMut}, @@ -16,6 +17,27 @@ pub type ObjectCache = dyn git_pack::cache::Object + Send + 'static; /// A constructor for boxed object caches. pub type NewObjectCacheFn = dyn Fn() -> Box + Send + Sync + 'static; +impl Cache>> { + /// Convert this cache's handle into one that keeps its store in an arc. This creates an entirely new store, + /// so should be done early to avoid unnecessary work (and mappings). + pub fn into_arc(self) -> std::io::Result>>> { + let inner = self.inner.into_arc()?; + Ok(Cache { + inner, + new_pack_cache: self.new_pack_cache, + new_object_cache: self.new_object_cache, + pack_cache: self.pack_cache, + object_cache: self.object_cache, + }) + } +} +impl Cache>> { + /// No op, as we are containing an arc handle already. + pub fn into_arc(self) -> std::io::Result>>> { + Ok(self) + } +} + impl Cache { /// Dissolve this instance, discard all caches, and return the inner implementation. pub fn into_inner(self) -> S { diff --git a/git-odb/src/store_impls/dynamic/handle.rs b/git-odb/src/store_impls/dynamic/handle.rs index e2fb27124d1..33ae0239f3b 100644 --- a/git-odb/src/store_impls/dynamic/handle.rs +++ b/git-odb/src/store_impls/dynamic/handle.rs @@ -1,3 +1,5 @@ +use std::convert::{TryFrom, TryInto}; +use std::rc::Rc; use std::{ cell::RefCell, ops::Deref, @@ -324,6 +326,39 @@ where } } +impl TryFrom<&super::Store> for super::Store { + type Error = std::io::Error; + + fn try_from(s: &super::Store) -> Result { + super::Store::at_opts( + s.path(), + crate::store::init::Options { + slots: crate::store::init::Slots::Given(s.files.len().try_into().expect("BUG: too many slots")), + object_hash: Default::default(), + use_multi_pack_index: false, + }, + ) + } +} + +impl super::Handle> { + /// Convert a ref counted store into one that is ref-counted and thread-safe, by creating a new Store. + pub fn into_arc(self) -> std::io::Result>> { + let store = Arc::new(super::Store::try_from(self.store_ref())?); + let mut cache = store.to_handle_arc(); + cache.refresh = self.refresh; + cache.max_recursion_depth = self.max_recursion_depth; + Ok(cache) + } +} + +impl super::Handle> { + /// Convert a ref counted store into one that is ref-counted and thread-safe, by creating a new Store + pub fn into_arc(self) -> std::io::Result>> { + Ok(self) + } +} + impl Clone for super::Handle where S: Deref + Clone, diff --git a/git-worktree/tests/worktree/index/checkout.rs b/git-worktree/tests/worktree/index/checkout.rs index bc7ecd19a54..a2446a80404 100644 --- a/git-worktree/tests/worktree/index/checkout.rs +++ b/git-worktree/tests/worktree/index/checkout.rs @@ -270,7 +270,7 @@ fn allow_or_disallow_symlinks() -> crate::Result { fn keep_going_collects_results() { let mut opts = opts_from_probe(); opts.keep_going = true; - let mut count = AtomicUsize::default(); + let count = AtomicUsize::default(); let (_source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir_opts( opts, "make_mixed_without_submodules", @@ -454,10 +454,7 @@ fn checkout_index_in_tmp_dir_opts( let source_tree = fixture_path(name); let git_dir = source_tree.join(".git"); let mut index = git_index::File::at(git_dir.join("index"), Default::default())?; - let odb = git_odb::at(git_dir.join("objects"))? - .into_inner() - .into_arc() - .to_cache_arc(); + let odb = git_odb::at(git_dir.join("objects"))?.into_inner().into_arc()?; let destination = tempfile::tempdir_in(std::env::current_dir()?)?; prep_dest(destination.path())?; diff --git a/gitoxide-core/src/index/checkout.rs b/gitoxide-core/src/index/checkout.rs index 1f65f402bce..1ec08cce7b1 100644 --- a/gitoxide-core/src/index/checkout.rs +++ b/gitoxide-core/src/index/checkout.rs @@ -68,24 +68,29 @@ pub fn checkout_exclusive( bytes.init(None, git::progress::bytes()); let start = std::time::Instant::now(); + let no_repo = repo.is_none(); let checkout::Outcome { errors, collisions, files_updated, - } = match &repo { + bytes_written, + } = match repo { Some(repo) => git::worktree::index::checkout( &mut index, dest_directory, - |oid, buf| { - repo.objects.find_blob(oid, buf).ok(); - if empty_files { - // We always want to query the ODB here… - repo.objects.find_blob(oid, buf)?; - buf.clear(); - // …but write nothing - Ok(git::objs::BlobRef { data: buf }) - } else { - repo.objects.find_blob(oid, buf) + { + let objects = repo.objects.into_arc()?; + move |oid, buf| { + objects.find_blob(oid, buf).ok(); + if empty_files { + // We always want to query the ODB here… + objects.find_blob(oid, buf)?; + buf.clear(); + // …but write nothing + Ok(git::objs::BlobRef { data: buf }) + } else { + objects.find_blob(oid, buf) + } } }, &mut files, @@ -111,9 +116,9 @@ pub fn checkout_exclusive( bytes.show_throughput(start); progress.done(format!( - "Created {} {} files{}", + "Created {} {} files{} ({})", files_updated, - repo.is_none().then(|| "empty").unwrap_or_default(), + no_repo.then(|| "empty").unwrap_or_default(), should_interrupt .load(Ordering::Relaxed) .then(|| { @@ -123,6 +128,9 @@ pub fn checkout_exclusive( ) }) .unwrap_or_default(), + git_features::progress::bytes() + .unwrap() + .display(bytes_written as usize, None, None) )); if !(collisions.is_empty() && errors.is_empty()) { From 6bfd865a0578eeacd8d19eaa89d8914ac947c62a Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 8 Mar 2022 21:15:42 +0800 Subject: [PATCH 32/37] call chunk processing in threaded processor (#301) --- git-worktree/src/index/mod.rs | 79 +++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 35 deletions(-) diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs index c6d33345090..9d8bcbe9be3 100644 --- a/git-worktree/src/index/mod.rs +++ b/git-worktree/src/index/mod.rs @@ -1,6 +1,6 @@ -use git_features::interrupt; use git_features::parallel::in_parallel; use git_features::progress::Progress; +use git_features::{interrupt, progress}; use git_hash::oid; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; @@ -33,8 +33,6 @@ where cache.unlink_on_collision = options.overwrite_existing; cache }, - files, - bytes, find: find.clone(), options, num_files: &num_files, @@ -53,7 +51,7 @@ where mut bytes_written, delayed, } = if num_threads == 1 { - chunk::process(entries_with_paths, &mut ctx)? + chunk::process(entries_with_paths, files, bytes, &mut ctx)? } else { dbg!(thread_limit, num_threads, chunk_size); in_parallel( @@ -62,21 +60,27 @@ where size: chunk_size, }, thread_limit, - move |_| { - ( - Vec::::new(), // object buffer - { - let mut cache = PathCache::new(dir.clone()); - cache.unlink_on_collision = options.overwrite_existing; - cache - }, - find.clone(), - ) - }, - |_chunk, (_buf, _path_cache, _find)| { - // chunk::process(chunk, ) - Ok::<_, ()>(()) + { + let num_files = &num_files; + move |_| { + ( + progress::Discard, + progress::Discard, + chunk::Context { + find: find.clone(), + path_cache: { + let mut cache = PathCache::new(dir.clone()); + cache.unlink_on_collision = options.overwrite_existing; + cache + }, + buf: Vec::new(), + options, + num_files, + }, + ) + } }, + |chunk, (files, bytes, ctx)| chunk::process(chunk.into_iter(), files, bytes, ctx), git_features::parallel::reduce::IdentityWithResult::default(), ) .ok(); @@ -84,8 +88,15 @@ where }; for (entry, entry_path) in delayed { - bytes_written += - chunk::checkout_entry_handle_result(entry, entry_path, &mut errors, &mut collisions, &mut ctx)? as u64; + bytes_written += chunk::checkout_entry_handle_result( + entry, + entry_path, + &mut errors, + &mut collisions, + files, + bytes, + &mut ctx, + )? as u64; } Ok(checkout::Outcome { @@ -112,9 +123,7 @@ mod chunk { pub bytes_written: u64, } - pub struct Context<'a, P1, P2, Find> { - pub bytes: &'a mut P1, - pub files: &'a mut P2, + pub struct Context<'a, Find> { pub find: Find, pub path_cache: PathCache, pub buf: Vec, @@ -124,15 +133,15 @@ mod chunk { pub num_files: &'a AtomicUsize, } - pub fn process<'entry, Find, E, P1, P2>( + pub fn process<'entry, Find, E>( entries_with_paths: impl Iterator, - ctx: &mut Context<'_, P1, P2, Find>, + files: &mut impl Progress, + bytes: &mut impl Progress, + ctx: &mut Context<'_, Find>, ) -> Result, checkout::Error> where Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, E: std::error::Error + Send + Sync + 'static, - P1: Progress, - P2: Progress, { let mut delayed = Vec::new(); let mut collisions = Vec::new(); @@ -142,7 +151,7 @@ mod chunk { for (entry, entry_path) in entries_with_paths { // TODO: write test for that if entry.flags.contains(git_index::entry::Flags::SKIP_WORKTREE) { - ctx.files.inc(); + files.inc(); continue; } @@ -157,7 +166,9 @@ mod chunk { continue; } - bytes_written += checkout_entry_handle_result(entry, entry_path, &mut errors, &mut collisions, ctx)? as u64; + bytes_written += + checkout_entry_handle_result(entry, entry_path, &mut errors, &mut collisions, files, bytes, ctx)? + as u64; } Ok(Outcome { @@ -168,26 +179,24 @@ mod chunk { }) } - pub fn checkout_entry_handle_result( + pub fn checkout_entry_handle_result( entry: &mut git_index::Entry, entry_path: &BStr, errors: &mut Vec, collisions: &mut Vec, + files: &mut impl Progress, + bytes: &mut impl Progress, Context { - bytes, - files, find, path_cache, buf, options, num_files, - }: &mut Context<'_, P1, P2, Find>, + }: &mut Context<'_, Find>, ) -> Result> where Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, E: std::error::Error + Send + Sync + 'static, - P1: Progress, - P2: Progress, { let res = entry::checkout(entry, entry_path, find, path_cache, *options, buf); files.inc(); From e83079d219c96692725ab8af1c0e656cb331ecd8 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 9 Mar 2022 08:19:21 +0800 Subject: [PATCH 33/37] a reducer which produces progress reporting each time it feeds (#301) --- git-worktree/src/index/mod.rs | 65 +++++++++++++++++-- git-worktree/tests/worktree/index/checkout.rs | 32 ++++++--- 2 files changed, 82 insertions(+), 15 deletions(-) diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs index 9d8bcbe9be3..c542c761087 100644 --- a/git-worktree/src/index/mod.rs +++ b/git-worktree/src/index/mod.rs @@ -53,7 +53,6 @@ where } = if num_threads == 1 { chunk::process(entries_with_paths, files, bytes, &mut ctx)? } else { - dbg!(thread_limit, num_threads, chunk_size); in_parallel( git_features::util::Chunks { inner: entries_with_paths, @@ -81,10 +80,14 @@ where } }, |chunk, (files, bytes, ctx)| chunk::process(chunk.into_iter(), files, bytes, ctx), - git_features::parallel::reduce::IdentityWithResult::default(), - ) - .ok(); - todo!() + chunk::Reduce { + files, + bytes, + num_files: &num_files, + aggregate: Default::default(), + marker: Default::default(), + }, + )? }; for (entry, entry_path) in delayed { @@ -116,6 +119,58 @@ mod chunk { use crate::index::{checkout, checkout::PathCache, entry}; use crate::{index, os}; + mod reduce { + use crate::index::checkout; + use git_features::progress::Progress; + use std::marker::PhantomData; + use std::sync::atomic::{AtomicUsize, Ordering}; + + pub struct Reduce<'a, 'entry, P1, P2, E> { + pub files: &'a mut P1, + pub bytes: &'a mut P2, + pub num_files: &'a AtomicUsize, + pub aggregate: super::Outcome<'entry>, + pub marker: PhantomData, + } + + impl<'a, 'entry, P1, P2, E> git_features::parallel::Reduce for Reduce<'a, 'entry, P1, P2, E> + where + P1: Progress, + P2: Progress, + E: std::error::Error + Send + Sync + 'static, + { + type Input = Result, checkout::Error>; + type FeedProduce = (); + type Output = super::Outcome<'entry>; + type Error = checkout::Error; + + fn feed(&mut self, item: Self::Input) -> Result { + let item = item?; + let super::Outcome { + bytes_written, + delayed, + errors, + collisions, + } = item; + self.aggregate.bytes_written += bytes_written; + self.aggregate.delayed.extend(delayed); + self.aggregate.errors.extend(errors); + self.aggregate.collisions.extend(collisions); + + self.bytes.set(self.aggregate.bytes_written as usize); + self.files.set(self.num_files.load(Ordering::Relaxed)); + + Ok(()) + } + + fn finalize(self) -> Result { + Ok(self.aggregate) + } + } + } + pub use reduce::Reduce; + + #[derive(Default)] pub struct Outcome<'a> { pub collisions: Vec, pub errors: Vec, diff --git a/git-worktree/tests/worktree/index/checkout.rs b/git-worktree/tests/worktree/index/checkout.rs index a2446a80404..0b5cfc990c6 100644 --- a/git-worktree/tests/worktree/index/checkout.rs +++ b/git-worktree/tests/worktree/index/checkout.rs @@ -288,17 +288,19 @@ fn keep_going_collects_results() { .unwrap(); assert_eq!( - outcome - .errors - .iter() - .map(|r| r.path.to_path_lossy().into_owned()) - .collect::>(), - paths(if cfg!(unix) { + sort_when_threaded( + outcome + .errors + .iter() + .map(|r| r.path.to_path_lossy().into_owned()) + .collect() + ), + sort_when_threaded(paths(if cfg!(unix) { ["dir/content", "empty"] } else { // not actually a symlink anymore, even though symlinks are supported but git think differently. ["dir/content", "dir/sub-dir/symlink"] - }) + })) ); assert_eq!( @@ -360,8 +362,8 @@ fn collisions_are_detected_on_a_case_sensitive_filesystem() { let error_kind_dir = error_kind; assert_eq!( - outcome.collisions, - vec![ + sort_when_threaded(outcome.collisions), + sort_when_threaded(vec![ Collision { path: "FILE_x".into(), error_kind, @@ -382,7 +384,7 @@ fn collisions_are_detected_on_a_case_sensitive_filesystem() { path: "x".into(), error_kind, }, - ], + ]), "these files couldn't be checked out" ); } @@ -498,3 +500,13 @@ fn opts_from_probe() -> index::checkout::Options { fn paths<'a>(p: impl IntoIterator) -> Vec { p.into_iter().map(PathBuf::from).collect() } + +fn sort_when_threaded(mut p: Vec) -> Vec +where + T: Ord, +{ + if git_features::parallel::num_threads(None) > 1 { + p.sort() + } + p +} From 232832f5355f70f984b5d269ddba61e3b5ec7081 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 9 Mar 2022 08:20:21 +0800 Subject: [PATCH 34/37] run multi-threaded tests as well for worktree (#301) --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 04be1f0940a..d004845649e 100644 --- a/Makefile +++ b/Makefile @@ -149,6 +149,8 @@ unit-tests: ## run all unit tests && cargo test --features "internal-testing-git-features-parallel" cd git-index && cargo test --features internal-testing-to-avoid-being-run-by-cargo-test-all \ && cargo test --features "internal-testing-git-features-parallel" + cd git-worktree && cargo test --features internal-testing-to-avoid-being-run-by-cargo-test-all \ + && cargo test --features "internal-testing-git-features-parallel" cd git-packetline && cargo test \ && cargo test --features blocking-io,maybe-async/is_sync --test blocking-packetline \ && cargo test --features "async-io" --test async-packetline From 07a4094965ac1b4eb223da8e5ca5cc4a86c5f596 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 9 Mar 2022 08:21:50 +0800 Subject: [PATCH 35/37] thanks clippy --- git-features/src/interrupt.rs | 5 +---- git-worktree/src/index/mod.rs | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/git-features/src/interrupt.rs b/git-features/src/interrupt.rs index 4cdbd373b4c..f217ae0a697 100644 --- a/git-features/src/interrupt.rs +++ b/git-features/src/interrupt.rs @@ -77,10 +77,7 @@ where fn next(&mut self) -> Option { self.make_err.as_ref()?; if self.should_interrupt.load(Ordering::Relaxed) { - return match self.make_err.take() { - Some(f) => Some(Err(f())), - None => None, - }; + return self.make_err.take().map(|f| Err(f())); } match self.inner.next() { Some(next) => Some(Ok(next)), diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs index c542c761087..1e28919e09e 100644 --- a/git-worktree/src/index/mod.rs +++ b/git-worktree/src/index/mod.rs @@ -279,7 +279,7 @@ mod chunk { }); Ok(0) } else { - return Err(err); + Err(err) } } } From 07e9081fb5628e4ddc8f87e2d4ba0c7b3247bb35 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 9 Mar 2022 08:27:40 +0800 Subject: [PATCH 36/37] pass thread-limit along to checkout (#301) --- gitoxide-core/src/index/checkout.rs | 2 ++ gitoxide-core/src/index/mod.rs | 4 ++++ src/plumbing/main.rs | 1 + 3 files changed, 7 insertions(+) diff --git a/gitoxide-core/src/index/checkout.rs b/gitoxide-core/src/index/checkout.rs index 1ec08cce7b1..3bbe1f269d9 100644 --- a/gitoxide-core/src/index/checkout.rs +++ b/gitoxide-core/src/index/checkout.rs @@ -17,6 +17,7 @@ pub fn checkout_exclusive( index: Options { object_hash, .. }, empty_files, keep_going, + thread_limit, }: index::checkout_exclusive::Options, ) -> anyhow::Result<()> { let repo = repo @@ -57,6 +58,7 @@ pub fn checkout_exclusive( destination_is_initially_empty: true, overwrite_existing: false, keep_going, + thread_limit, ..Default::default() }; diff --git a/gitoxide-core/src/index/mod.rs b/gitoxide-core/src/index/mod.rs index 5ef24b94266..3a2af2460e4 100644 --- a/gitoxide-core/src/index/mod.rs +++ b/gitoxide-core/src/index/mod.rs @@ -29,6 +29,10 @@ pub mod checkout_exclusive { /// If true, all files will be written with zero bytes despite having made an ODB lookup. pub empty_files: bool, pub keep_going: bool, + /// If set, don't use more than this amount of threads. + /// Otherwise, usually use as many threads as there are logical cores. + /// A value of 0 is interpreted as no-limit + pub thread_limit: Option, } } diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index c08076f95e4..11ef6ed4bf1 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -102,6 +102,7 @@ pub fn main() -> Result<()> { index: core::index::Options { object_hash, format }, empty_files, keep_going, + thread_limit, }, ) }, From 21d6f880293de4e8ffc6a8472eb1b54d8b1b105a Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 9 Mar 2022 08:33:48 +0800 Subject: [PATCH 37/37] stabilize assertions in parallel mode (#301) --- etc/check-package-size.sh | 4 +- git-worktree/tests/worktree/index/checkout.rs | 83 +++++++++++-------- 2 files changed, 52 insertions(+), 35 deletions(-) diff --git a/etc/check-package-size.sh b/etc/check-package-size.sh index 1e01333e66c..482ba5aea78 100755 --- a/etc/check-package-size.sh +++ b/etc/check-package-size.sh @@ -19,7 +19,7 @@ echo "in root: gitoxide CLI" (enter cargo-smart-release && indent cargo diet -n --package-size-limit 85KB) (enter git-actor && indent cargo diet -n --package-size-limit 5KB) (enter git-index && indent cargo diet -n --package-size-limit 30KB) -(enter git-worktree && indent cargo diet -n --package-size-limit 15KB) +(enter git-worktree && indent cargo diet -n --package-size-limit 20KB) (enter git-revision && indent cargo diet -n --package-size-limit 10KB) (enter git-bitmap && indent cargo diet -n --package-size-limit 5KB) (enter git-tempfile && indent cargo diet -n --package-size-limit 25KB) @@ -36,7 +36,7 @@ echo "in root: gitoxide CLI" (enter git-object && indent cargo diet -n --package-size-limit 25KB) (enter git-commitgraph && indent cargo diet -n --package-size-limit 25KB) (enter git-pack && indent cargo diet -n --package-size-limit 115KB) -(enter git-odb && indent cargo diet -n --package-size-limit 115KB) +(enter git-odb && indent cargo diet -n --package-size-limit 120KB) (enter git-protocol && indent cargo diet -n --package-size-limit 50KB) (enter git-packetline && indent cargo diet -n --package-size-limit 35KB) (enter git-repository && indent cargo diet -n --package-size-limit 80KB) diff --git a/git-worktree/tests/worktree/index/checkout.rs b/git-worktree/tests/worktree/index/checkout.rs index 0b5cfc990c6..6b2de51c025 100644 --- a/git-worktree/tests/worktree/index/checkout.rs +++ b/git-worktree/tests/worktree/index/checkout.rs @@ -332,10 +332,10 @@ fn no_case_related_collisions_on_case_sensitive_filesystem() { } #[test] -fn collisions_are_detected_on_a_case_sensitive_filesystem() { +fn collisions_are_detected_on_a_case_insensitive_filesystem() { let opts = opts_from_probe(); if !opts.fs.ignore_case { - eprintln!("Skipping case-insensitive testing on what would be a case-senstive file system"); + eprintln!("Skipping case-insensitive testing on what would be a case-sensitive file system"); return; } let (source_tree, destination, _index, outcome) = @@ -349,11 +349,20 @@ fn collisions_are_detected_on_a_case_sensitive_filesystem() { ); let dest_files = dir_structure(&destination); - assert_eq!( - stripped_prefix(&destination, &dest_files), - paths(["D/B", "D/C", "FILE_X", "X", "link-to-X"]), - "we checkout files in order and generally handle collision detection differently, hence the difference" - ); + let multi_threaded = git_features::parallel::num_threads(None) > 1; + if multi_threaded { + assert_eq!( + dest_files.len(), + 5, + "can only assert on number as it's racily creating files so unclear which one clashes" + ); + } else { + assert_eq!( + stripped_prefix(&destination, &dest_files), + paths(["D/B", "D/C", "FILE_X", "X", "link-to-X"]), + "we checkout files in order and generally handle collision detection differently, hence the difference" + ); + } let error_kind = ErrorKind::AlreadyExists; #[cfg(windows)] @@ -361,32 +370,40 @@ fn collisions_are_detected_on_a_case_sensitive_filesystem() { #[cfg(not(windows))] let error_kind_dir = error_kind; - assert_eq!( - sort_when_threaded(outcome.collisions), - sort_when_threaded(vec![ - Collision { - path: "FILE_x".into(), - error_kind, - }, - Collision { - path: "d".into(), - error_kind: error_kind_dir, - }, - Collision { - path: "file_X".into(), - error_kind, - }, - Collision { - path: "file_x".into(), - error_kind, - }, - Collision { - path: "x".into(), - error_kind, - }, - ]), - "these files couldn't be checked out" - ); + if multi_threaded { + assert_eq!( + outcome.collisions.len(), + 5, + "can only assert on number as it's racily creating files so unclear which one clashes" + ); + } else { + assert_eq!( + outcome.collisions, + vec![ + Collision { + path: "FILE_x".into(), + error_kind, + }, + Collision { + path: "d".into(), + error_kind: error_kind_dir, + }, + Collision { + path: "file_X".into(), + error_kind, + }, + Collision { + path: "file_x".into(), + error_kind, + }, + Collision { + path: "x".into(), + error_kind, + }, + ], + "these files couldn't be checked out" + ); + } } fn assert_equality(source_tree: &Path, destination: &TempDir, allow_symlinks: bool) -> crate::Result {