From 038687127ac6eecdd229fe054df3c050612837cd Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 16 Mar 2024 22:18:27 +0100 Subject: [PATCH 01/11] fix: `Search::can_match_relative_path()` won't report false-negatives anymore. It does this by ignoring exclude patterns, preferring false-positives that way. --- gix-pathspec/src/search/matching.rs | 8 ++--- gix-pathspec/tests/search/mod.rs | 51 +++++++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/gix-pathspec/src/search/matching.rs b/gix-pathspec/src/search/matching.rs index 37a14a34b1a..fee80e88491 100644 --- a/gix-pathspec/src/search/matching.rs +++ b/gix-pathspec/src/search/matching.rs @@ -176,11 +176,11 @@ impl Search { matches!(pattern.path.get(common_len), None | Some(&b'/')) } else { relative_path.get(common_len) == Some(&b'/') - } + }; } } } - if is_match { + if is_match && (!pattern.is_excluded() || pattern.always_matches()) { return !pattern.is_excluded(); } } @@ -191,7 +191,7 @@ impl Search { /// Returns `true` if `relative_path` matches the prefix of this pathspec. /// /// For example, the relative path `d` matches `d/`, `d*/`, `d/` and `d/*`, but not `d/d/*` or `dir`. - /// When `leading` is `true`, then `d` matches `d/d` as well. Thus `relative_path` must may be + /// When `leading` is `true`, then `d` matches `d/d` as well. Thus, `relative_path` must may be /// partially included in `pathspec`, otherwise it has to be fully included. pub fn directory_matches_prefix(&self, relative_path: &BStr, leading: bool) -> bool { if self.patterns.is_empty() { @@ -233,7 +233,7 @@ impl Search { }; } } - if is_match { + if is_match && (!pattern.is_excluded() || pattern.always_matches()) { return !pattern.is_excluded(); } } diff --git a/gix-pathspec/tests/search/mod.rs b/gix-pathspec/tests/search/mod.rs index 89f51a320ae..f2c4ee8aaf4 100644 --- a/gix-pathspec/tests/search/mod.rs +++ b/gix-pathspec/tests/search/mod.rs @@ -135,6 +135,47 @@ fn no_pathspecs_match_everything() -> crate::Result { Ok(()) } +#[test] +fn included_directory_and_excluded_subdir_top_level_with_prefix() -> crate::Result { + let mut search = gix_pathspec::Search::from_specs(pathspecs(&[":/foo", ":!/foo/target/"]), None, Path::new("foo"))?; + let m = search + .pattern_matching_relative_path("foo".into(), Some(true), &mut no_attrs) + .expect("matches"); + assert_eq!(m.kind, Verbatim); + + let m = search + .pattern_matching_relative_path("foo/bar".into(), Some(false), &mut no_attrs) + .expect("matches"); + assert_eq!(m.kind, Prefix); + + let m = search + .pattern_matching_relative_path("foo/target".into(), Some(false), &mut no_attrs) + .expect("matches"); + assert_eq!(m.kind, Prefix, "files named `target` are allowed"); + + let m = search + .pattern_matching_relative_path("foo/target".into(), Some(true), &mut no_attrs) + .expect("matches"); + assert!(m.is_excluded(), "directories named `target` are excluded"); + assert_eq!(m.kind, Verbatim); + + let m = search + .pattern_matching_relative_path("foo/target/file".into(), Some(false), &mut no_attrs) + .expect("matches"); + assert!(m.is_excluded(), "everything below `target/` is also excluded"); + assert_eq!(m.kind, Prefix); + + assert!(search.directory_matches_prefix("foo/bar".into(), false)); + assert!(search.directory_matches_prefix("foo/bar".into(), true)); + assert!(search.directory_matches_prefix("foo".into(), false)); + assert!(search.directory_matches_prefix("foo".into(), true)); + assert!(search.can_match_relative_path("foo".into(), Some(true))); + assert!(search.can_match_relative_path("foo".into(), Some(false))); + assert!(search.can_match_relative_path("foo/hi".into(), Some(true))); + assert!(search.can_match_relative_path("foo/hi".into(), Some(false))); + Ok(()) +} + #[test] fn starts_with() -> crate::Result { let mut search = gix_pathspec::Search::from_specs(pathspecs(&["a/*"]), None, Path::new(""))?; @@ -261,8 +302,14 @@ fn simplified_search_respects_all_excluded() -> crate::Result { None, Path::new(""), )?; - assert!(!search.can_match_relative_path("b".into(), None)); - assert!(!search.can_match_relative_path("a".into(), None)); + assert!( + search.can_match_relative_path("b".into(), None), + "non-trivial excludes are ignored in favor of false-positives" + ); + assert!( + search.can_match_relative_path("a".into(), None), + "non-trivial excludes are ignored in favor of false-positives" + ); assert!(search.can_match_relative_path("c".into(), None)); assert!(search.can_match_relative_path("c/".into(), None)); From cd0c8af78fd7a4f06e33ec2ce06b094b5a490877 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 17 Mar 2024 09:19:48 +0100 Subject: [PATCH 02/11] feat!: assure symlinks to directories are ignored with `dir/` declarations in `.gitignore`. Initially, symlinks appear like symlinks thanks to `lstat`, but to do exclude handling correctly these need another `stat` call. However, this is also not done in Git, but done in `libgit2` only, so a toggle was added to act like Git by default, but allow obtaining the same behaviour as git2 for compatibility. --- gix-dir/src/walk/classify.rs | 12 +++- gix-dir/src/walk/mod.rs | 8 +++ gix-dir/tests/fixtures/many-symlinks.sh | 24 ++++++++ gix-dir/tests/walk/mod.rs | 75 +++++++++++++++++++++++++ gix-dir/tests/walk_utils/mod.rs | 1 + 5 files changed, 119 insertions(+), 1 deletion(-) diff --git a/gix-dir/src/walk/classify.rs b/gix-dir/src/walk/classify.rs index 3d5f5c3f913..df0abf2d3e8 100644 --- a/gix-dir/src/walk/classify.rs +++ b/gix-dir/src/walk/classify.rs @@ -134,6 +134,7 @@ pub fn path( emit_ignored, for_deletion, classify_untracked_bare_repositories, + symlinks_to_directories_are_ignored_like_directories, .. }: Options, ctx: &mut Context<'_>, @@ -199,6 +200,15 @@ pub fn path( .pattern_matching_relative_path(rela_path.as_bstr(), kind.map(|ft| ft.is_dir()), ctx.pathspec_attributes) .map(Into::into); + let is_dir = if symlinks_to_directories_are_ignored_like_directories + && ctx.excludes.is_some() + && kind.map_or(false, |ft| ft == entry::Kind::Symlink) + { + path.metadata().ok().map(|md| md.is_dir()).or(Some(false)) + } else { + kind.map(|ft| ft.is_dir()) + }; + let mut maybe_upgrade_to_repository = |current_kind, find_harder: bool| { if recurse_repositories { return current_kind; @@ -245,7 +255,7 @@ pub fn path( .as_mut() .map_or(Ok(None), |stack| { stack - .at_entry(rela_path.as_bstr(), kind.map(|ft| ft.is_dir()), ctx.objects) + .at_entry(rela_path.as_bstr(), is_dir, ctx.objects) .map(|platform| platform.excluded_kind()) }) .map_err(Error::ExcludesAccess)? diff --git a/gix-dir/src/walk/mod.rs b/gix-dir/src/walk/mod.rs index 026edd485c3..a34457fc839 100644 --- a/gix-dir/src/walk/mod.rs +++ b/gix-dir/src/walk/mod.rs @@ -183,6 +183,14 @@ pub struct Options { pub emit_empty_directories: bool, /// If `None`, no entries inside of collapsed directories are emitted. Otherwise, act as specified by `Some(mode)`. pub emit_collapsed: Option, + /// This is a `libgit2` compatibility flag, and if enabled, symlinks that point to directories will be considered a directory + /// when checking for exclusion. + /// + /// This is relevant if `src2` points to `src`, and is excluded with `src2/`. If `false`, `src2` will not be excluded, + /// if `true` it will be excluded as the symlink is considered a directory. + /// + /// In other words, for Git compatibility this flag should be `false`, the default, for `git2` compatibility it should be `true`. + pub symlinks_to_directories_are_ignored_like_directories: bool, } /// All information that is required to perform a dirwalk, and classify paths properly. diff --git a/gix-dir/tests/fixtures/many-symlinks.sh b/gix-dir/tests/fixtures/many-symlinks.sh index f38abf74d18..77320550c39 100644 --- a/gix-dir/tests/fixtures/many-symlinks.sh +++ b/gix-dir/tests/fixtures/many-symlinks.sh @@ -17,3 +17,27 @@ git init immediate-breakout-symlink (cd immediate-breakout-symlink ln -s .. breakout ) + +git init excluded-symlinks-to-dir +(cd excluded-symlinks-to-dir + cat <.gitignore +src1 +src2/ +file1 +file2/ +ignored +ignored-must-be-dir/ +EOF + git add .gitignore && git commit -m "init" + + mkdir src + >src/file + + mkdir ignored-must-be-dir ignored + touch ignored-must-be-dir/file ignored/file + + ln -s src src1 + ln -s src src2 + ln -s src/file file1 + ln -s src/file file2 +) diff --git a/gix-dir/tests/walk/mod.rs b/gix-dir/tests/walk/mod.rs index 34e8ec7d42f..93545524be7 100644 --- a/gix-dir/tests/walk/mod.rs +++ b/gix-dir/tests/walk/mod.rs @@ -16,6 +16,81 @@ use gix_dir::walk::EmissionMode::*; use gix_dir::walk::ForDeletionMode; use gix_ignore::Kind::*; +#[test] +#[cfg_attr(windows, ignore = "symlinks the way they are organized don't yet work on windows")] +fn symlink_to_dir_can_be_excluded() -> crate::Result { + let root = fixture_in("many-symlinks", "excluded-symlinks-to-dir"); + let ((out, _root), entries) = collect(&root, None, |keep, ctx| { + walk( + &root, + ctx, + gix_dir::walk::Options { + emit_ignored: Some(Matching), + ..options() + }, + keep, + ) + }); + assert_eq!( + out, + walk::Outcome { + read_dir_calls: 2, + returned_entries: entries.len(), + seen_entries: 9, + } + ); + + assert_eq!( + entries, + &[ + entry("file1", Ignored(Expendable), Symlink), + entry("file2", Untracked, Symlink), + entry("ignored", Ignored(Expendable), Directory), + entry("ignored-must-be-dir", Ignored(Expendable), Directory), + entry("src/file", Untracked, File), + entry("src1", Ignored(Expendable), Symlink), + entry("src2", Untracked, Symlink), /* marked as src2/ in .gitignore */ + ], + "by default, symlinks are counted as files only, even if they point to a directory, when handled by the exclude machinery" + ); + + let ((out, _root), entries) = collect(&root, None, |keep, ctx| { + walk( + &root, + ctx, + gix_dir::walk::Options { + emit_ignored: Some(Matching), + symlinks_to_directories_are_ignored_like_directories: true, + ..options() + }, + keep, + ) + }); + assert_eq!( + out, + walk::Outcome { + read_dir_calls: 2, + returned_entries: entries.len(), + seen_entries: 9, + } + ); + + assert_eq!( + entries, + &[ + entry("file1", Ignored(Expendable), Symlink), + entry("file2", Untracked, Symlink), + entry("ignored", Ignored(Expendable), Directory), + entry("ignored-must-be-dir", Ignored(Expendable), Directory), + entry("src/file", Untracked, File), + entry("src1", Ignored(Expendable), Symlink), + entry("src2", Ignored(Expendable), Symlink), /* marked as src2/ in .gitignore */ + ], + "with libgit2 compatibility enabled, symlinks to directories are treated like a directory, not symlink" + ); + Ok(()) +} + #[test] #[cfg_attr(windows, ignore = "symlinks the way they are organized don't yet work on windows")] fn root_may_not_lead_through_symlinks() -> crate::Result { diff --git a/gix-dir/tests/walk_utils/mod.rs b/gix-dir/tests/walk_utils/mod.rs index 13ffcb36851..df81d082267 100644 --- a/gix-dir/tests/walk_utils/mod.rs +++ b/gix-dir/tests/walk_utils/mod.rs @@ -31,6 +31,7 @@ pub fn options_emit_all() -> walk::Options { emit_untracked: walk::EmissionMode::Matching, emit_empty_directories: true, emit_collapsed: None, + symlinks_to_directories_are_ignored_like_directories: false, } } From b90ab3dd5e8986e28624f3e1cf54f8a9171ce9f0 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 17 Mar 2024 10:29:35 +0100 Subject: [PATCH 03/11] adapt to changes in `gix-dir` --- gix/src/dirwalk.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/gix/src/dirwalk.rs b/gix/src/dirwalk.rs index ab159cff2f5..98a655899f3 100644 --- a/gix/src/dirwalk.rs +++ b/gix/src/dirwalk.rs @@ -17,6 +17,7 @@ pub struct Options { emit_empty_directories: bool, classify_untracked_bare_repositories: bool, emit_collapsed: Option, + symlinks_to_directories_are_ignored_like_directories: bool, pub(crate) empty_patterns_match_prefix: bool, } @@ -36,6 +37,7 @@ impl Options { classify_untracked_bare_repositories: false, emit_collapsed: None, empty_patterns_match_prefix: false, + symlinks_to_directories_are_ignored_like_directories: false, } } } @@ -54,6 +56,8 @@ impl From for gix_dir::walk::Options { emit_empty_directories: v.emit_empty_directories, classify_untracked_bare_repositories: v.classify_untracked_bare_repositories, emit_collapsed: v.emit_collapsed, + symlinks_to_directories_are_ignored_like_directories: v + .symlinks_to_directories_are_ignored_like_directories, } } } @@ -180,4 +184,23 @@ impl Options { self.emit_collapsed = value; self } + + /// This is a `libgit2` compatibility flag, and if enabled, symlinks that point to directories will be considered a directory + /// when checking for exclusion. + /// + /// This is relevant if `src2` points to `src`, and is excluded with `src2/`. If `false`, `src2` will not be excluded, + /// if `true` it will be excluded as the symlink is considered a directory. + /// + /// In other words, for Git compatibility this flag should be `false`, the default, for `git2` compatibility it should be `true`. + pub fn symlinks_to_directories_are_ignored_like_directories(&mut self, toggle: bool) -> &mut Self { + self.symlinks_to_directories_are_ignored_like_directories = toggle; + self + } + + /// Like [`symlinks_to_directories_are_ignored_like_directories()`](Self::symlinks_to_directories_are_ignored_like_directories), + /// but only requires a mutably borrowed instance. + pub fn set_symlinks_to_directories_are_ignored_like_directories(&mut self, value: bool) -> &mut Self { + self.symlinks_to_directories_are_ignored_like_directories = value; + self + } } From b05af9f86119ba5c71a19d632312fd66edb30a0e Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 17 Mar 2024 13:57:24 +0100 Subject: [PATCH 04/11] remove a TODO as it doesn't make sense anymore. The mode can't be an enum as it would degenerate information, possibly, which is something we really have to avoid. If an enum is desired one day, have a `.kind()` method that creates an enum from the bitflag. --- gix-index/src/entry/mod.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/gix-index/src/entry/mod.rs b/gix-index/src/entry/mod.rs index 15fc1d67e36..75c8fa67121 100644 --- a/gix-index/src/entry/mod.rs +++ b/gix-index/src/entry/mod.rs @@ -20,9 +20,6 @@ mod write; use bitflags::bitflags; -// TODO: we essentially treat this as an enum with the only exception being -// that `FILE_EXECUTABLE.contains(FILE)` works might want to turn this into an -// enum proper bitflags! { /// The kind of file of an entry. #[derive(Copy, Clone, Debug, PartialEq, Eq)] From e7e91cfaed6d40a773a65fc077b99d2e26bb28f5 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 17 Mar 2024 15:30:17 +0100 Subject: [PATCH 05/11] fix: allow traversals to start from a symlink that points to a directory Now symlinked repositories can be traversed as well. --- gix-dir/src/walk/function.rs | 18 ++++++++++---- gix-dir/tests/fixtures/many-symlinks.sh | 2 ++ gix-dir/tests/walk/mod.rs | 33 +++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 5 deletions(-) diff --git a/gix-dir/src/walk/function.rs b/gix-dir/src/walk/function.rs index 5d75311eaf3..0e9a03950c2 100644 --- a/gix-dir/src/walk/function.rs +++ b/gix-dir/src/walk/function.rs @@ -69,9 +69,16 @@ pub fn walk( )?; if !can_recurse( buf.as_bstr(), - root_info, + if root == worktree_root && root_info.disk_kind == Some(entry::Kind::Symlink) && current.is_dir() { + classify::Outcome { + disk_kind: Some(entry::Kind::Directory), + ..root_info + } + } else { + root_info + }, options.for_deletion, - worktree_root_is_repository, /* is root */ + worktree_root_is_repository, delegate, ) { if buf.is_empty() && !root_info.disk_kind.map_or(false, |kind| kind.is_dir()) { @@ -147,16 +154,17 @@ pub(super) fn can_recurse( rela_path: &BStr, info: classify::Outcome, for_deletion: Option, - is_root: bool, + worktree_root_is_repository: bool, delegate: &mut dyn Delegate, ) -> bool { - if info.disk_kind.map_or(true, |k| !k.is_dir()) { + let is_dir = info.disk_kind.map_or(false, |k| k.is_dir()); + if !is_dir { return false; } delegate.can_recurse( EntryRef::from_outcome(Cow::Borrowed(rela_path), info), for_deletion, - is_root, + worktree_root_is_repository, ) } diff --git a/gix-dir/tests/fixtures/many-symlinks.sh b/gix-dir/tests/fixtures/many-symlinks.sh index 77320550c39..89cda9a0bcd 100644 --- a/gix-dir/tests/fixtures/many-symlinks.sh +++ b/gix-dir/tests/fixtures/many-symlinks.sh @@ -41,3 +41,5 @@ EOF ln -s src/file file1 ln -s src/file file2 ) + +ln -s excluded-symlinks-to-dir worktree-root-is-symlink \ No newline at end of file diff --git a/gix-dir/tests/walk/mod.rs b/gix-dir/tests/walk/mod.rs index 93545524be7..f5acd3367e5 100644 --- a/gix-dir/tests/walk/mod.rs +++ b/gix-dir/tests/walk/mod.rs @@ -118,6 +118,39 @@ fn root_may_not_lead_through_symlinks() -> crate::Result { Ok(()) } +#[test] +#[cfg_attr(windows, ignore = "symlinks the way they are organized don't yet work on windows")] +fn root_may_be_a_symlink_if_it_is_the_worktree() -> crate::Result { + let root = fixture_in("many-symlinks", "worktree-root-is-symlink"); + let ((_out, _root), entries) = collect(&root, None, |keep, ctx| { + walk( + &root, + ctx, + gix_dir::walk::Options { + emit_ignored: Some(Matching), + symlinks_to_directories_are_ignored_like_directories: true, + ..options() + }, + keep, + ) + }); + + assert_eq!( + entries, + &[ + entry("file1", Ignored(Expendable), Symlink), + entry("file2", Untracked, Symlink), + entry("ignored", Ignored(Expendable), Directory), + entry("ignored-must-be-dir", Ignored(Expendable), Directory), + entry("src/file", Untracked, File), + entry("src1", Ignored(Expendable), Symlink), + entry("src2", Ignored(Expendable), Symlink), /* marked as src2/ in .gitignore */ + ], + "it traversed the directory normally - without this capability, symlinked repos can't be traversed" + ); + Ok(()) +} + #[test] fn empty_root() -> crate::Result { let root = fixture("empty"); From 35b74e7992a5a732b5ae8dbdc264479a91b1d60d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 17 Mar 2024 19:33:06 +0100 Subject: [PATCH 06/11] feat!: allow directory walk to be interrupted with `should_interrupt` flag. That way, it can be much more responsive to interruption. --- gix-dir/src/walk/mod.rs | 7 +++++++ gix-dir/src/walk/readdir.rs | 4 ++++ gix-dir/tests/walk/mod.rs | 19 +++++++++++++++++++ gix-dir/tests/walk_utils/mod.rs | 10 +++++++++- 4 files changed, 39 insertions(+), 1 deletion(-) diff --git a/gix-dir/src/walk/mod.rs b/gix-dir/src/walk/mod.rs index a34457fc839..c090e4b7d2c 100644 --- a/gix-dir/src/walk/mod.rs +++ b/gix-dir/src/walk/mod.rs @@ -1,6 +1,7 @@ use crate::{entry, EntryRef}; use bstr::BStr; use std::path::PathBuf; +use std::sync::atomic::AtomicBool; /// A type returned by the [`Delegate::emit()`] as passed to [`walk()`](function::walk()). #[derive(Debug, Copy, Clone, Eq, PartialEq)] @@ -195,6 +196,10 @@ pub struct Options { /// All information that is required to perform a dirwalk, and classify paths properly. pub struct Context<'a> { + /// If not `None`, it will be checked before entering any directory to trigger early interruption. + /// + /// If this flag is `true` at any point in the iteration, it will abort with an error. + pub should_interrupt: Option<&'a AtomicBool>, /// The `git_dir` of the parent repository, after a call to [`gix_path::realpath()`]. /// /// It's used to help us differentiate our own `.git` directory from nested unrelated repositories, @@ -269,6 +274,8 @@ pub struct Outcome { #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { + #[error("Interrupted")] + Interrupted, #[error("Worktree root at '{}' is not a directory", root.display())] WorktreeRootIsFile { root: PathBuf }, #[error("Traversal root '{}' contains relative path components and could not be normalized", root.display())] diff --git a/gix-dir/src/walk/readdir.rs b/gix-dir/src/walk/readdir.rs index cda368a05fc..0618822a652 100644 --- a/gix-dir/src/walk/readdir.rs +++ b/gix-dir/src/walk/readdir.rs @@ -1,6 +1,7 @@ use bstr::{BStr, BString, ByteSlice}; use std::borrow::Cow; use std::path::{Path, PathBuf}; +use std::sync::atomic::Ordering; use crate::entry::{PathspecMatch, Status}; use crate::walk::function::{can_recurse, emit_entry}; @@ -23,6 +24,9 @@ pub(super) fn recursive( out: &mut Outcome, state: &mut State, ) -> Result<(Action, bool), Error> { + if ctx.should_interrupt.map_or(false, |flag| flag.load(Ordering::Relaxed)) { + return Err(Error::Interrupted); + } out.read_dir_calls += 1; let entries = gix_fs::read_dir(current, opts.precompose_unicode).map_err(|err| Error::ReadDir { path: current.to_owned(), diff --git a/gix-dir/tests/walk/mod.rs b/gix-dir/tests/walk/mod.rs index f5acd3367e5..3740a014dc4 100644 --- a/gix-dir/tests/walk/mod.rs +++ b/gix-dir/tests/walk/mod.rs @@ -1,5 +1,6 @@ use gix_dir::{walk, EntryRef}; use pretty_assertions::assert_eq; +use std::sync::atomic::AtomicBool; use crate::walk_utils::{ collect, collect_filtered, collect_filtered_with_cwd, entry, entry_dirstat, entry_nokind, entry_nomatch, entryps, @@ -151,6 +152,24 @@ fn root_may_be_a_symlink_if_it_is_the_worktree() -> crate::Result { Ok(()) } +#[test] +fn should_interrupt_works_even_in_empty_directories() { + let root = fixture("empty"); + let should_interrupt = AtomicBool::new(true); + let err = try_collect_filtered_opts_collect( + &root, + None, + |keep, ctx| walk(&root, ctx, gix_dir::walk::Options { ..options() }, keep), + None::<&str>, + Options { + should_interrupt: Some(&should_interrupt), + ..Default::default() + }, + ) + .unwrap_err(); + assert!(matches!(err, gix_dir::walk::Error::Interrupted)); +} + #[test] fn empty_root() -> crate::Result { let root = fixture("empty"); diff --git a/gix-dir/tests/walk_utils/mod.rs b/gix-dir/tests/walk_utils/mod.rs index df81d082267..ee279d4d857 100644 --- a/gix-dir/tests/walk_utils/mod.rs +++ b/gix-dir/tests/walk_utils/mod.rs @@ -2,6 +2,7 @@ use bstr::BStr; use gix_dir::{entry, walk, Entry}; use gix_testtools::scripted_fixture_read_only; use std::path::{Path, PathBuf}; +use std::sync::atomic::AtomicBool; pub fn fixture_in(filename: &str, name: &str) -> PathBuf { let root = scripted_fixture_read_only(format!("{filename}.sh")).expect("script works"); @@ -269,7 +270,11 @@ pub fn try_collect_filtered_opts( cb: impl FnOnce(&mut dyn walk::Delegate, walk::Context) -> Result<(walk::Outcome, PathBuf), walk::Error>, patterns: impl IntoIterator>, delegate: &mut dyn gix_dir::walk::Delegate, - Options { fresh_index, git_dir }: Options<'_>, + Options { + fresh_index, + git_dir, + should_interrupt, + }: Options<'_>, ) -> Result<(walk::Outcome, PathBuf), walk::Error> { let git_dir = worktree_root.join(git_dir.unwrap_or(".git")); let mut index = std::fs::read(git_dir.join("index")).ok().map_or_else( @@ -343,6 +348,7 @@ pub fn try_collect_filtered_opts( excludes: Some(&mut stack), objects: &gix_object::find::Never, explicit_traversal_root, + should_interrupt, }, ) } @@ -350,6 +356,7 @@ pub fn try_collect_filtered_opts( pub struct Options<'a> { pub fresh_index: bool, pub git_dir: Option<&'a str>, + pub should_interrupt: Option<&'a AtomicBool>, } impl<'a> Options<'a> { @@ -366,6 +373,7 @@ impl<'a> Default for Options<'a> { Options { fresh_index: true, git_dir: None, + should_interrupt: None, } } } From e48ba081ae1c76c05215725647399e0d060b7134 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 17 Mar 2024 20:15:15 +0100 Subject: [PATCH 07/11] adapt to changes in `gix-dir` --- gix-status/src/index_as_worktree_with_renames/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/gix-status/src/index_as_worktree_with_renames/mod.rs b/gix-status/src/index_as_worktree_with_renames/mod.rs index 39f255f3788..0932e7d4f4d 100644 --- a/gix-status/src/index_as_worktree_with_renames/mod.rs +++ b/gix-status/src/index_as_worktree_with_renames/mod.rs @@ -87,6 +87,7 @@ pub(super) mod function { gix_dir::walk( worktree, gix_dir::walk::Context { + should_interrupt: Some(ctx.should_interrupt), git_dir_realpath: dirwalk_ctx.git_dir_realpath, current_dir: dirwalk_ctx.current_dir, index, From ba3f2db0b65582a917466127dc16e4945104b01b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 17 Mar 2024 17:28:50 +0100 Subject: [PATCH 08/11] feat!: provide `Repository::dirwalk_iter()`. That way, more copying happens but the usability increases tremendously as well. It's breaking as public types moved from `repository::dirwalk` to `dirwalk`, dissolving `repository::dirwalk` entirely. --- gix/Cargo.toml | 2 +- gix/src/dirwalk/iter.rs | 188 +++++++++++++++++++++ gix/src/dirwalk/mod.rs | 91 ++++++++++ gix/src/{dirwalk.rs => dirwalk/options.rs} | 22 +-- gix/src/lib.rs | 4 +- gix/src/repository/dirwalk.rs | 71 ++++---- gix/src/repository/mod.rs | 2 +- gix/src/status/index_worktree.rs | 32 +--- gix/src/status/mod.rs | 36 +--- gix/src/status/platform.rs | 6 +- gix/src/util.rs | 81 +++++++++ gix/src/worktree/mod.rs | 16 +- gix/tests/repository/mod.rs | 43 ++++- gix/tests/repository/open.rs | 10 +- justfile | 1 + 15 files changed, 472 insertions(+), 133 deletions(-) create mode 100644 gix/src/dirwalk/iter.rs create mode 100644 gix/src/dirwalk/mod.rs rename gix/src/{dirwalk.rs => dirwalk/options.rs} (92%) create mode 100644 gix/src/util.rs diff --git a/gix/Cargo.toml b/gix/Cargo.toml index 8f7f6c9327f..a2c1decaa6d 100644 --- a/gix/Cargo.toml +++ b/gix/Cargo.toml @@ -74,7 +74,7 @@ interrupt = ["dep:signal-hook", "gix-tempfile/signals"] index = ["dep:gix-index"] ## Support directory walks with Git-style annoations. -dirwalk = ["dep:gix-dir"] +dirwalk = ["dep:gix-dir", "attributes", "excludes"] ## Access to credential helpers, which provide credentials for URLs. # Note that `gix-negotiate` just piggibacks here, as 'credentials' is equivalent to 'fetch & push' right now. diff --git a/gix/src/dirwalk/iter.rs b/gix/src/dirwalk/iter.rs new file mode 100644 index 00000000000..6bfde0ef8f7 --- /dev/null +++ b/gix/src/dirwalk/iter.rs @@ -0,0 +1,188 @@ +use super::Iter; +use crate::bstr::BString; +use crate::util::OwnedOrStaticAtomicBool; +use crate::worktree::IndexPersistedOrInMemory; +use crate::{dirwalk, PathspecDetached, Repository}; +use std::path::PathBuf; + +/// An entry of the directory walk as returned by the [iterator](Iter). +pub struct Item { + /// The directory entry. + pub entry: gix_dir::Entry, + /// `collapsed_directory_status` is `Some(dir_status)` if this entry was part of a directory with the given + /// `dir_status` that wasn't the same as the one of `entry` and if [gix_dir::walk::Options::emit_collapsed] was + /// [gix_dir::walk::CollapsedEntriesEmissionMode::OnStatusMismatch]. It will also be `Some(dir_status)` if that option + /// was [gix_dir::walk::CollapsedEntriesEmissionMode::All]. + pub collapsed_directory_status: Option, +} + +impl Item { + fn new(entry: gix_dir::EntryRef<'_>, collapsed_directory_status: Option) -> Self { + Item { + entry: entry.to_owned(), + collapsed_directory_status, + } + } +} + +/// The outcome of fully consumed [dirwalk iterator](Iter). +pub struct Outcome { + /// The index originally passed in to create the iterator. + pub index: IndexPersistedOrInMemory, + /// The excludes stack used for the dirwalk, for access of `.gitignore` information. + pub excludes: gix_worktree::Stack, + /// The pathspecs used to guide the operation, + pub pathspec: PathspecDetached, + /// The root actually being used for the traversal, and useful to transform the paths returned for the user. + /// It's always within the [`work-dir`](Repository::work_dir). + pub traversal_root: PathBuf, + /// The actual result of the dirwalk. + pub dirwalk: gix_dir::walk::Outcome, +} + +/// The error returned by [Repository::dirwalk_iter()]. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Failed to spawn producer thread")] + #[cfg(feature = "parallel")] + SpawnThread(#[from] std::io::Error), + #[error(transparent)] + #[cfg(not(feature = "parallel"))] + Dirwalk(#[from] dirwalk::Error), + #[error(transparent)] + #[cfg(not(feature = "parallel"))] + DetachPathSpec(#[from] std::io::Error), +} + +/// Lifecycle +impl Iter { + pub(crate) fn new( + repo: &Repository, + index: IndexPersistedOrInMemory, + patterns: Vec, + should_interrupt: OwnedOrStaticAtomicBool, + options: dirwalk::Options, + ) -> Result { + #[cfg(feature = "parallel")] + { + let repo = repo.clone().into_sync(); + let (tx, rx) = std::sync::mpsc::channel(); + let handle = std::thread::Builder::new() + .name("gix::dirwalk::iter::producer".into()) + .spawn({ + let should_interrupt = should_interrupt.clone(); + move || -> Result { + let repo: Repository = repo.into(); + let mut collect = Collect { tx }; + let out = repo.dirwalk(&index, patterns, &should_interrupt, options, &mut collect)?; + Ok(Outcome { + index, + excludes: out.excludes.detach(), + pathspec: out.pathspec.detach().map_err(|err| { + dirwalk::Error::Walk(gix_dir::walk::Error::ReadDir { + path: repo.git_dir().to_owned(), + source: err, + }) + })?, + traversal_root: out.traversal_root, + dirwalk: out.dirwalk, + }) + } + })?; + + Ok(Iter { + rx_and_join: Some((rx, handle)), + should_interrupt, + out: None, + }) + } + #[cfg(not(feature = "parallel"))] + { + let mut collect = Collect { items: Vec::new() }; + let out = repo.dirwalk(&index, patterns, &should_interrupt, options, &mut collect)?; + let out = Outcome { + index, + excludes: out.excludes.detach(), + pathspec: out.pathspec.detach()?, + traversal_root: out.traversal_root, + dirwalk: out.dirwalk, + }; + + Ok(Iter { + items: collect.items.into_iter(), + out: Some(out), + }) + } + } +} + +/// Access +impl Iter { + /// Return the outcome of the iteration, or `None` if the iterator isn't fully consumed. + pub fn outcome_mut(&mut self) -> Option<&mut Outcome> { + self.out.as_mut() + } + + /// Turn the iterator into the iteration outcome, which is `None` on error or if the iteration + /// isn't complete. + pub fn into_outcome(mut self) -> Option { + self.out.take() + } +} + +impl Iterator for Iter { + type Item = Result; + + fn next(&mut self) -> Option { + #[cfg(feature = "parallel")] + { + let (rx, _join) = self.rx_and_join.as_ref()?; + match rx.recv().ok() { + Some(item) => Some(Ok(item)), + None => { + let (_rx, handle) = self.rx_and_join.take()?; + match handle.join().expect("no panic") { + Ok(out) => { + self.out = Some(out); + None + } + Err(err) => Some(Err(err)), + } + } + } + } + #[cfg(not(feature = "parallel"))] + self.items.next().map(Ok) + } +} + +#[cfg(feature = "parallel")] +impl Drop for Iter { + fn drop(&mut self) { + crate::util::parallel_iter_drop(self.rx_and_join.take(), &self.should_interrupt); + } +} + +struct Collect { + #[cfg(feature = "parallel")] + tx: std::sync::mpsc::Sender, + #[cfg(not(feature = "parallel"))] + items: Vec, +} + +impl gix_dir::walk::Delegate for Collect { + fn emit( + &mut self, + entry: gix_dir::EntryRef<'_>, + collapsed_directory_status: Option, + ) -> gix_dir::walk::Action { + // NOTE: we assume that the receiver triggers interruption so the operation will stop if the receiver is down. + let item = Item::new(entry, collapsed_directory_status); + #[cfg(feature = "parallel")] + self.tx.send(item).ok(); + #[cfg(not(feature = "parallel"))] + self.items.push(item); + gix_dir::walk::Action::Continue + } +} diff --git a/gix/src/dirwalk/mod.rs b/gix/src/dirwalk/mod.rs new file mode 100644 index 00000000000..4d590c3f36c --- /dev/null +++ b/gix/src/dirwalk/mod.rs @@ -0,0 +1,91 @@ +use gix_dir::walk::{CollapsedEntriesEmissionMode, EmissionMode, ForDeletionMode}; + +use crate::{config, AttributeStack, Pathspec}; +use std::path::PathBuf; + +mod options; + +/// +#[allow(clippy::empty_docs)] +pub mod iter; + +/// An iterator for entries in a directory walk. +/// +/// ### Parallel Operation +/// +/// Note that without the `parallel` feature, the iterator becomes 'serial', which means that all entries will be traversed +/// in advance and it cannot be interrupted unless the interrupt flag is set from another thread. +/// +/// It's a crutch that is just there to make single-threaded applications possible at all, as it's not really an iterator +/// anymore. If this matters, better run [Repository::dirwalk()](crate::Repository::dirwalk) by hand as it provides all +/// control one would need, just not as an iterator. +/// +/// Also, even with `parallel` set, the first call to `next()` will block until there is an item available, without a chance +/// to interrupt unless the interrupt flag is set from another thread. +pub struct Iter { + #[cfg(feature = "parallel")] + #[allow(clippy::type_complexity)] + rx_and_join: Option<( + std::sync::mpsc::Receiver, + std::thread::JoinHandle>, + )>, + #[cfg(feature = "parallel")] + should_interrupt: crate::util::OwnedOrStaticAtomicBool, + /// Without parallelization, the iterator has to buffer all changes in advance. + #[cfg(not(feature = "parallel"))] + items: std::vec::IntoIter, + /// The outcome of the operation, only available once the operation has ended. + out: Option, +} + +/// The error returned by [dirwalk()](crate::Repository::dirwalk()). +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error(transparent)] + Walk(#[from] gix_dir::walk::Error), + #[error("A working tree is required to perform a directory walk")] + MissingWorkDir, + #[error(transparent)] + Excludes(#[from] config::exclude_stack::Error), + #[error(transparent)] + Pathspec(#[from] crate::pathspec::init::Error), + #[error(transparent)] + Prefix(#[from] gix_path::realpath::Error), + #[error(transparent)] + FilesystemOptions(#[from] config::boolean::Error), +} + +/// The outcome of the [dirwalk()](crate::Repository::dirwalk). +pub struct Outcome<'repo> { + /// The excludes stack used for the dirwalk, for access of `.gitignore` information. + pub excludes: AttributeStack<'repo>, + /// The pathspecs used to guide the operation, + pub pathspec: Pathspec<'repo>, + /// The root actually being used for the traversal, and useful to transform the paths returned for the user. + /// It's always within the [`work-dir`](crate::Repository::work_dir). + pub traversal_root: PathBuf, + /// The actual result of the dirwalk. + pub dirwalk: gix_dir::walk::Outcome, +} + +/// Options for use in the [`Repository::dirwalk()`](crate::Repository::dirwalk()) function. +/// +/// Note that all values start out disabled. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] +pub struct Options { + precompose_unicode: bool, + ignore_case: bool, + + recurse_repositories: bool, + emit_pruned: bool, + emit_ignored: Option, + for_deletion: Option, + emit_tracked: bool, + emit_untracked: EmissionMode, + emit_empty_directories: bool, + classify_untracked_bare_repositories: bool, + emit_collapsed: Option, + symlinks_to_directories_are_ignored_like_directories: bool, + pub(crate) empty_patterns_match_prefix: bool, +} diff --git a/gix/src/dirwalk.rs b/gix/src/dirwalk/options.rs similarity index 92% rename from gix/src/dirwalk.rs rename to gix/src/dirwalk/options.rs index 98a655899f3..1ce6a262c53 100644 --- a/gix/src/dirwalk.rs +++ b/gix/src/dirwalk/options.rs @@ -1,26 +1,6 @@ +use crate::dirwalk::Options; use gix_dir::walk::{CollapsedEntriesEmissionMode, EmissionMode, ForDeletionMode}; -/// Options for use in the [`Repository::dirwalk()`](crate::Repository::dirwalk()) function. -/// -/// Note that all values start out disabled. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] -pub struct Options { - precompose_unicode: bool, - ignore_case: bool, - - recurse_repositories: bool, - emit_pruned: bool, - emit_ignored: Option, - for_deletion: Option, - emit_tracked: bool, - emit_untracked: EmissionMode, - emit_empty_directories: bool, - classify_untracked_bare_repositories: bool, - emit_collapsed: Option, - symlinks_to_directories_are_ignored_like_directories: bool, - pub(crate) empty_patterns_match_prefix: bool, -} - /// Construction impl Options { pub(crate) fn from_fs_caps(caps: gix_fs::Capabilities) -> Self { diff --git a/gix/src/lib.rs b/gix/src/lib.rs index 96bd413f8ff..e29ccbfc410 100644 --- a/gix/src/lib.rs +++ b/gix/src/lib.rs @@ -177,8 +177,8 @@ pub use types::{Pathspec, PathspecDetached, Submodule}; #[allow(clippy::empty_docs)] pub mod clone; pub mod commit; -#[cfg(feature = "dirwalk")] /// +#[cfg(feature = "dirwalk")] #[allow(clippy::empty_docs)] pub mod dirwalk; pub mod head; @@ -191,6 +191,8 @@ pub mod repository; #[cfg(feature = "attributes")] pub mod submodule; pub mod tag; +#[cfg(any(feature = "dirwalk", feature = "status"))] +pub(crate) mod util; /// #[allow(clippy::empty_docs)] diff --git a/gix/src/repository/dirwalk.rs b/gix/src/repository/dirwalk.rs index e0141613e7f..c2600227a33 100644 --- a/gix/src/repository/dirwalk.rs +++ b/gix/src/repository/dirwalk.rs @@ -1,37 +1,8 @@ -use crate::bstr::BStr; -use crate::{config, dirwalk, AttributeStack, Pathspec, Repository}; -use std::path::PathBuf; - -/// The error returned by [dirwalk()](Repository::dirwalk()). -#[derive(Debug, thiserror::Error)] -#[allow(missing_docs)] -pub enum Error { - #[error(transparent)] - Walk(#[from] gix_dir::walk::Error), - #[error("A working tree is required to perform a directory walk")] - MissingWorkDir, - #[error(transparent)] - Excludes(#[from] config::exclude_stack::Error), - #[error(transparent)] - Pathspec(#[from] crate::pathspec::init::Error), - #[error(transparent)] - Prefix(#[from] gix_path::realpath::Error), - #[error(transparent)] - FilesystemOptions(#[from] config::boolean::Error), -} - -/// The outcome of the [dirwalk()](Repository::dirwalk). -pub struct Outcome<'repo> { - /// The excludes stack used for the dirwalk, for access of `.gitignore` information. - pub excludes: AttributeStack<'repo>, - /// The pathspecs used to guide the operation, - pub pathspec: Pathspec<'repo>, - /// The root actually being used for the traversal, and useful to transform the paths returned for the user. - /// It's always within the [`work-dir`](Repository::work_dir). - pub traversal_root: PathBuf, - /// The actual result of the dirwalk. - pub dirwalk: gix_dir::walk::Outcome, -} +use crate::bstr::{BStr, BString}; +use crate::util::OwnedOrStaticAtomicBool; +use crate::worktree::IndexPersistedOrInMemory; +use crate::{config, dirwalk, Repository}; +use std::sync::atomic::AtomicBool; impl Repository { /// Return default options suitable for performing a directory walk on this repository. @@ -42,7 +13,8 @@ impl Repository { } /// Perform a directory walk configured with `options` under control of the `delegate`. Use `patterns` to - /// further filter entries. + /// further filter entries. `should_interrupt` is polled to see if an interrupt is requested, causing an + /// error to be returned instead. /// /// The `index` is used to determine if entries are tracked, and for excludes and attributes /// lookup. Note that items will only count as tracked if they have the [`gix_index::entry::Flags::UPTODATE`] @@ -53,11 +25,12 @@ impl Repository { &self, index: &gix_index::State, patterns: impl IntoIterator>, + should_interrupt: &AtomicBool, options: dirwalk::Options, delegate: &mut dyn gix_dir::walk::Delegate, - ) -> Result, Error> { + ) -> Result, dirwalk::Error> { let _span = gix_trace::coarse!("gix::dirwalk"); - let workdir = self.work_dir().ok_or(Error::MissingWorkDir)?; + let workdir = self.work_dir().ok_or(dirwalk::Error::MissingWorkDir)?; let mut excludes = self.excludes( index, None, @@ -78,6 +51,7 @@ impl Repository { let (outcome, traversal_root) = gix_dir::walk( workdir, gix_dir::walk::Context { + should_interrupt: Some(should_interrupt), git_dir_realpath: git_dir_realpath.as_ref(), current_dir: self.current_dir(), index, @@ -101,11 +75,32 @@ impl Repository { delegate, )?; - Ok(Outcome { + Ok(dirwalk::Outcome { dirwalk: outcome, traversal_root, excludes, pathspec, }) } + + /// Create an iterator over a running traversal, which stops if the iterator is dropped. All arguments + /// are the same as in [`dirwalk()`](Self::dirwalk). + /// + /// `should_interrupt` should be set to `Default::default()` if it is supposed to be unused. + /// Otherwise, it can be created by passing a `&'static AtomicBool`, `&Arc` or `Arc`. + pub fn dirwalk_iter( + &self, + index: impl Into, + patterns: impl IntoIterator>, + should_interrupt: OwnedOrStaticAtomicBool, + options: dirwalk::Options, + ) -> Result { + dirwalk::Iter::new( + self, + index.into(), + patterns.into_iter().map(Into::into).collect(), + should_interrupt, + options, + ) + } } diff --git a/gix/src/repository/mod.rs b/gix/src/repository/mod.rs index 68b9b491c74..9c2ffab4274 100644 --- a/gix/src/repository/mod.rs +++ b/gix/src/repository/mod.rs @@ -45,7 +45,7 @@ mod config; pub mod diff; /// #[cfg(feature = "dirwalk")] -pub mod dirwalk; +mod dirwalk; /// #[cfg(feature = "attributes")] pub mod filter; diff --git a/gix/src/status/index_worktree.rs b/gix/src/status/index_worktree.rs index 0248b3d27db..d95ab5a2438 100644 --- a/gix/src/status/index_worktree.rs +++ b/gix/src/status/index_worktree.rs @@ -297,7 +297,7 @@ pub struct Iter { std::thread::JoinHandle>, )>, #[cfg(feature = "parallel")] - should_interrupt: crate::status::OwnedOrStaticAtomic, + should_interrupt: crate::status::OwnedOrStaticAtomicBool, /// Without parallelization, the iterator has to buffer all changes in advance. #[cfg(not(feature = "parallel"))] items: std::vec::IntoIter, @@ -744,11 +744,18 @@ pub mod iter { } } + /// Access impl super::Iter { /// Return the outcome of the iteration, or `None` if the iterator isn't fully consumed. pub fn outcome_mut(&mut self) -> Option<&mut Outcome> { self.out.as_mut() } + + /// Turn the iterator into the iteration outcome, which is `None` on error or if the iteration + /// isn't complete. + pub fn into_outcome(mut self) -> Option { + self.out.take() + } } impl super::Iter { @@ -781,28 +788,7 @@ pub mod iter { #[cfg(feature = "parallel")] impl Drop for super::Iter { fn drop(&mut self) { - use crate::status::OwnedOrStaticAtomic; - let Some((rx, handle)) = self.rx_and_join.take() else { - return; - }; - let prev = self.should_interrupt.swap(true, std::sync::atomic::Ordering::Relaxed); - let undo = match &self.should_interrupt { - OwnedOrStaticAtomic::Shared(flag) => *flag, - OwnedOrStaticAtomic::Owned { flag, private: false } => flag.as_ref(), - OwnedOrStaticAtomic::Owned { private: true, .. } => { - // Leak the handle to let it shut down in the background, so drop returns more quickly. - drop((rx, handle)); - return; - } - }; - // Wait until there is time to respond before we undo the change. - handle.join().ok(); - undo.fetch_update( - std::sync::atomic::Ordering::SeqCst, - std::sync::atomic::Ordering::SeqCst, - |current| current.then_some(prev), - ) - .ok(); + crate::util::parallel_iter_drop(self.rx_and_join.take(), &self.should_interrupt); } } diff --git a/gix/src/status/mod.rs b/gix/src/status/mod.rs index 3a565f0ef13..134a16dafa9 100644 --- a/gix/src/status/mod.rs +++ b/gix/src/status/mod.rs @@ -1,9 +1,7 @@ use crate::config::cache::util::ApplyLeniencyDefault; +use crate::util::OwnedOrStaticAtomicBool; use crate::{config, Repository}; pub use gix_status as plumbing; -use std::ops::Deref; -use std::sync::atomic::AtomicBool; -use std::sync::Arc; /// A structure to hold options configuring the status request, which can then be turned into an iterator. pub struct Platform<'repo, Progress> @@ -15,37 +13,7 @@ where index: Option, submodules: Submodule, index_worktree_options: index_worktree::Options, - should_interrupt: Option, -} - -#[derive(Clone)] -enum OwnedOrStaticAtomic { - Owned { - flag: Arc, - #[cfg_attr(not(feature = "parallel"), allow(dead_code))] - private: bool, - }, - Shared(&'static AtomicBool), -} - -impl Default for OwnedOrStaticAtomic { - fn default() -> Self { - OwnedOrStaticAtomic::Owned { - flag: Arc::new(AtomicBool::default()), - private: true, - } - } -} - -impl Deref for OwnedOrStaticAtomic { - type Target = std::sync::atomic::AtomicBool; - - fn deref(&self) -> &Self::Target { - match self { - OwnedOrStaticAtomic::Owned { flag, .. } => flag, - OwnedOrStaticAtomic::Shared(flag) => flag, - } - } + should_interrupt: Option, } /// How to obtain a submodule's status. diff --git a/gix/src/status/platform.rs b/gix/src/status/platform.rs index 318db07435b..2b73ae172a3 100644 --- a/gix/src/status/platform.rs +++ b/gix/src/status/platform.rs @@ -1,4 +1,4 @@ -use crate::status::{index_worktree, OwnedOrStaticAtomic, Platform, Submodule, UntrackedFiles}; +use crate::status::{index_worktree, OwnedOrStaticAtomicBool, Platform, Submodule, UntrackedFiles}; use std::sync::atomic::AtomicBool; /// Builder @@ -45,7 +45,7 @@ where /// /// If it is `true`, the iteration will stop immediately. pub fn should_interrupt_shared(mut self, should_interrupt: &'static AtomicBool) -> Self { - self.should_interrupt = Some(OwnedOrStaticAtomic::Shared(should_interrupt)); + self.should_interrupt = Some(OwnedOrStaticAtomicBool::Shared(should_interrupt)); self } @@ -53,7 +53,7 @@ where /// /// If it is `true`, the iteration will stop immediately. pub fn should_interrupt_owned(mut self, should_interrupt: std::sync::Arc) -> Self { - self.should_interrupt = Some(OwnedOrStaticAtomic::Owned { + self.should_interrupt = Some(OwnedOrStaticAtomicBool::Owned { flag: should_interrupt, private: false, }); diff --git a/gix/src/util.rs b/gix/src/util.rs new file mode 100644 index 00000000000..e9d1e10f8e9 --- /dev/null +++ b/gix/src/util.rs @@ -0,0 +1,81 @@ +use std::ops::Deref; +use std::sync::atomic::AtomicBool; +use std::sync::Arc; + +#[derive(Clone)] +pub enum OwnedOrStaticAtomicBool { + Owned { + flag: Arc, + #[cfg_attr(not(feature = "parallel"), allow(dead_code))] + private: bool, + }, + Shared(&'static AtomicBool), +} + +impl Default for OwnedOrStaticAtomicBool { + fn default() -> Self { + OwnedOrStaticAtomicBool::Owned { + flag: Arc::new(AtomicBool::default()), + private: true, + } + } +} + +impl Deref for OwnedOrStaticAtomicBool { + type Target = std::sync::atomic::AtomicBool; + + fn deref(&self) -> &Self::Target { + match self { + OwnedOrStaticAtomicBool::Owned { flag, .. } => flag, + OwnedOrStaticAtomicBool::Shared(flag) => flag, + } + } +} + +impl From<&'static AtomicBool> for OwnedOrStaticAtomicBool { + fn from(value: &'static AtomicBool) -> Self { + OwnedOrStaticAtomicBool::Shared(value) + } +} + +impl<'a> From<&'a Arc> for OwnedOrStaticAtomicBool { + fn from(value: &'a Arc) -> Self { + OwnedOrStaticAtomicBool::Owned { + flag: value.clone(), + private: false, + } + } +} + +impl From> for OwnedOrStaticAtomicBool { + fn from(flag: Arc) -> Self { + OwnedOrStaticAtomicBool::Owned { flag, private: false } + } +} +#[cfg(feature = "parallel")] +pub fn parallel_iter_drop( + mut rx_and_join: Option<(std::sync::mpsc::Receiver, std::thread::JoinHandle)>, + should_interrupt: &OwnedOrStaticAtomicBool, +) { + let Some((rx, handle)) = rx_and_join.take() else { + return; + }; + let prev = should_interrupt.swap(true, std::sync::atomic::Ordering::Relaxed); + let undo = match &should_interrupt { + OwnedOrStaticAtomicBool::Shared(flag) => *flag, + OwnedOrStaticAtomicBool::Owned { flag, private: false } => flag.as_ref(), + OwnedOrStaticAtomicBool::Owned { private: true, .. } => { + // Leak the handle to let it shut down in the background, so drop returns more quickly. + drop((rx, handle)); + return; + } + }; + // Wait until there is time to respond before we undo the change. + handle.join().ok(); + undo.fetch_update( + std::sync::atomic::Ordering::SeqCst, + std::sync::atomic::Ordering::SeqCst, + |current| current.then_some(prev), + ) + .ok(); +} diff --git a/gix/src/worktree/mod.rs b/gix/src/worktree/mod.rs index 87be9ef32b4..8fbb8b66679 100644 --- a/gix/src/worktree/mod.rs +++ b/gix/src/worktree/mod.rs @@ -24,13 +24,27 @@ pub type Index = gix_fs::SharedFileSnapshot; #[cfg(feature = "index")] pub enum IndexPersistedOrInMemory { /// The index as loaded from disk, and shared across clones of the owning `Repository`. - Persisted(crate::worktree::Index), + Persisted(Index), /// A temporary index as created from the `HEAD^{tree}`, with the file path set to the place where it would be stored naturally. /// /// Note that unless saved explicitly, it will not persist. InMemory(gix_index::File), } +#[cfg(feature = "index")] +impl From for IndexPersistedOrInMemory { + fn from(value: Index) -> Self { + IndexPersistedOrInMemory::Persisted(value) + } +} + +#[cfg(feature = "index")] +impl From for IndexPersistedOrInMemory { + fn from(value: gix_index::File) -> Self { + IndexPersistedOrInMemory::InMemory(value) + } +} + /// A stand-in to a worktree as result of a worktree iteration. /// /// It provides access to typical worktree state, but may not actually point to a valid checkout as the latter has been moved or diff --git a/gix/tests/repository/mod.rs b/gix/tests/repository/mod.rs index 80a711e17f0..646d08c6829 100644 --- a/gix/tests/repository/mod.rs +++ b/gix/tests/repository/mod.rs @@ -19,6 +19,7 @@ mod worktree; mod dirwalk { use gix_dir::entry::Kind::*; use gix_dir::walk::EmissionMode; + use std::sync::atomic::AtomicBool; #[test] fn basics() -> crate::Result { @@ -26,22 +27,48 @@ mod dirwalk { let untracked_only = repo.dirwalk_options()?.emit_untracked(EmissionMode::CollapseDirectory); let mut collect = gix::dir::walk::delegate::Collect::default(); let index = repo.index()?; - repo.dirwalk(&index, None::<&str>, untracked_only, &mut collect)?; + repo.dirwalk( + &index, + None::<&str>, + &AtomicBool::default(), + untracked_only, + &mut collect, + )?; + let expected = [ + ("all-untracked".to_string(), Repository), + ("bare-repo-with-index.git".to_string(), Directory), + ("bare.git".into(), Directory), + ("non-bare-repo-without-index".into(), Repository), + ("some".into(), Directory), + ]; assert_eq!( collect .into_entries_by_path() .into_iter() .map(|e| (e.0.rela_path.to_string(), e.0.disk_kind.expect("kind is known"))) .collect::>(), - [ - ("all-untracked".to_string(), Repository), - ("bare-repo-with-index.git".to_string(), Directory), - ("bare.git".into(), Directory), - ("non-bare-repo-without-index".into(), Repository), - ("some".into(), Directory) - ], + expected, "note how bare repos are just directories by default" ); + let mut iter = repo.dirwalk_iter(index, None::<&str>, Default::default(), untracked_only)?; + let mut actual: Vec<_> = iter + .by_ref() + .map(Result::unwrap) + .map(|item| { + ( + item.entry.rela_path.to_string(), + item.entry.disk_kind.expect("kind is known"), + ) + }) + .collect(); + actual.sort_by(|a, b| a.0.cmp(&b.0)); + assert_eq!(actual, expected, "the iterator works the same"); + let out = iter.into_outcome().expect("iteration done and no error"); + assert_eq!( + out.dirwalk.returned_entries, + expected.len(), + "just a minor sanity check, assuming everything else works as well" + ); Ok(()) } } diff --git a/gix/tests/repository/open.rs b/gix/tests/repository/open.rs index f779ca39648..17a658dbf28 100644 --- a/gix/tests/repository/open.rs +++ b/gix/tests/repository/open.rs @@ -268,8 +268,14 @@ mod object_caches { fn default_git_and_custom_caches() -> crate::Result { let opts = gix::open::Options::isolated(); let repo = named_subrepo_opts("make_config_repos.sh", "object-caches", opts)?; - assert_eq!(repo.objects.has_object_cache(), cfg!(feature = "comfort")); - assert_eq!(repo.objects.has_pack_cache(), cfg!(feature = "comfort")); + assert_eq!( + repo.objects.has_object_cache(), + cfg!(all(feature = "parallel", feature = "comfort")) + ); + assert_eq!( + repo.objects.has_pack_cache(), + cfg!(all(feature = "parallel", feature = "comfort")) + ); Ok(()) } diff --git a/justfile b/justfile index 38294a5070f..e8890319849 100755 --- a/justfile +++ b/justfile @@ -181,6 +181,7 @@ unit-tests: cargo test -p gix-protocol --features blocking-client cargo test -p gix-protocol --features async-client cargo test -p gix --no-default-features + cargo test -p gix --no-default-features --features basic,extras,comfort cargo test -p gix --features async-network-client cargo test -p gix --features blocking-network-client cargo test -p gitoxide-core --lib From eff82eb0c67a7e7153c6119623a8aec052661b7d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 17 Mar 2024 21:07:16 +0100 Subject: [PATCH 09/11] adapt to changes in `gix` --- gitoxide-core/src/repository/clean.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/gitoxide-core/src/repository/clean.rs b/gitoxide-core/src/repository/clean.rs index fe91e3d2b38..79a85f0553f 100644 --- a/gitoxide-core/src/repository/clean.rs +++ b/gitoxide-core/src/repository/clean.rs @@ -86,6 +86,7 @@ pub(crate) mod function { } else { Vec::new() }, + &gix::interrupt::IS_INTERRUPTED, options, &mut collect, )?; From 741e3739ebd4bf48c3ef94b87ccce7602cb7cc2f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 18 Mar 2024 07:28:16 +0100 Subject: [PATCH 10/11] fix: allow parsing signatures with trailing numbers in offset (#1322) That way this won't fail, but *just* silently degenerates information. The idea is that during FSCK, objects should be decoded and then re-encoded to see if they are still the same. If not, this means some leniency kicked in. Maybe for FSCK, there would also have to be a refactor so there is a lenient and strict parsing mode. One problem at a time. --- gix-actor/src/signature/decode.rs | 9 +++++++-- gix-actor/tests/signature/mod.rs | 27 ++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/gix-actor/src/signature/decode.rs b/gix-actor/src/signature/decode.rs index b19deb510bd..8b31da6b001 100644 --- a/gix-actor/src/signature/decode.rs +++ b/gix-actor/src/signature/decode.rs @@ -36,9 +36,14 @@ pub(crate) mod function { take_while(1..=2, AsChar::is_dec_digit) .verify_map(|v| to_signed::(v).ok()) .context(StrContext::Expected("MM".into())), + take_while(0.., AsChar::is_dec_digit).map(|v: &[u8]| v), ) - .map(|(time, sign, hours, minutes)| { - let offset = (hours * 3600 + minutes * 60) * if sign == Sign::Minus { -1 } else { 1 }; + .map(|(time, sign, hours, minutes, trailing_digits)| { + let offset = if trailing_digits.is_empty() { + (hours * 3600 + minutes * 60) * if sign == Sign::Minus { -1 } else { 1 } + } else { + 0 + }; Time { seconds: time, offset, diff --git a/gix-actor/tests/signature/mod.rs b/gix-actor/tests/signature/mod.rs index 496fb54ae54..55effea170d 100644 --- a/gix-actor/tests/signature/mod.rs +++ b/gix-actor/tests/signature/mod.rs @@ -53,7 +53,7 @@ mod write_to { } use bstr::ByteSlice; -use gix_actor::Signature; +use gix_actor::{Signature, SignatureRef}; #[test] fn trim() { @@ -80,3 +80,28 @@ fn round_trip() -> Result<(), Box> { } Ok(()) } + +#[test] +fn parse_timestamp_with_trailing_digits() { + let signature = gix_actor::SignatureRef::from_bytes::<()>(b"first last 1312735823 +051800") + .expect("deal with trailing zeroes in timestamp by discarding it"); + assert_eq!( + signature, + SignatureRef { + name: "first last".into(), + email: "name@example.com".into(), + time: gix_actor::date::Time::new(1312735823, 0), + } + ); + + let signature = gix_actor::SignatureRef::from_bytes::<()>(b"first last 1312735823 +0518") + .expect("this naturally works as the timestamp does not have trailing zeroes"); + assert_eq!( + signature, + SignatureRef { + name: "first last".into(), + email: "name@example.com".into(), + time: gix_actor::date::Time::new(1312735823, 19080), + } + ); +} From ab210836ac4558c88cc5f19fdddfb5c8eeb29754 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 18 Mar 2024 07:08:49 +0100 Subject: [PATCH 11/11] fix: allow parsing of commits with broken timestamp (#1322). Instead, default to UTC just like Git. --- gix-object/tests/commit/from_bytes.rs | 27 +++++++++++++++++++ .../fixtures/commit/invalid-timestamp.txt | 6 +++++ 2 files changed, 33 insertions(+) create mode 100644 gix-object/tests/fixtures/commit/invalid-timestamp.txt diff --git a/gix-object/tests/commit/from_bytes.rs b/gix-object/tests/commit/from_bytes.rs index 84c98785561..c999ee5e945 100644 --- a/gix-object/tests/commit/from_bytes.rs +++ b/gix-object/tests/commit/from_bytes.rs @@ -8,6 +8,33 @@ use crate::{ fixture_name, linus_signature, signature, }; +#[test] +fn invalid_timestsamp() { + let actor = gix_actor::SignatureRef { + name: b"Name".as_bstr(), + email: b"name@example.com".as_bstr(), + time: Time { + seconds: 1312735823, + offset: 0, + sign: Sign::Plus, + }, + }; + assert_eq!( + CommitRef::from_bytes(&fixture_name("commit", "invalid-timestamp.txt")) + .expect("auto-correct invalid timestamp by discarding it (time is still valid UTC)"), + CommitRef { + tree: b"7989dfb2ec2f41914611a22fb30bbc2b3849df9a".as_bstr(), + parents: [b"8845ae683e2688bc619baade49510c17e978518f".as_bstr()].into(), + author: actor, + committer: actor, + encoding: None, + message: b"edit changelog to mention about x_sendfile_header default change".as_bstr(), + extra_headers: vec![] + }, + "the offset of the actor is null, leaving the UTC time" + ); +} + #[test] fn unsigned() -> crate::Result { assert_eq!( diff --git a/gix-object/tests/fixtures/commit/invalid-timestamp.txt b/gix-object/tests/fixtures/commit/invalid-timestamp.txt new file mode 100644 index 00000000000..bd30774f1ff --- /dev/null +++ b/gix-object/tests/fixtures/commit/invalid-timestamp.txt @@ -0,0 +1,6 @@ +tree 7989dfb2ec2f41914611a22fb30bbc2b3849df9a +parent 8845ae683e2688bc619baade49510c17e978518f +author Name 1312735823 +051800 +committer Name 1312735823 +051800 + +edit changelog to mention about x_sendfile_header default change \ No newline at end of file