Skip to content

Commit ba3f2db

Browse files
committed
feat!: provide Repository::dirwalk_iter().
That way, more copying happens but the usability increases tremendously as well. It's breaking as public types moved from `repository::dirwalk` to `dirwalk`, dissolving `repository::dirwalk` entirely.
1 parent e48ba08 commit ba3f2db

File tree

15 files changed

+472
-133
lines changed

15 files changed

+472
-133
lines changed

gix/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ interrupt = ["dep:signal-hook", "gix-tempfile/signals"]
7474
index = ["dep:gix-index"]
7575

7676
## Support directory walks with Git-style annotations.
77-
dirwalk = ["dep:gix-dir"]
77+
dirwalk = ["dep:gix-dir", "attributes", "excludes"]
7878

7979
## Access to credential helpers, which provide credentials for URLs.
8080
# Note that `gix-negotiate` just piggybacks here, as 'credentials' is equivalent to 'fetch & push' right now.

gix/src/dirwalk/iter.rs

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
use super::Iter;
2+
use crate::bstr::BString;
3+
use crate::util::OwnedOrStaticAtomicBool;
4+
use crate::worktree::IndexPersistedOrInMemory;
5+
use crate::{dirwalk, PathspecDetached, Repository};
6+
use std::path::PathBuf;
7+
8+
/// An entry of the directory walk as returned by the [iterator](Iter).
9+
pub struct Item {
10+
/// The directory entry.
11+
pub entry: gix_dir::Entry,
12+
/// `collapsed_directory_status` is `Some(dir_status)` if this entry was part of a directory with the given
13+
/// `dir_status` that wasn't the same as the one of `entry` and if [gix_dir::walk::Options::emit_collapsed] was
14+
/// [gix_dir::walk::CollapsedEntriesEmissionMode::OnStatusMismatch]. It will also be `Some(dir_status)` if that option
15+
/// was [gix_dir::walk::CollapsedEntriesEmissionMode::All].
16+
pub collapsed_directory_status: Option<gix_dir::entry::Status>,
17+
}
18+
19+
impl Item {
20+
fn new(entry: gix_dir::EntryRef<'_>, collapsed_directory_status: Option<gix_dir::entry::Status>) -> Self {
21+
Item {
22+
entry: entry.to_owned(),
23+
collapsed_directory_status,
24+
}
25+
}
26+
}
27+
28+
/// The outcome of a fully consumed [dirwalk iterator](Iter).
29+
pub struct Outcome {
30+
/// The index originally passed in to create the iterator.
31+
pub index: IndexPersistedOrInMemory,
32+
/// The excludes stack used for the dirwalk, for access of `.gitignore` information.
33+
pub excludes: gix_worktree::Stack,
34+
/// The pathspecs used to guide the operation.
35+
pub pathspec: PathspecDetached,
36+
/// The root actually being used for the traversal, and useful to transform the paths returned for the user.
37+
/// It's always within the [`work-dir`](Repository::work_dir).
38+
pub traversal_root: PathBuf,
39+
/// The actual result of the dirwalk.
40+
pub dirwalk: gix_dir::walk::Outcome,
41+
}
42+
43+
/// The error returned by [Repository::dirwalk_iter()].
44+
#[derive(Debug, thiserror::Error)]
45+
#[allow(missing_docs)]
46+
pub enum Error {
47+
#[error("Failed to spawn producer thread")]
48+
#[cfg(feature = "parallel")]
49+
SpawnThread(#[from] std::io::Error),
50+
#[error(transparent)]
51+
#[cfg(not(feature = "parallel"))]
52+
Dirwalk(#[from] dirwalk::Error),
53+
#[error(transparent)]
54+
#[cfg(not(feature = "parallel"))]
55+
DetachPathSpec(#[from] std::io::Error),
56+
}
57+
58+
/// Lifecycle
59+
impl Iter {
60+
pub(crate) fn new(
61+
repo: &Repository,
62+
index: IndexPersistedOrInMemory,
63+
patterns: Vec<BString>,
64+
should_interrupt: OwnedOrStaticAtomicBool,
65+
options: dirwalk::Options,
66+
) -> Result<Iter, Error> {
67+
#[cfg(feature = "parallel")]
68+
{
69+
let repo = repo.clone().into_sync();
70+
let (tx, rx) = std::sync::mpsc::channel();
71+
let handle = std::thread::Builder::new()
72+
.name("gix::dirwalk::iter::producer".into())
73+
.spawn({
74+
let should_interrupt = should_interrupt.clone();
75+
move || -> Result<Outcome, dirwalk::Error> {
76+
let repo: Repository = repo.into();
77+
let mut collect = Collect { tx };
78+
let out = repo.dirwalk(&index, patterns, &should_interrupt, options, &mut collect)?;
79+
Ok(Outcome {
80+
index,
81+
excludes: out.excludes.detach(),
82+
pathspec: out.pathspec.detach().map_err(|err| {
83+
dirwalk::Error::Walk(gix_dir::walk::Error::ReadDir {
84+
path: repo.git_dir().to_owned(),
85+
source: err,
86+
})
87+
})?,
88+
traversal_root: out.traversal_root,
89+
dirwalk: out.dirwalk,
90+
})
91+
}
92+
})?;
93+
94+
Ok(Iter {
95+
rx_and_join: Some((rx, handle)),
96+
should_interrupt,
97+
out: None,
98+
})
99+
}
100+
#[cfg(not(feature = "parallel"))]
101+
{
102+
let mut collect = Collect { items: Vec::new() };
103+
let out = repo.dirwalk(&index, patterns, &should_interrupt, options, &mut collect)?;
104+
let out = Outcome {
105+
index,
106+
excludes: out.excludes.detach(),
107+
pathspec: out.pathspec.detach()?,
108+
traversal_root: out.traversal_root,
109+
dirwalk: out.dirwalk,
110+
};
111+
112+
Ok(Iter {
113+
items: collect.items.into_iter(),
114+
out: Some(out),
115+
})
116+
}
117+
}
118+
}
119+
120+
/// Access
121+
impl Iter {
122+
/// Return the outcome of the iteration, or `None` if the iterator isn't fully consumed.
123+
pub fn outcome_mut(&mut self) -> Option<&mut Outcome> {
124+
self.out.as_mut()
125+
}
126+
127+
/// Turn the iterator into the iteration outcome, which is `None` on error or if the iteration
128+
/// isn't complete.
129+
pub fn into_outcome(mut self) -> Option<Outcome> {
130+
self.out.take()
131+
}
132+
}
133+
134+
impl Iterator for Iter {
135+
type Item = Result<Item, dirwalk::Error>;
136+
137+
fn next(&mut self) -> Option<Self::Item> {
138+
#[cfg(feature = "parallel")]
139+
{
140+
let (rx, _join) = self.rx_and_join.as_ref()?;
141+
match rx.recv().ok() {
142+
Some(item) => Some(Ok(item)),
143+
None => {
144+
let (_rx, handle) = self.rx_and_join.take()?;
145+
match handle.join().expect("no panic") {
146+
Ok(out) => {
147+
self.out = Some(out);
148+
None
149+
}
150+
Err(err) => Some(Err(err)),
151+
}
152+
}
153+
}
154+
}
155+
#[cfg(not(feature = "parallel"))]
156+
self.items.next().map(Ok)
157+
}
158+
}
159+
160+
#[cfg(feature = "parallel")]
161+
impl Drop for Iter {
162+
fn drop(&mut self) {
163+
crate::util::parallel_iter_drop(self.rx_and_join.take(), &self.should_interrupt);
164+
}
165+
}
166+
167+
struct Collect {
168+
#[cfg(feature = "parallel")]
169+
tx: std::sync::mpsc::Sender<Item>,
170+
#[cfg(not(feature = "parallel"))]
171+
items: Vec<Item>,
172+
}
173+
174+
impl gix_dir::walk::Delegate for Collect {
175+
fn emit(
176+
&mut self,
177+
entry: gix_dir::EntryRef<'_>,
178+
collapsed_directory_status: Option<gix_dir::entry::Status>,
179+
) -> gix_dir::walk::Action {
180+
// NOTE: we assume that the receiver triggers interruption so the operation will stop if the receiver is down.
181+
let item = Item::new(entry, collapsed_directory_status);
182+
#[cfg(feature = "parallel")]
183+
self.tx.send(item).ok();
184+
#[cfg(not(feature = "parallel"))]
185+
self.items.push(item);
186+
gix_dir::walk::Action::Continue
187+
}
188+
}

gix/src/dirwalk/mod.rs

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
use gix_dir::walk::{CollapsedEntriesEmissionMode, EmissionMode, ForDeletionMode};
2+
3+
use crate::{config, AttributeStack, Pathspec};
4+
use std::path::PathBuf;
5+
6+
mod options;
7+
8+
///
9+
#[allow(clippy::empty_docs)]
10+
pub mod iter;
11+
12+
/// An iterator for entries in a directory walk.
13+
///
14+
/// ### Parallel Operation
15+
///
16+
/// Note that without the `parallel` feature, the iterator becomes 'serial', which means that all entries will be traversed
17+
/// in advance and it cannot be interrupted unless the interrupt flag is set from another thread.
18+
///
19+
/// It's a crutch that is just there to make single-threaded applications possible at all, as it's not really an iterator
20+
/// anymore. If this matters, better run [Repository::dirwalk()](crate::Repository::dirwalk) by hand as it provides all
21+
/// control one would need, just not as an iterator.
22+
///
23+
/// Also, even with `parallel` set, the first call to `next()` will block until there is an item available, without a chance
24+
/// to interrupt unless the interrupt flag is set from another thread.
25+
pub struct Iter {
26+
#[cfg(feature = "parallel")]
27+
#[allow(clippy::type_complexity)]
28+
rx_and_join: Option<(
29+
std::sync::mpsc::Receiver<iter::Item>,
30+
std::thread::JoinHandle<Result<iter::Outcome, Error>>,
31+
)>,
32+
#[cfg(feature = "parallel")]
33+
should_interrupt: crate::util::OwnedOrStaticAtomicBool,
34+
/// Without parallelization, the iterator has to buffer all entries in advance.
35+
#[cfg(not(feature = "parallel"))]
36+
items: std::vec::IntoIter<iter::Item>,
37+
/// The outcome of the operation, only available once the operation has ended.
38+
out: Option<iter::Outcome>,
39+
}
40+
41+
/// The error returned by [dirwalk()](crate::Repository::dirwalk()).
42+
#[derive(Debug, thiserror::Error)]
43+
#[allow(missing_docs)]
44+
pub enum Error {
45+
#[error(transparent)]
46+
Walk(#[from] gix_dir::walk::Error),
47+
#[error("A working tree is required to perform a directory walk")]
48+
MissingWorkDir,
49+
#[error(transparent)]
50+
Excludes(#[from] config::exclude_stack::Error),
51+
#[error(transparent)]
52+
Pathspec(#[from] crate::pathspec::init::Error),
53+
#[error(transparent)]
54+
Prefix(#[from] gix_path::realpath::Error),
55+
#[error(transparent)]
56+
FilesystemOptions(#[from] config::boolean::Error),
57+
}
58+
59+
/// The outcome of the [dirwalk()](crate::Repository::dirwalk).
60+
pub struct Outcome<'repo> {
61+
/// The excludes stack used for the dirwalk, for access of `.gitignore` information.
62+
pub excludes: AttributeStack<'repo>,
63+
/// The pathspecs used to guide the operation.
64+
pub pathspec: Pathspec<'repo>,
65+
/// The root actually being used for the traversal, and useful to transform the paths returned for the user.
66+
/// It's always within the [`work-dir`](crate::Repository::work_dir).
67+
pub traversal_root: PathBuf,
68+
/// The actual result of the dirwalk.
69+
pub dirwalk: gix_dir::walk::Outcome,
70+
}
71+
72+
/// Options for use in the [`Repository::dirwalk()`](crate::Repository::dirwalk()) function.
73+
///
74+
/// Note that all values start out disabled.
75+
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
76+
pub struct Options {
77+
precompose_unicode: bool,
78+
ignore_case: bool,
79+
80+
recurse_repositories: bool,
81+
emit_pruned: bool,
82+
emit_ignored: Option<EmissionMode>,
83+
for_deletion: Option<ForDeletionMode>,
84+
emit_tracked: bool,
85+
emit_untracked: EmissionMode,
86+
emit_empty_directories: bool,
87+
classify_untracked_bare_repositories: bool,
88+
emit_collapsed: Option<CollapsedEntriesEmissionMode>,
89+
symlinks_to_directories_are_ignored_like_directories: bool,
90+
pub(crate) empty_patterns_match_prefix: bool,
91+
}

gix/src/dirwalk.rs renamed to gix/src/dirwalk/options.rs

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,6 @@
1+
use crate::dirwalk::Options;
12
use gix_dir::walk::{CollapsedEntriesEmissionMode, EmissionMode, ForDeletionMode};
23

3-
/// Options for use in the [`Repository::dirwalk()`](crate::Repository::dirwalk()) function.
4-
///
5-
/// Note that all values start out disabled.
6-
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
7-
pub struct Options {
8-
precompose_unicode: bool,
9-
ignore_case: bool,
10-
11-
recurse_repositories: bool,
12-
emit_pruned: bool,
13-
emit_ignored: Option<EmissionMode>,
14-
for_deletion: Option<ForDeletionMode>,
15-
emit_tracked: bool,
16-
emit_untracked: EmissionMode,
17-
emit_empty_directories: bool,
18-
classify_untracked_bare_repositories: bool,
19-
emit_collapsed: Option<CollapsedEntriesEmissionMode>,
20-
symlinks_to_directories_are_ignored_like_directories: bool,
21-
pub(crate) empty_patterns_match_prefix: bool,
22-
}
23-
244
/// Construction
255
impl Options {
266
pub(crate) fn from_fs_caps(caps: gix_fs::Capabilities) -> Self {

gix/src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,8 @@ pub use types::{Pathspec, PathspecDetached, Submodule};
177177
#[allow(clippy::empty_docs)]
178178
pub mod clone;
179179
pub mod commit;
180-
#[cfg(feature = "dirwalk")]
181180
///
181+
#[cfg(feature = "dirwalk")]
182182
#[allow(clippy::empty_docs)]
183183
pub mod dirwalk;
184184
pub mod head;
@@ -191,6 +191,8 @@ pub mod repository;
191191
#[cfg(feature = "attributes")]
192192
pub mod submodule;
193193
pub mod tag;
194+
#[cfg(any(feature = "dirwalk", feature = "status"))]
195+
pub(crate) mod util;
194196

195197
///
196198
#[allow(clippy::empty_docs)]

0 commit comments

Comments
 (0)