Skip to content

Commit df0b52e

Browse files
committed
finally an understanding on collision checking (#301)
Let's not try to imitate git here but instead solve the problem our way while learning from bugs git ran into in the process. This will be particularly interesting when dealing with parallel checkouts, something we need to implement for sure. On the bright side, there seems to be opportunity for a performance boost due to less iops during clone.
1 parent effa0cf commit df0b52e

File tree

3 files changed

+113
-26
lines changed

3 files changed

+113
-26
lines changed

git-worktree/src/index.rs

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,35 @@
11
use git_hash::oid;
22

3+
use crate::{index, index::checkout::Collision};
4+
35
pub mod checkout {
46
use bstr::BString;
57
use quick_error::quick_error;
68

9+
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
10+
pub struct Collision {
11+
/// the path that collided with something already present on disk.
12+
pub path: BString,
13+
/// The io error we encountered when checking out `path`.
14+
pub error_kind: std::io::ErrorKind,
15+
}
16+
17+
pub struct Outcome {
18+
pub collisions: Vec<Collision>,
19+
}
20+
721
#[derive(Clone, Copy)]
822
pub struct Options {
923
/// capabilities of the file system
1024
pub fs: crate::fs::Capabilities,
1125
/// If true, we assume no file to exist in the target directory, and want exclusive access to it.
12-
/// This should be enabled when cloning.
26+
/// This should be enabled when cloning to avoid checks for freshness of files. This also enables
27+
/// detection of collisions based on whether or not exclusive file creation succeeds or fails.
1328
pub destination_is_initially_empty: bool,
29+
/// If true, default false, try to checkout as much as possible and don't abort on first error which isn't
30+
/// due to a conflict.
31+
/// The operation will never fail, but count the encountered errors instead along with their paths.
32+
pub keep_going: bool,
1433
/// If true, a files creation time is taken into consideration when checking if a file changed.
1534
/// Can be set to false in case other tools alter the creation time in ways that interfere with our operation.
1635
///
@@ -29,6 +48,7 @@ pub mod checkout {
2948
Options {
3049
fs: Default::default(),
3150
destination_is_initially_empty: false,
51+
keep_going: false,
3252
trust_ctime: true,
3353
check_stat: true,
3454
}
@@ -63,24 +83,43 @@ pub fn checkout<Find>(
6383
path: impl AsRef<std::path::Path>,
6484
mut find: Find,
6585
options: checkout::Options,
66-
) -> Result<(), checkout::Error>
86+
) -> Result<checkout::Outcome, checkout::Error>
6787
where
6888
Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Option<git_object::BlobRef<'a>>,
6989
{
70-
if !options.destination_is_initially_empty {
71-
todo!("non-clone logic isn't implemented or vetted yet");
72-
}
7390
let root = path.as_ref();
7491
let mut buf = Vec::new();
92+
let mut collisions = Vec::new();
7593
for (entry, entry_path) in index.entries_mut_with_paths() {
7694
// TODO: write test for that
7795
if entry.flags.contains(git_index::entry::Flags::SKIP_WORKTREE) {
7896
continue;
7997
}
8098

81-
entry::checkout(entry, entry_path, &mut find, root, options, &mut buf)?;
99+
let res = entry::checkout(entry, entry_path, &mut find, root, options, &mut buf);
100+
match res {
101+
Ok(()) => {}
102+
// TODO: use ::IsDirectory as well when stabilized instead of raw_os_error()
103+
Err(index::checkout::Error::Io(err))
104+
if err.kind() == std::io::ErrorKind::AlreadyExists || err.raw_os_error() == Some(21) =>
105+
{
106+
// We are here because a file existed or was blocked by a directory which shouldn't be possible unless
107+
// we are on a file insensitive file system.
108+
collisions.push(Collision {
109+
path: entry_path.into(),
110+
error_kind: err.kind(),
111+
});
112+
}
113+
Err(err) => {
114+
if options.keep_going {
115+
todo!("keep going")
116+
} else {
117+
return Err(err);
118+
}
119+
}
120+
}
82121
}
83-
Ok(())
122+
Ok(checkout::Outcome { collisions })
84123
}
85124

86125
pub(crate) mod entry {
@@ -109,6 +148,7 @@ pub(crate) mod entry {
109148
executable_bit,
110149
..
111150
},
151+
destination_is_initially_empty,
112152
..
113153
}: index::checkout::Options,
114154
buf: &mut Vec<u8>,
@@ -130,7 +170,10 @@ pub(crate) mod entry {
130170
path: root.to_path_buf(),
131171
})?;
132172
let mut options = OpenOptions::new();
133-
options.create(true).write(true);
173+
options
174+
.create_new(destination_is_initially_empty)
175+
.create(!destination_is_initially_empty)
176+
.write(true);
134177
#[cfg(unix)]
135178
if executable_bit && entry.mode == git_index::entry::Mode::FILE_EXECUTABLE {
136179
use std::os::unix::fs::OpenOptionsExt;
@@ -151,6 +194,8 @@ pub(crate) mod entry {
151194
let symlink_destination = git_features::path::from_byte_slice(obj.data)
152195
.map_err(|_| index::checkout::Error::IllformedUtf8 { path: obj.data.into() })?;
153196

197+
// TODO: how to deal with mode changes? Maybe this info can be passed once we check for whether
198+
// a checkout is needed at all.
154199
if symlink {
155200
symlink::symlink_auto(symlink_destination, &dest)?;
156201
} else {

git-worktree/tests/fixtures/make_ignorecase_collisions.sh

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,17 @@ set -eu -o pipefail
44
git init -q
55
git config commit.gpgsign false
66

7-
touch a A
8-
git add a
9-
echo A | git update-index --add --stdin
7+
empty_oid=$(git hash-object -w --stdin </dev/null)
8+
9+
git update-index --index-info <<-EOF
10+
100644 $empty_oid FILE_X
11+
100644 $empty_oid FILE_x
12+
100644 $empty_oid file_X
13+
100644 $empty_oid file_x
14+
100644 $empty_oid D/B
15+
100644 $empty_oid D/C
16+
100644 $empty_oid d
17+
EOF
1018

1119
git commit -m "init"
20+
git checkout -f HEAD;

git-worktree/tests/index/mod.rs

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@ mod checkout {
33
use std::os::unix::prelude::MetadataExt;
44
use std::{
55
fs,
6+
io::ErrorKind,
67
path::{Path, PathBuf},
78
};
89

910
use git_object::bstr::ByteSlice;
1011
use git_odb::FindExt;
11-
use git_worktree::fs::Capabilities;
12-
use git_worktree::index;
12+
use git_worktree::{fs::Capabilities, index, index::checkout::Collision};
1313
use tempfile::TempDir;
1414

1515
use crate::fixture_path;
@@ -34,9 +34,11 @@ mod checkout {
3434
#[test]
3535
fn symlinks_become_files_if_disabled() -> crate::Result {
3636
let opts = opts_with_symlink(false);
37-
let (source_tree, destination, _index) = checkout_index_in_tmp_dir(opts, "make_mixed_without_submodules")?;
37+
let (source_tree, destination, _index, outcome) =
38+
checkout_index_in_tmp_dir(opts, "make_mixed_without_submodules")?;
3839

3940
assert_equality(&source_tree, &destination, opts.fs.symlink)?;
41+
assert!(outcome.collisions.is_empty());
4042

4143
Ok(())
4244
}
@@ -49,9 +51,11 @@ mod checkout {
4951
// skip if symlinks aren't supported anyway.
5052
return Ok(());
5153
};
52-
let (source_tree, destination, _index) = checkout_index_in_tmp_dir(opts, "make_mixed_without_submodules")?;
54+
let (source_tree, destination, _index, outcome) =
55+
checkout_index_in_tmp_dir(opts, "make_mixed_without_submodules")?;
5356

5457
assert_equality(&source_tree, &destination, opts.fs.symlink)?;
58+
assert!(outcome.collisions.is_empty());
5559
Ok(())
5660
}
5761

@@ -62,11 +66,12 @@ mod checkout {
6266
return;
6367
}
6468
let opts = opts_with_symlink(true);
65-
let (source_tree, destination, index) = checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap();
66-
assert_eq!(index.entries().len(), 2, "there is just one colliding item");
69+
let (source_tree, destination, index, outcome) =
70+
checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap();
6771

6872
let num_files = assert_equality(&source_tree, &destination, opts.fs.symlink).unwrap();
6973
assert_eq!(num_files, index.entries().len(), "it checks out all files");
74+
assert!(outcome.collisions.is_empty());
7075
}
7176

7277
#[test]
@@ -76,21 +81,44 @@ mod checkout {
7681
return;
7782
}
7883
let opts = opts_with_symlink(true);
79-
let (source_tree, destination, index) = checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap();
80-
assert_eq!(index.entries().len(), 2, "there is just one colliding item");
84+
let (source_tree, destination, _index, outcome) =
85+
checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap();
86+
87+
assert_eq!(
88+
outcome.collisions,
89+
vec![
90+
Collision {
91+
path: "FILE_x".into(),
92+
error_kind: ErrorKind::AlreadyExists,
93+
},
94+
Collision {
95+
path: "d".into(),
96+
error_kind: ErrorKind::AlreadyExists,
97+
},
98+
Collision {
99+
path: "file_X".into(),
100+
error_kind: ErrorKind::AlreadyExists,
101+
},
102+
Collision {
103+
path: "file_x".into(),
104+
error_kind: ErrorKind::AlreadyExists,
105+
},
106+
],
107+
"these files couldn't be checked out"
108+
);
81109

82110
let source_files = dir_structure(&source_tree);
83111
assert_eq!(
84112
stripped_prefix(&source_tree, &source_files),
85-
vec![PathBuf::from("a")],
86-
"the source also only contains the first created file"
113+
vec![PathBuf::from("d"), PathBuf::from("file_x")],
114+
"plenty of collisions prevent a checkout"
87115
);
88116

89117
let dest_files = dir_structure(&destination);
90118
assert_eq!(
91119
stripped_prefix(&destination, &dest_files),
92-
vec![PathBuf::from("A")],
93-
"it only creates the first file of a collision"
120+
vec![PathBuf::from("D/B"), PathBuf::from("D/C"), PathBuf::from("FILE_X")],
121+
"we checkout files in order and generally handle collision detection differently, hence the difference"
94122
);
95123
}
96124

@@ -138,20 +166,25 @@ mod checkout {
138166
fn checkout_index_in_tmp_dir(
139167
opts: index::checkout::Options,
140168
name: &str,
141-
) -> crate::Result<(PathBuf, TempDir, git_index::File)> {
169+
) -> crate::Result<(
170+
PathBuf,
171+
TempDir,
172+
git_index::File,
173+
git_worktree::index::checkout::Outcome,
174+
)> {
142175
let source_tree = fixture_path(name);
143176
let git_dir = source_tree.join(".git");
144177
let mut index = git_index::File::at(git_dir.join("index"), Default::default())?;
145178
let odb = git_odb::at(git_dir.join("objects"))?;
146179
let destination = tempfile::tempdir()?;
147180

148-
index::checkout(
181+
let outcome = index::checkout(
149182
&mut index,
150183
&destination,
151184
move |oid, buf| odb.find_blob(oid, buf).ok(),
152185
opts,
153186
)?;
154-
Ok((source_tree, destination, index))
187+
Ok((source_tree, destination, index, outcome))
155188
}
156189

157190
fn stripped_prefix(prefix: impl AsRef<Path>, source_files: &[PathBuf]) -> Vec<&Path> {

0 commit comments

Comments
 (0)