Skip to content

Commit 773ea1f

Browse files
committed
feat: diff between worktree and index
1 parent deabba6 commit 773ea1f

File tree

5 files changed

+376
-0
lines changed

5 files changed

+376
-0
lines changed

gix-worktree/src/diff.rs

Lines changed: 292 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,292 @@
1+
use std::io::{self, ErrorKind};
2+
use std::path::{Path, PathBuf};
3+
use std::time::{Duration, SystemTimeError};
4+
5+
use bstr::BString;
6+
use gix_features::hash;
7+
use gix_hash::ObjectId;
8+
use gix_index as index;
9+
use gix_object::encode::loose_header;
10+
use gix_path as path;
11+
12+
use crate::fs;
13+
use crate::read::{self, read_blob_to_buf_with_meta};
14+
15+
/// Describes how the mode of a worktree file differs from the mode recorded in its index entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum ModeChange {
    /// The kind of the entry changed, for example a regular file was replaced by a symlink.
    /// `git status` shows this as `typechange`.
    ///
    /// Note: apart from submodules, only files and symlinks are present in the index, so an
    /// entry that turned into a directory is counted as a removal instead.
    TypeChange,
    /// The executable bit of a file changed.
    ExecutableChange,
}
26+
27+
/// How a worktree file changed compared to an index entry
28+
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
29+
pub struct FileModification {
30+
/// How the mode has changed
31+
pub mode_change: Option<ModeChange>,
32+
/// mtime/ctime changed. If this is false then we can assume
33+
/// that the file is uncahged (with the exception of racy timestamps).
34+
/// If this is true however the file might still be unchaged. We need
35+
/// to read the file from disk and compare it to the object in
36+
/// index.
37+
pub stat_changed: bool,
38+
/// The data of this entry has changed. This can be quickly
39+
/// determined if the size of the stat data is mismatched.
40+
/// Otherwise a data change must be detected by reading the file
41+
/// from disk and comparing it to the file stored in the index
42+
/// (only needs to be done if `self.stat_changed` is true)
43+
pub data_changed: bool,
44+
}
45+
46+
impl FileModification {
47+
/// Computes the status of an entry by comparing its stat to `symlink_metadata()`
48+
pub fn from_stat(
49+
entry: &index::Entry,
50+
fs_stat: &std::fs::Metadata,
51+
capabilites: &fs::Capabilities,
52+
) -> Result<FileModification, SystemTimeError> {
53+
#[cfg(unix)]
54+
use std::os::unix::fs::MetadataExt;
55+
56+
let mode_change = match entry.mode {
57+
index::entry::Mode::FILE if !fs_stat.is_file() => Some(ModeChange::TypeChange),
58+
#[cfg(unix)]
59+
index::entry::Mode::FILE if capabilites.executable_bit && fs_stat.mode() & 0o111 != 0 => {
60+
Some(ModeChange::ExecutableChange)
61+
}
62+
#[cfg(unix)]
63+
index::entry::Mode::FILE_EXECUTABLE if capabilites.executable_bit && fs_stat.mode() & 0o111 == 0 => {
64+
Some(ModeChange::ExecutableChange)
65+
}
66+
index::entry::Mode::SYMLINK if !fs_stat.is_symlink() => Some(ModeChange::TypeChange),
67+
index::entry::Mode::COMMIT if !fs_stat.is_dir() => Some(ModeChange::TypeChange),
68+
_ => None, // TODO: log/errror invalid file type
69+
};
70+
71+
let data_changed = entry.stat.size as u64 != fs_stat.len();
72+
73+
let ctime = fs_stat
74+
.created()
75+
.map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?;
76+
let mtime = fs_stat
77+
.modified()
78+
.map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?;
79+
80+
let stat = &entry.stat;
81+
let stat_changed = stat.mtime.secs
82+
!= mtime
83+
.as_secs()
84+
.try_into()
85+
.expect("by 2038 we found a solution for this")
86+
|| stat.mtime.nsecs != mtime.subsec_nanos()
87+
|| stat.ctime.secs
88+
!= ctime
89+
.as_secs()
90+
.try_into()
91+
.expect("by 2038 we found a solution for this")
92+
|| stat.ctime.nsecs != ctime.subsec_nanos();
93+
94+
Ok(Self {
95+
mode_change,
96+
stat_changed,
97+
data_changed,
98+
})
99+
}
100+
101+
/// Marks this entries stats as changed if there is a potential fs race condition
102+
pub fn detect_racy_stat(&mut self, index: &index::State, index_entry: &index::Entry) {
103+
self.stat_changed = self.stat_changed || index_entry.stat.mtime >= index.timestamp()
104+
}
105+
106+
/// returns true if this entry has any changes
107+
/// usually `detect_racy_stat` should be called first to avoid race condition
108+
pub fn changed(&self) -> bool {
109+
self.mode_change.is_some() || self.stat_changed || self.data_changed
110+
}
111+
112+
/// Reads the worktree file from the disk and compares it to
113+
/// the index entries oid to check if the actual data of the file is changed
114+
/// and sets [`Entry::data_changed`] accordingly
115+
pub fn compare_data(
116+
&mut self,
117+
worktree_path: &Path,
118+
index_entry: &index::Entry,
119+
buf: &mut Vec<u8>,
120+
capabilities: &fs::Capabilities,
121+
) -> Result<(), read::Error> {
122+
if self.mode_change.is_some() || !self.stat_changed || self.data_changed {
123+
return Ok(());
124+
}
125+
let data = read_blob_to_buf_with_meta(
126+
worktree_path,
127+
index_entry.mode.contains(index::entry::Mode::SYMLINK),
128+
buf,
129+
capabilities,
130+
)?;
131+
let header = loose_header(gix_object::Kind::Blob, data.len());
132+
let hash_changed = match index_entry.id {
133+
ObjectId::Sha1(entry_hash) => {
134+
let mut file_hash = hash::Sha1::default();
135+
file_hash.update(&header);
136+
file_hash.update(&data);
137+
let file_hash = file_hash.digest();
138+
entry_hash != file_hash
139+
}
140+
};
141+
self.data_changed = hash_changed;
142+
Ok(())
143+
}
144+
}
145+
146+
#[allow(missing_docs)]
147+
#[derive(Debug, thiserror::Error)]
148+
pub enum Error {
149+
#[error("Could not convert path to UTF8 {path}")]
150+
IllformedUtf8 { path: BString },
151+
#[error("The clock was off when reading file related metadata after updating a file on disk")]
152+
Time(#[from] std::time::SystemTimeError),
153+
#[error("IO error while writing blob or reading file metadata or changing filetype")]
154+
Io(#[from] io::Error),
155+
}
156+
157+
#[derive(Clone, Debug)]
158+
/// A change between the index and the worktree computed by [`compate_to_index`]
159+
pub struct Change<'a> {
160+
/// The index entry that changed
161+
pub index_entry: &'a index::Entry,
162+
/// The on-disk worktree path corresponding to this entry
163+
pub worktree_path: PathBuf,
164+
/// How this index entry changed
165+
pub kind: ChangeKind,
166+
/// file metadata that can be reused (optimization)
167+
pub fstat: Option<std::fs::Metadata>,
168+
}
169+
170+
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
171+
///
172+
pub enum ChangeKind {
173+
/// An index entry has no corresponding file in the worktree
174+
Removed,
175+
/// Ar new files that has been marked with git add but has not yet been
176+
/// checked in yet. No diff is computed for these files because whatever is
177+
/// on disk at commit time will be used
178+
Added,
179+
/// Called for files that may have changed in some form as indicated by `change`.
180+
/// Note that this doesn't necessarily mean that the *content* of the file changed
181+
/// see [`FileStatus`] for details
182+
Modified {
183+
/// How the file was modified exactly
184+
modification: FileModification,
185+
/// Whether this (changed) file also has an unresolved merge conflict
186+
conflict: bool,
187+
},
188+
/// There are unresolved merge conflicts for this file
189+
/// but it has not changed on disk
190+
Conflict,
191+
}
192+
193+
/// Computes the changes between the index and the worktree
194+
pub fn compare_to_index<'a: 'b, 'b>(
195+
index: &'a index::State,
196+
// TODO: use worktree cache instead
197+
worktree: &'b Path,
198+
capabilities: &'b fs::Capabilities,
199+
) -> impl Iterator<Item = Result<Change<'a>, Error>> + 'b {
200+
// TODO: parallel with rayon
201+
index.entries().iter().filter_map(|index_entry| {
202+
let conflict = match index_entry.stage() {
203+
0 => false,
204+
1 => true,
205+
_ => return None,
206+
};
207+
let git_path = index_entry.path(index);
208+
if index_entry.flags.intersects(
209+
index::entry::Flags::UPTODATE
210+
| index::entry::Flags::SKIP_WORKTREE
211+
| index::entry::Flags::ASSUME_VALID
212+
| index::entry::Flags::FSMONITOR_VALID,
213+
) {
214+
return None;
215+
}
216+
217+
let path = if let Ok(path) = path::try_from_bstr(git_path) {
218+
path
219+
} else {
220+
return Some(Err(Error::IllformedUtf8 {
221+
path: git_path.to_owned(),
222+
}));
223+
};
224+
225+
let worktree_path = worktree.join(path);
226+
let metadata = match worktree_path.symlink_metadata() {
227+
// TODO: check if any parent directory is a symlink
228+
// we need to use fs::Cache for that
229+
Ok(metadata) if metadata.is_dir() => {
230+
// index entries are normally only for files/symlinks
231+
// if a file turned into a directory it was removed
232+
// the only exception here are submodules which are
233+
// part of the index despite being directories
234+
//
235+
// TODO: submodules:
236+
// if entry.mode.contains(Mode::COMMIT) &&
237+
// resolve_gitlink_ref(ce->name, "HEAD", &sub))
238+
return Some(Ok(Change {
239+
kind: ChangeKind::Removed,
240+
index_entry,
241+
worktree_path,
242+
fstat: Some(metadata),
243+
}));
244+
}
245+
Ok(metdata) => metdata,
246+
Err(err) if err.kind() == ErrorKind::NotFound => {
247+
return Some(Ok(Change {
248+
kind: ChangeKind::Removed,
249+
index_entry,
250+
worktree_path,
251+
fstat: None,
252+
}))
253+
}
254+
Err(err) => {
255+
// TODO: strict mode?
256+
return Some(Err(err.into()));
257+
}
258+
};
259+
if index_entry.flags.contains(index::entry::Flags::INTENT_TO_ADD) {
260+
return Some(Ok(Change {
261+
kind: ChangeKind::Added,
262+
index_entry,
263+
worktree_path,
264+
fstat: None,
265+
}));
266+
}
267+
let mut change = match FileModification::from_stat(index_entry, &metadata, capabilities) {
268+
Ok(change) => change,
269+
Err(err) => return Some(Err(err.into())),
270+
};
271+
change.detect_racy_stat(index, index_entry);
272+
println!("{change:?} {} {worktree_path:?}", metadata.is_symlink());
273+
274+
let kind = if change.changed() {
275+
ChangeKind::Modified {
276+
modification: change,
277+
conflict,
278+
}
279+
} else if conflict {
280+
ChangeKind::Conflict
281+
} else {
282+
return None;
283+
};
284+
285+
Some(Ok(Change {
286+
kind,
287+
index_entry,
288+
worktree_path,
289+
fstat: Some(metadata),
290+
}))
291+
})
292+
}

gix-worktree/src/lib.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,12 @@ pub mod fs;
1313
pub mod index;
1414

1515
pub(crate) mod os;
16+
17+
///
18+
pub mod diff;
19+
20+
///
21+
pub mod untracked;
22+
1623
///
1724
pub mod read;

gix-worktree/src/untracked.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
// TODO: untracked file detection, needs fs::Cache
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
use std::fs::{self};
2+
use std::path::Path;
3+
4+
use bstr::BString;
5+
use gix_worktree as worktree;
6+
use worktree::diff::{ChangeKind, FileModification};
7+
8+
fn compute_diff(name: &str, make_worktree_dirty: impl FnOnce(&Path)) -> Vec<(ChangeKind, BString)> {
9+
let work_tree =
10+
gix_testtools::scripted_fixture_writable(Path::new(name).with_extension("sh")).expect("script works");
11+
let git_dir = work_tree.path().join(".git");
12+
make_worktree_dirty(work_tree.path());
13+
let index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, Default::default()).unwrap();
14+
let capapilites = worktree::fs::Capabilities::probe(git_dir);
15+
let mut buf = Vec::with_capacity(8 * 1024);
16+
worktree::diff::compare_to_index(&index, work_tree.path(), &capapilites)
17+
.filter_map(|change| {
18+
let mut change = change.unwrap();
19+
if let ChangeKind::Modified {
20+
ref mut modification, ..
21+
} = &mut change.kind
22+
{
23+
modification
24+
.compare_data(&change.worktree_path, change.index_entry, &mut buf, &capapilites)
25+
.unwrap();
26+
if modification.mode_change.is_none() && !modification.data_changed {
27+
return None;
28+
}
29+
}
30+
Some((change.kind, change.index_entry.path(&index).to_owned()))
31+
})
32+
.collect()
33+
}
34+
35+
/// Files deleted from the worktree are reported as removed.
#[test]
fn removed() {
    let diff = compute_diff("make_mixed_without_submodules", |root| {
        for deleted in ["executable", "dir/content", "dir/sub-dir/symlink"] {
            fs::remove_file(root.join(deleted)).unwrap();
        }
    });

    let expected: Vec<_> = ["dir/content", "dir/sub-dir/symlink", "executable"]
        .into_iter()
        .map(|path| (ChangeKind::Removed, BString::from(path)))
        .collect();
    assert_eq!(diff, expected);
}
52+
53+
/// Only the file whose content actually changed is reported as modified.
#[test]
fn changed() {
    let diff = compute_diff("make_mixed_without_submodules", |root| {
        fs::write(root.join("dir/content"), "hello_world").unwrap();
        // write same content to this file to simulate a touch command
        fs::write(root.join("executable"), "content").unwrap();
    });

    let expected_kind = ChangeKind::Modified {
        modification: FileModification {
            mode_change: None,
            stat_changed: true,
            data_changed: true,
        },
        conflict: false,
    };
    assert_eq!(diff, vec![(expected_kind, BString::from("dir/content"))]);
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
mod checkout;
2+
mod diff;

0 commit comments

Comments
 (0)