Skip to content

Commit 8196a43

Browse files
committed
For linear histories, avoid redoing path lookup work
Also, set a fixed and higher pack-cache to double typical pack-decode performance in mid-sized repositories. Additionally, normalize the input path.
1 parent 667e626 commit 8196a43

File tree

2 files changed

+54
-20
lines changed

2 files changed

+54
-20
lines changed

gitoxide-core/src/repository/blame.rs

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
use gix::bstr::BStr;
1+
use gix::bstr::ByteSlice;
2+
use gix::config::tree;
23
use std::ffi::OsStr;
34

45
pub fn blame_file(
@@ -7,17 +8,39 @@ pub fn blame_file(
78
out: impl std::io::Write,
89
err: Option<&mut dyn std::io::Write>,
910
) -> anyhow::Result<()> {
10-
repo.object_cache_size_if_unset(repo.compute_object_cache_size_for_tree_diffs(&**repo.index_or_empty()?));
11+
{
12+
let mut config = repo.config_snapshot_mut();
13+
if config.string(&tree::Core::DELTA_BASE_CACHE_LIMIT).is_none() {
14+
config.set_value(&tree::Core::DELTA_BASE_CACHE_LIMIT, "100m")?;
15+
}
16+
}
17+
let index = repo.index_or_empty()?;
18+
repo.object_cache_size_if_unset(repo.compute_object_cache_size_for_tree_diffs(&index));
19+
20+
let file = gix::path::os_str_into_bstr(file)?;
21+
let specs = repo.pathspec(
22+
false,
23+
[file],
24+
true,
25+
&index,
26+
gix::worktree::stack::state::attributes::Source::WorktreeThenIdMapping.adjust_for_bare(repo.is_bare()),
27+
)?;
28+
// TODO: there should be a way to normalize paths without going through patterns, at least in this case maybe?
29+
// `Search` actually sorts patterns by whether they are excluding or not, which can lead to strange results.
30+
let file = specs
31+
.search()
32+
.patterns()
33+
.map(|p| p.path().to_owned())
34+
.next()
35+
.expect("exactly one pattern");
1136

1237
let suspect = repo.head()?.peel_to_commit_in_place()?;
1338
let traverse =
1439
gix::traverse::commit::topo::Builder::from_iters(&repo.objects, [suspect.id], None::<Vec<gix::ObjectId>>)
1540
.with_commit_graph(repo.commit_graph_if_enabled()?)
1641
.build()?;
1742
let mut resource_cache = repo.diff_resource_cache_for_tree_diff()?;
18-
let file_path: &BStr = gix::path::os_str_into_bstr(file)?;
19-
20-
let outcome = gix::blame::file(&repo.objects, traverse, &mut resource_cache, file_path)?;
43+
let outcome = gix::blame::file(&repo.objects, traverse, &mut resource_cache, file.as_bstr())?;
2144
let statistics = outcome.statistics;
2245
write_blame_entries(out, outcome)?;
2346

gix-blame/src/file/function.rs

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -69,12 +69,12 @@ where
6969

7070
let mut stats = Statistics::default();
7171
let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new());
72-
let blamed_file_entry = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)?
72+
let blamed_file_entry_id = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)?
7373
.ok_or_else(|| Error::FileMissing {
74-
file_path: file_path.to_owned(),
75-
commit_id: suspect,
76-
})?;
77-
let blamed_file_blob = odb.find_blob(&blamed_file_entry.oid, &mut buf)?.data.to_vec();
74+
file_path: file_path.to_owned(),
75+
commit_id: suspect,
76+
})?;
77+
let blamed_file_blob = odb.find_blob(&blamed_file_entry_id, &mut buf)?.data.to_vec();
7878
let num_lines_in_blamed = {
7979
let mut interner = gix_diff::blob::intern::Interner::new(blamed_file_blob.len() / 100);
8080
tokens_for_diffing(&blamed_file_blob)
@@ -98,6 +98,7 @@ where
9898

9999
let mut out = Vec::new();
100100
let mut diff_state = gix_diff::tree::State::default();
101+
let mut previous_entry: Option<(ObjectId, ObjectId)> = None;
101102
'outer: while let Some(item) = traverse.next() {
102103
if hunks_to_blame.is_empty() {
103104
break;
@@ -123,15 +124,27 @@ where
123124
continue;
124125
}
125126

126-
let Some(entry) = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)? else {
127+
let mut entry = previous_entry
128+
.take()
129+
.filter(|(id, _)| *id == suspect)
130+
.map(|(_, entry)| entry);
131+
if entry.is_none() {
132+
entry = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)?;
133+
}
134+
135+
let Some(entry_id) = entry else {
127136
continue;
128137
};
129138

130-
for parent_id in &parent_ids {
131-
if let Some(parent_entry) =
139+
for (pid, parent_id) in parent_ids.iter().enumerate() {
140+
if let Some(parent_entry_id) =
132141
find_path_entry_in_commit(&odb, parent_id, file_path, &mut buf, &mut buf2, &mut stats)?
133142
{
134-
if entry.oid == parent_entry.oid {
143+
let no_change_in_entry = entry_id == parent_entry_id;
144+
if pid == 0 {
145+
previous_entry = Some((*parent_id, parent_entry_id));
146+
}
147+
if no_change_in_entry {
135148
pass_blame_from_to(suspect, *parent_id, &mut hunks_to_blame);
136149
continue 'outer;
137150
}
@@ -170,10 +183,8 @@ where
170183
// Do nothing under the assumption that this always (or almost always)
171184
// implies that the file comes from a different parent, compared to which
172185
// it was modified, not added.
173-
} else {
174-
if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
175-
break 'outer;
176-
}
186+
} else if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
187+
break 'outer;
177188
}
178189
}
179190
gix_diff::tree::recorder::Change::Deletion { .. } => {
@@ -418,7 +429,7 @@ fn find_path_entry_in_commit(
418429
buf: &mut Vec<u8>,
419430
buf2: &mut Vec<u8>,
420431
stats: &mut Statistics,
421-
) -> Result<Option<gix_object::tree::Entry>, Error> {
432+
) -> Result<Option<ObjectId>, Error> {
422433
let commit_id = odb.find_commit(commit, buf)?.tree();
423434
stats.commits_to_tree += 1;
424435
let tree_iter = odb.find_tree_iter(&commit_id, buf)?;
@@ -430,7 +441,7 @@ fn find_path_entry_in_commit(
430441
file_path.split(|b| *b == b'/').inspect(|_| stats.trees_decoded += 1),
431442
)?;
432443
stats.trees_decoded -= 1;
433-
Ok(res)
444+
Ok(res.map(|e| e.oid))
434445
}
435446

436447
/// Return an iterator over tokens for use in diffing. These are usually lines, but it's important to unify them

0 commit comments

Comments
 (0)