Skip to content

Commit e28882f

Browse files
committed
Store UnblamedHunks by ObjectId
This is a prerequisite to being able to blame commits with more than one parent.
1 parent 2d4abce commit e28882f

File tree

2 files changed

+53
-12
lines changed

2 files changed

+53
-12
lines changed

gix-blame/src/lib.rs

Lines changed: 47 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#![forbid(unsafe_code)]
44

55
use std::{
6+
collections::BTreeMap,
67
ops::{Add, AddAssign, Range, SubAssign},
78
path::PathBuf,
89
};
@@ -222,6 +223,30 @@ impl UnblamedHunk {
222223
}
223224
}
224225

226+
struct UnblamedHunkRecorder {
227+
hunks: BTreeMap<ObjectId, Vec<UnblamedHunk>>,
228+
}
229+
230+
impl UnblamedHunkRecorder {
231+
fn new() -> Self {
232+
Self { hunks: BTreeMap::new() }
233+
}
234+
235+
fn add_hunk(&mut self, object_id: ObjectId, unblamed_hunk: UnblamedHunk) {
236+
self.hunks
237+
.entry(object_id)
238+
.and_modify(|unblamed_hunks| unblamed_hunks.push(unblamed_hunk.clone()))
239+
.or_insert_with(|| vec![unblamed_hunk.clone()]);
240+
}
241+
242+
fn add_hunks(&mut self, object_id: ObjectId, new_unblamed_hunks: Vec<UnblamedHunk>) {
243+
self.hunks
244+
.entry(object_id)
245+
.and_modify(|unblamed_hunks| unblamed_hunks.extend(new_unblamed_hunks.clone()))
246+
.or_insert_with(|| new_unblamed_hunks);
247+
}
248+
}
249+
225250
#[derive(Clone, Debug, PartialEq)]
226251
pub enum Change {
227252
Unchanged(Range<u32>),
@@ -742,6 +767,7 @@ pub fn blame_file<E>(
742767
traverse: impl IntoIterator<Item = Result<gix_traverse::commit::Info, E>>,
743768
resource_cache: &mut gix_diff::blob::Platform,
744769
worktree_path: PathBuf,
770+
start_id: ObjectId,
745771
file_path: &BStr,
746772
) -> Result<Vec<BlameEntry>, E> {
747773
// TODO
@@ -770,24 +796,33 @@ pub fn blame_file<E>(
770796
// TODO Verify that `imara-diff` tokenizes lines the same way `lines` does.
771797
let number_of_lines = std::fs::read_to_string(absolute_path).unwrap().lines().count();
772798

773-
let mut lines_to_blame: Vec<UnblamedHunk> = vec![UnblamedHunk::new(
774-
0..number_of_lines.try_into().unwrap(),
775-
Offset::Added(0),
776-
)];
799+
let mut unblamed_hunk_recorder = UnblamedHunkRecorder::new();
800+
unblamed_hunk_recorder.add_hunk(
801+
start_id,
802+
UnblamedHunk::new(0..number_of_lines.try_into().unwrap(), Offset::Added(0)),
803+
);
777804
let mut out: Vec<BlameEntry> = vec![];
778805

779806
for item in traverse {
780807
let item = item?;
781808
let suspect = item.id;
782809

810+
// TODO
811+
// When we call `.remove(&suspect)`, we need to make sure that all commits that have
812+
// `suspect` as their parent have already been processed. `git` accomplishes that by
813+
// keeping unprocessed commits in a priority queue sorted by commit date.
814+
let Some(hunks_to_blame) = unblamed_hunk_recorder.hunks.remove(&suspect) else {
815+
continue;
816+
};
817+
783818
let parent_ids = item.parent_ids;
784819
if parent_ids.is_empty() {
785820
// I’m not entirely sure if this is correct yet. `suspect`, at this point, is the `id` of
786821
// the last `item` that was yielded by `traverse`, so it makes sense to assign the
787822
// remaining lines to it, even though we don’t explicitly check whether that is true
788823
// here. We could perhaps use `needed_to_obtain` to compare `suspect` against an empty
789824
// tree to validate this assumption.
790-
out.extend(lines_to_blame.iter().map(|hunk| {
825+
out.extend(hunks_to_blame.iter().map(|hunk| {
791826
BlameEntry::new(
792827
hunk.range_in_blamed_file.clone(),
793828
// TODO
@@ -798,8 +833,6 @@ pub fn blame_file<E>(
798833
)
799834
}));
800835

801-
lines_to_blame = vec![];
802-
803836
break;
804837
}
805838

@@ -813,14 +846,16 @@ pub fn blame_file<E>(
813846

814847
let [ref modification]: [gix_diff::tree::recorder::Change] = changes_for_file_path[..] else {
815848
// None of the changes affected the file we’re currently blaming.
849+
unblamed_hunk_recorder.add_hunks(last_parent_id, hunks_to_blame);
850+
816851
continue;
817852
};
818853

819854
match modification {
820855
gix_diff::tree::recorder::Change::Addition { .. } => {
821856
// Every line that has not been blamed yet on a commit, is expected to have been
822857
// added when the file was added to the repository.
823-
out.extend(lines_to_blame.iter().map(|hunk| {
858+
out.extend(hunks_to_blame.iter().map(|hunk| {
824859
BlameEntry::new(
825860
hunk.range_in_blamed_file.clone(),
826861
// TODO
@@ -831,20 +866,20 @@ pub fn blame_file<E>(
831866
)
832867
}));
833868

834-
lines_to_blame = vec![];
835-
836869
break;
837870
}
838871
gix_diff::tree::recorder::Change::Deletion { .. } => todo!(),
839872
gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => {
840873
let changes = get_changes(&odb, resource_cache, *oid, *previous_oid, file_path);
841874

842-
lines_to_blame = process_changes(&mut out, &lines_to_blame, &changes, suspect);
875+
let new_hunks_to_blame = process_changes(&mut out, &hunks_to_blame, &changes, suspect);
876+
877+
unblamed_hunk_recorder.add_hunks(last_parent_id, new_hunks_to_blame.clone());
843878
}
844879
}
845880
}
846881

847-
assert_eq!(lines_to_blame, vec![]);
882+
assert!(unblamed_hunk_recorder.hunks.is_empty());
848883

849884
// I don’t know yet whether it would make sense to use a data structure instead that preserves
850885
// order on insertion.

gix-blame/tests/blame.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ struct Fixture {
111111
worktree_path: PathBuf,
112112
odb: gix_odb::Handle,
113113
resource_cache: gix_diff::blob::Platform,
114+
start_id: ObjectId,
114115
commits: Vec<Result<gix_traverse::commit::Info, gix_traverse::commit::simple::Error>>,
115116
}
116117

@@ -173,6 +174,7 @@ impl Fixture {
173174
Ok(Fixture {
174175
odb,
175176
worktree_path,
177+
start_id: head_id,
176178
commits,
177179
resource_cache,
178180
})
@@ -187,6 +189,7 @@ macro_rules! mktest {
187189
worktree_path,
188190
odb,
189191
mut resource_cache,
192+
start_id,
190193
commits,
191194
} = Fixture::new().unwrap();
192195

@@ -195,6 +198,7 @@ macro_rules! mktest {
195198
commits,
196199
&mut resource_cache,
197200
worktree_path,
201+
start_id,
198202
format!("{}.txt", $case).as_str().into(),
199203
)
200204
.unwrap();
@@ -238,13 +242,15 @@ fn diff_disparity() {
238242
worktree_path,
239243
odb,
240244
mut resource_cache,
245+
start_id,
241246
commits,
242247
} = Fixture::new().unwrap();
243248
let lines_blamed = blame_file(
244249
&odb,
245250
commits,
246251
&mut resource_cache,
247252
worktree_path,
253+
start_id,
248254
format!("{case}.txt").as_str().into(),
249255
)
250256
.unwrap();

0 commit comments

Comments
 (0)