Skip to content

Commit b702c29

Browse files
committed
feat: performance improvements for line statistics in ein t hours
1 parent 3596342 commit b702c29

File tree

1 file changed

+113
-89
lines changed

1 file changed

+113
-89
lines changed

gitoxide-core/src/hours/mod.rs

Lines changed: 113 additions & 89 deletions
Original file line number | Diff line number | Diff line change
@@ -134,113 +134,127 @@ where
134134
let (tx_tree_id, stat_threads) = needs_stats
135135
.then(|| {
136136
let (tx, rx) =
137-
crossbeam_channel::unbounded::<(u32, Option<git::hash::ObjectId>, git::hash::ObjectId)>();
137+
crossbeam_channel::unbounded::<Vec<(u32, Option<git::hash::ObjectId>, git::hash::ObjectId)>>();
138138
let stat_workers = (0..threads)
139139
.map(|_| {
140140
scope.spawn({
141141
let commit_counter = stat_counter.clone();
142142
let change_counter = change_counter.clone();
143143
let lines_counter = lines_counter.clone();
144144
let mut repo = repo.clone();
145-
repo.object_cache_size_if_unset(4 * 1024 * 1024);
145+
repo.object_cache_size_if_unset((850 * 1024 * 1024) / threads);
146146
let rx = rx.clone();
147147
move || -> Result<_, git::object::tree::diff::for_each::Error> {
148148
let mut out = Vec::new();
149-
for (commit_idx, parent_commit, commit) in rx {
150-
if let Some(c) = commit_counter.as_ref() {
151-
c.fetch_add(1, Ordering::SeqCst);
152-
}
153-
if git::interrupt::is_triggered() {
154-
return Ok(out);
155-
}
156-
let mut files = FileStats::default();
157-
let mut lines = LineStats::default();
158-
let from = match parent_commit {
159-
Some(id) => {
160-
match repo.find_object(id).ok().and_then(|c| c.peel_to_tree().ok()) {
161-
Some(tree) => tree,
162-
None => continue,
163-
}
164-
}
165-
None => repo.empty_tree(),
166-
};
167-
let to = match repo.find_object(commit).ok().and_then(|c| c.peel_to_tree().ok())
168-
{
169-
Some(c) => c,
170-
None => continue,
171-
};
172-
from.changes().track_filename().for_each_to_obtain_tree(&to, |change| {
173-
use git::object::tree::diff::change::Event::*;
174-
if let Some(c) = change_counter.as_ref() {
149+
for chunk in rx {
150+
for (commit_idx, parent_commit, commit) in chunk {
151+
if let Some(c) = commit_counter.as_ref() {
175152
c.fetch_add(1, Ordering::SeqCst);
176153
}
177-
match change.event {
178-
Addition { entry_mode, id } => {
179-
if entry_mode.is_no_tree() {
180-
files.added += 1;
181-
add_lines(line_stats, lines_counter.as_deref(), &mut lines, id);
154+
if git::interrupt::is_triggered() {
155+
return Ok(out);
156+
}
157+
let mut files = FileStats::default();
158+
let mut lines = LineStats::default();
159+
let from = match parent_commit {
160+
Some(id) => {
161+
match repo.find_object(id).ok().and_then(|c| c.peel_to_tree().ok())
162+
{
163+
Some(tree) => tree,
164+
None => continue,
182165
}
183166
}
184-
Deletion { entry_mode, id } => {
185-
if entry_mode.is_no_tree() {
186-
files.removed += 1;
187-
remove_lines(
188-
line_stats,
189-
lines_counter.as_deref(),
190-
&mut lines,
191-
id,
192-
);
193-
}
167+
None => repo.empty_tree(),
168+
};
169+
let to =
170+
match repo.find_object(commit).ok().and_then(|c| c.peel_to_tree().ok())
171+
{
172+
Some(c) => c,
173+
None => continue,
174+
};
175+
from.changes().track_filename().for_each_to_obtain_tree(&to, |change| {
176+
use git::object::tree::diff::change::Event::*;
177+
if let Some(c) = change_counter.as_ref() {
178+
c.fetch_add(1, Ordering::SeqCst);
194179
}
195-
Modification {
196-
entry_mode,
197-
previous_entry_mode,
198-
id,
199-
previous_id,
200-
} => match (previous_entry_mode.is_blob(), entry_mode.is_blob()) {
201-
(false, false) => {}
202-
(false, true) => {
203-
files.added += 1;
204-
add_lines(line_stats, lines_counter.as_deref(), &mut lines, id);
180+
match change.event {
181+
Addition { entry_mode, id } => {
182+
if entry_mode.is_no_tree() {
183+
files.added += 1;
184+
add_lines(
185+
line_stats,
186+
lines_counter.as_deref(),
187+
&mut lines,
188+
id,
189+
);
190+
}
205191
}
206-
(true, false) => {
207-
files.removed += 1;
208-
remove_lines(
209-
line_stats,
210-
lines_counter.as_deref(),
211-
&mut lines,
212-
previous_id,
213-
);
192+
Deletion { entry_mode, id } => {
193+
if entry_mode.is_no_tree() {
194+
files.removed += 1;
195+
remove_lines(
196+
line_stats,
197+
lines_counter.as_deref(),
198+
&mut lines,
199+
id,
200+
);
201+
}
214202
}
215-
(true, true) => {
216-
files.modified += 1;
217-
if line_stats {
218-
let is_text_file = mime_guess::from_path(
219-
git::path::from_bstr(change.location).as_ref(),
220-
)
221-
.first_or_text_plain()
222-
.type_()
223-
== mime_guess::mime::TEXT;
224-
if let Some(Ok(diff)) =
225-
is_text_file.then(|| change.event.diff()).flatten()
226-
{
227-
let mut nl = 0;
228-
let counts = diff.line_counts();
229-
nl += counts.insertions as usize
230-
+ counts.removals as usize;
231-
lines.added += counts.insertions as usize;
232-
lines.removed += counts.removals as usize;
233-
if let Some(c) = lines_counter.as_ref() {
234-
c.fetch_add(nl, Ordering::SeqCst);
203+
Modification {
204+
entry_mode,
205+
previous_entry_mode,
206+
id,
207+
previous_id,
208+
} => match (previous_entry_mode.is_blob(), entry_mode.is_blob()) {
209+
(false, false) => {}
210+
(false, true) => {
211+
files.added += 1;
212+
add_lines(
213+
line_stats,
214+
lines_counter.as_deref(),
215+
&mut lines,
216+
id,
217+
);
218+
}
219+
(true, false) => {
220+
files.removed += 1;
221+
remove_lines(
222+
line_stats,
223+
lines_counter.as_deref(),
224+
&mut lines,
225+
previous_id,
226+
);
227+
}
228+
(true, true) => {
229+
files.modified += 1;
230+
if line_stats {
231+
let is_text_file = mime_guess::from_path(
232+
git::path::from_bstr(change.location).as_ref(),
233+
)
234+
.first_or_text_plain()
235+
.type_()
236+
== mime_guess::mime::TEXT;
237+
if let Some(Ok(diff)) =
238+
is_text_file.then(|| change.event.diff()).flatten()
239+
{
240+
let mut nl = 0;
241+
let counts = diff.line_counts();
242+
nl += counts.insertions as usize
243+
+ counts.removals as usize;
244+
lines.added += counts.insertions as usize;
245+
lines.removed += counts.removals as usize;
246+
if let Some(c) = lines_counter.as_ref() {
247+
c.fetch_add(nl, Ordering::SeqCst);
248+
}
235249
}
236250
}
237251
}
238-
}
239-
},
240-
}
241-
Ok::<_, Infallible>(Default::default())
242-
})?;
243-
out.push((commit_idx, files, lines));
252+
},
253+
}
254+
Ok::<_, Infallible>(Default::default())
255+
})?;
256+
out.push((commit_idx, files, lines));
257+
}
244258
}
245259
Ok(out)
246260
}
@@ -253,6 +267,8 @@ where
253267

254268
let mut commit_idx = 0_u32;
255269
let mut skipped_merge_commits = 0;
270+
const CHUNK_SIZE: usize = 50;
271+
let mut chunk = Vec::with_capacity(CHUNK_SIZE);
256272
let commit_iter = interrupt::Iter::new(
257273
commit_id.ancestors(|oid, buf| {
258274
progress.inc();
@@ -271,7 +287,13 @@ where
271287
None => res,
272288
}
273289
}) {
274-
tx_tree.send((commit_idx, first_parent, commit)).ok();
290+
if chunk.len() == CHUNK_SIZE {
291+
tx_tree
292+
.send(std::mem::replace(&mut chunk, Vec::with_capacity(CHUNK_SIZE)))
293+
.ok();
294+
} else {
295+
chunk.push((commit_idx, first_parent, commit))
296+
}
275297
}
276298
commit_idx = commit_idx.checked_add(1).expect("less then 4 billion commits");
277299
git::objs::CommitRefIter::from_bytes(obj.data)
@@ -290,8 +312,10 @@ where
290312
Err(err) => return Err(err.into()),
291313
};
292314
}
315+
if let Some(tx) = tx_tree_id {
316+
tx.send(chunk).ok();
317+
}
293318
drop(tx);
294-
drop(tx_tree_id);
295319
progress.show_throughput(start);
296320
drop(progress);
297321

0 commit comments

Comments
 (0)