diff --git a/Cargo.lock b/Cargo.lock index f2a2d0642db..b159fd19a42 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1269,10 +1269,10 @@ dependencies = [ "gix-url", "itertools", "jwalk", - "mime_guess", "rusqlite", "serde", "serde_json", + "smallvec", "tempfile", "thiserror", ] @@ -2188,8 +2188,20 @@ dependencies = [ "gix-hashtable 0.2.1", "gix-object 0.30.0", "gix-odb", + "gix-revwalk", "gix-testtools", "serde", + "thiserror", +] + +[[package]] +name = "gix-revwalk" +version = "0.1.0" +dependencies = [ + "gix-commitgraph", + "gix-hash 0.11.2", + "gix-hashtable 0.2.1", + "gix-object 0.30.0", "smallvec", "thiserror", ] @@ -2328,9 +2340,12 @@ dependencies = [ name = "gix-traverse" version = "0.26.0" dependencies = [ + "gix-commitgraph", "gix-hash 0.11.2", "gix-hashtable 0.2.1", "gix-object 0.30.0", + "gix-revwalk", + "smallvec", "thiserror", ] @@ -2338,6 +2353,7 @@ dependencies = [ name = "gix-traverse-tests" version = "0.0.0" dependencies = [ + "gix-commitgraph", "gix-hash 0.11.2", "gix-object 0.30.0", "gix-odb", @@ -3028,16 +3044,6 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" -[[package]] -name = "mime_guess" -version = "2.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef" -dependencies = [ - "mime", - "unicase", -] - [[package]] name = "minimal-lexical" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index e02963a0b5f..e7039b88d9f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -250,6 +250,7 @@ members = [ "gix-tui", "gix-tix", "gix-archive", + "gix-revwalk", "cargo-smart-release", "tests/tools", diff --git a/README.md b/README.md index 72a25df042d..e3e541c3724 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,7 @@ is usable to some extent. * [gix-pathspec](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-pathspec) * [gix-index](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-index) * [gix-revision](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-revision) + * [gix-revwalk](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-revwalk) * [gix-command](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-command) * [gix-prompt](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-prompt) * [gix-refspec](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-refspec) diff --git a/cargo-smart-release/src/git/history.rs b/cargo-smart-release/src/git/history.rs index 736f2ef33fa..2176492dbad 100644 --- a/cargo-smart-release/src/git/history.rs +++ b/cargo-smart-release/src/git/history.rs @@ -43,13 +43,14 @@ pub fn collect(repo: &gix::Repository) -> anyhow::Result .id() .ancestors() .sorting(gix::traverse::commit::Sorting::ByCommitTimeNewestFirst) + .use_commit_graph(false) .all()? { - let commit_id = commit_id?; + let commit = commit_id?; let (message, tree_id, parent_tree_id, commit_time) = { let (message, tree_id, commit_time, parent_commit_id) = { - let object = commit_id.object()?; - let commit = object.to_commit_ref(); + let object = commit.object()?; + let commit = object.decode()?; let parent = commit.parents().next(); (commit.message.to_vec(), commit.tree(), commit.committer.time, parent) }; @@ -65,7 +66,7 @@ pub fn collect(repo: &gix::Repository) -> anyhow::Result Err(_) => { log::warn!( "Commit message of {} could not be decoded to UTF-8 - ignored", - commit_id.as_ref() + commit.id ); continue; } @@ -76,7 +77,7 @@ pub fn collect(repo: &gix::Repository) -> anyhow::Result data_by_tree_id.insert(tree_id, handle.find_object(tree_id)?.data.to_owned()); } items.push(commit::history::Item { - id: commit_id.detach(), + id: commit.id, commit_time, message: commit::Message::from(message), tree_id, diff --git a/crate-status.md b/crate-status.md index d2cfa252d44..28c4d37d046 100644 --- a/crate-status.md +++ b/crate-status.md @@ -477,12 +477,14 @@ Make it the best-performing implementation and the most convenient one. ### gix-revision * [x] `describe()` (similar to `git name-rev`) -* [x] primitives to help with graph traversal, along with commit-graph acceleration. * parse specifications * [x] parsing and navigation * [x] revision ranges * [ ] full date parsing support (depends on `gix-date`) +### gix-revision +* [x] primitives to help with graph traversal, along with commit-graph acceleration. + ### gix-submodule * CRUD for submodules * try to handle with all the nifty interactions and be a little more comfortable than what git offers, lay a foundation for smarter git submodules. diff --git a/gitoxide-core/Cargo.toml b/gitoxide-core/Cargo.toml index 26cfaf0c2af..89b90c7609d 100644 --- a/gitoxide-core/Cargo.toml +++ b/gitoxide-core/Cargo.toml @@ -18,7 +18,7 @@ default = [] ## Discover all git repositories within a directory. Particularly useful with [skim](https://github.com/lotabout/skim). organize = ["dep:gix-url", "dep:jwalk"] ## Derive the amount of time invested into a git repository akin to [git-hours](https://github.com/kimmobrunfeldt/git-hours). -estimate-hours = ["dep:itertools", "dep:fs-err", "dep:crossbeam-channel", "dep:mime_guess"] +estimate-hours = ["dep:itertools", "dep:fs-err", "dep:crossbeam-channel", "dep:smallvec"] ## Gather information about repositories and store it in a database for easy querying. query = ["dep:rusqlite"] @@ -64,7 +64,7 @@ jwalk = { version = "0.8.0", optional = true } itertools = { version = "0.10.1", optional = true } fs-err = { version = "2.6.0", optional = true } crossbeam-channel = { version = "0.5.6", optional = true } -mime_guess = { version = "2.0.4", optional = true } +smallvec = { version = "1.10.0", optional = true } # for 'query' rusqlite = { version = "0.29.0", optional = true, features = ["bundled"] } diff --git a/gitoxide-core/src/hours/core.rs b/gitoxide-core/src/hours/core.rs index e313296a99c..efb8c4c46d4 100644 --- a/gitoxide-core/src/hours/core.rs +++ b/gitoxide-core/src/hours/core.rs @@ -1,9 +1,14 @@ use std::collections::{hash_map::Entry, HashMap}; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; use gix::bstr::BStr; +use gix::odb::FindExt; use itertools::Itertools; +use smallvec::SmallVec; -use crate::hours::{FileStats, LineStats, WorkByEmail, WorkByPerson}; +use crate::hours::util::{add_lines, remove_lines}; +use crate::hours::{CommitIdx, FileStats, LineStats, WorkByEmail, WorkByPerson}; const MINUTES_PER_HOUR: f32 = 60.0; pub const HOURS_PER_WORKDAY: f32 = 8.0; @@ -61,6 +66,168 @@ pub fn estimate_hours( } } +type CommitChangeLineCounters = ( + Option>, + Option>, + Option>, +); + +type SpawnResultWithReturnChannelAndWorkers<'scope> = ( + crossbeam_channel::Sender, gix::hash::ObjectId)>>, + Vec>>>, +); + +pub fn spawn_tree_delta_threads<'scope>( + scope: &'scope std::thread::Scope<'scope, '_>, + threads: usize, + line_stats: bool, + repo: gix::Repository, + stat_counters: CommitChangeLineCounters, +) -> SpawnResultWithReturnChannelAndWorkers<'scope> { + let (tx, rx) = crossbeam_channel::unbounded::, gix::hash::ObjectId)>>(); + let stat_workers = (0..threads) + .map(|_| { + scope.spawn({ + let stats_counters = stat_counters.clone(); + let mut repo = repo.clone(); + repo.object_cache_size_if_unset((850 * 1024 * 1024) / threads); + let rx = rx.clone(); + move || -> Result<_, anyhow::Error> { + let mut out = Vec::new(); + let (commit_counter, change_counter, lines_counter) = stats_counters; + let mut attributes = line_stats + .then(|| -> anyhow::Result<_> { + repo.index_or_load_from_head().map_err(Into::into).and_then(|index| { + repo.attributes( + &index, + gix::worktree::cache::state::attributes::Source::IdMapping, + gix::worktree::cache::state::ignore::Source::IdMapping, + None, + ) + .map_err(Into::into) + .map(|attrs| { + let matches = attrs.selected_attribute_matches(["binary", "text"]); + (attrs, matches) + }) + }) + }) + .transpose()?; + for chunk in rx { + for (commit_idx, parent_commit, commit) in chunk { + if let Some(c) = commit_counter.as_ref() { + c.fetch_add(1, Ordering::SeqCst); + } + if gix::interrupt::is_triggered() { + return Ok(out); + } + let mut files = FileStats::default(); + let mut lines = LineStats::default(); + let from = match parent_commit { + Some(id) => match repo.find_object(id).ok().and_then(|c| c.peel_to_tree().ok()) { + Some(tree) => tree, + None => continue, + }, + None => repo.empty_tree(), + }; + let to = match repo.find_object(commit).ok().and_then(|c| c.peel_to_tree().ok()) { + Some(c) => c, + None => continue, + }; + from.changes()? + .track_filename() + .track_rewrites(None) + .for_each_to_obtain_tree(&to, |change| { + use gix::object::tree::diff::change::Event::*; + if let Some(c) = change_counter.as_ref() { + c.fetch_add(1, Ordering::SeqCst); + } + match change.event { + Rewrite { .. } => { + unreachable!("we turned that off") + } + Addition { entry_mode, id } => { + if entry_mode.is_no_tree() { + files.added += 1; + add_lines(line_stats, lines_counter.as_deref(), &mut lines, id); + } + } + Deletion { entry_mode, id } => { + if entry_mode.is_no_tree() { + files.removed += 1; + remove_lines(line_stats, lines_counter.as_deref(), &mut lines, id); + } + } + Modification { + entry_mode, + previous_entry_mode, + id, + previous_id, + } => { + match (previous_entry_mode.is_blob(), entry_mode.is_blob()) { + (false, false) => {} + (false, true) => { + files.added += 1; + add_lines(line_stats, lines_counter.as_deref(), &mut lines, id); + } + (true, false) => { + files.removed += 1; + remove_lines( + line_stats, + lines_counter.as_deref(), + &mut lines, + previous_id, + ); + } + (true, true) => { + files.modified += 1; + if let Some((attrs, matches)) = attributes.as_mut() { + let entry = attrs.at_entry( + change.location, + Some(false), + |id, buf| repo.objects.find_blob(id, buf), + )?; + let is_text_file = if entry.matching_attributes(matches) { + let attrs: SmallVec<[_; 2]> = + matches.iter_selected().collect(); + let binary = &attrs[0]; + let text = &attrs[1]; + !binary.assignment.state.is_set() + && !text.assignment.state.is_unset() + } else { + // In the absence of binary or text markers, we assume it's text. + true + }; + + if let Some(Ok(diff)) = + is_text_file.then(|| change.event.diff()).flatten() + { + let mut nl = 0; + let counts = diff.line_counts(); + nl += counts.insertions as usize + counts.removals as usize; + lines.added += counts.insertions as usize; + lines.removed += counts.removals as usize; + if let Some(c) = lines_counter.as_ref() { + c.fetch_add(nl, Ordering::SeqCst); + } + } + } + } + } + } + } + Ok::<_, std::io::Error>(Default::default()) + })?; + out.push((commit_idx, files, lines)); + } + } + Ok(out) + } + }) + }) + .collect::>(); + (tx, stat_workers) +} + pub fn deduplicate_identities(persons: &[WorkByEmail]) -> Vec { let mut email_to_index = HashMap::<&'static BStr, usize>::with_capacity(persons.len()); let mut name_to_index = HashMap::<&'static BStr, usize>::with_capacity(persons.len()); diff --git a/gitoxide-core/src/hours/mod.rs b/gitoxide-core/src/hours/mod.rs index f53409e0395..e46409e5761 100644 --- a/gitoxide-core/src/hours/mod.rs +++ b/gitoxide-core/src/hours/mod.rs @@ -1,10 +1,9 @@ -use std::{collections::BTreeSet, convert::Infallible, io, path::Path, sync::atomic::Ordering, time::Instant}; +use std::{collections::BTreeSet, io, path::Path, time::Instant}; -use anyhow::{anyhow, bail}; +use anyhow::bail; use gix::{ actor, bstr::{BStr, ByteSlice}, - interrupt, prelude::*, progress, Progress, }; @@ -59,33 +58,12 @@ where let threads = gix::features::parallel::num_threads(threads); let (commit_authors, stats, is_shallow, skipped_merge_commits) = { - let stat_progress = needs_stats.then(|| progress.add_child("extract stats")).map(|mut p| { - p.init(None, progress::count("commits")); - p - }); - let stat_counter = stat_progress.as_ref().and_then(|p| p.counter()); - - let change_progress = needs_stats.then(|| progress.add_child("find changes")).map(|mut p| { - p.init(None, progress::count("modified files")); - p - }); - let change_counter = change_progress.as_ref().and_then(|p| p.counter()); - - let lines_progress = line_stats.then(|| progress.add_child("find changes")).map(|mut p| { - p.init(None, progress::count("diff lines")); - p - }); - let lines_counter = lines_progress.as_ref().and_then(|p| p.counter()); - - let mut progress = progress.add_child("traverse commit graph"); - progress.init(None, progress::count("commits")); - std::thread::scope(|scope| -> anyhow::Result<_> { let start = Instant::now(); let (tx, rx) = std::sync::mpsc::channel::<(u32, Vec)>(); let mailmap = repo.open_mailmap(); - let commit_thread = scope.spawn(move || -> anyhow::Result> { + let extract_signatures = scope.spawn(move || -> anyhow::Result> { let mut out = Vec::new(); for (commit_idx, commit_data) in rx { if let Ok(author) = gix::objs::CommitRefIter::from_bytes(&commit_data) @@ -127,150 +105,37 @@ where Ok(out) }); + let (stats_progresses, stats_counters) = needs_stats + .then(|| { + let mut sp = progress.add_child("extract stats"); + sp.init(None, progress::count("commits")); + let sc = sp.counter(); + + let mut cp = progress.add_child("find changes"); + cp.init(None, progress::count("modified files")); + let cc = cp.counter(); + + let mut lp = progress.add_child("find changes"); + lp.init(None, progress::count("diff lines")); + let lc = lp.counter(); + + (Some((sp, cp, lp)), Some((sc, cc, lc))) + }) + .unwrap_or_default(); + + let mut progress = progress.add_child("traverse commit graph"); + progress.init(None, progress::count("commits")); + let (tx_tree_id, stat_threads) = needs_stats .then(|| { - let (tx, rx) = - crossbeam_channel::unbounded::, gix::hash::ObjectId)>>(); - let stat_workers = (0..threads) - .map(|_| { - scope.spawn({ - let commit_counter = stat_counter.clone(); - let change_counter = change_counter.clone(); - let lines_counter = lines_counter.clone(); - let mut repo = repo.clone(); - repo.object_cache_size_if_unset((850 * 1024 * 1024) / threads); - let rx = rx.clone(); - move || -> Result<_, anyhow::Error> { - let mut out = Vec::new(); - for chunk in rx { - for (commit_idx, parent_commit, commit) in chunk { - if let Some(c) = commit_counter.as_ref() { - c.fetch_add(1, Ordering::SeqCst); - } - if gix::interrupt::is_triggered() { - return Ok(out); - } - let mut files = FileStats::default(); - let mut lines = LineStats::default(); - let from = match parent_commit { - Some(id) => { - match repo.find_object(id).ok().and_then(|c| c.peel_to_tree().ok()) - { - Some(tree) => tree, - None => continue, - } - } - None => repo.empty_tree(), - }; - let to = - match repo.find_object(commit).ok().and_then(|c| c.peel_to_tree().ok()) - { - Some(c) => c, - None => continue, - }; - from.changes()? - .track_filename() - .track_rewrites(None) - .for_each_to_obtain_tree(&to, |change| { - use gix::object::tree::diff::change::Event::*; - if let Some(c) = change_counter.as_ref() { - c.fetch_add(1, Ordering::SeqCst); - } - match change.event { - Rewrite { .. } => { - unreachable!("we turned that off") - } - Addition { entry_mode, id } => { - if entry_mode.is_no_tree() { - files.added += 1; - add_lines( - line_stats, - lines_counter.as_deref(), - &mut lines, - id, - ); - } - } - Deletion { entry_mode, id } => { - if entry_mode.is_no_tree() { - files.removed += 1; - remove_lines( - line_stats, - lines_counter.as_deref(), - &mut lines, - id, - ); - } - } - Modification { - entry_mode, - previous_entry_mode, - id, - previous_id, - } => { - match (previous_entry_mode.is_blob(), entry_mode.is_blob()) - { - (false, false) => {} - (false, true) => { - files.added += 1; - add_lines( - line_stats, - lines_counter.as_deref(), - &mut lines, - id, - ); - } - (true, false) => { - files.removed += 1; - remove_lines( - line_stats, - lines_counter.as_deref(), - &mut lines, - previous_id, - ); - } - (true, true) => { - files.modified += 1; - if line_stats { - // TODO: replace this with proper git-attributes - this isn't - // really working, can't see shell scripts for example. - let is_text_file = mime_guess::from_path( - gix::path::from_bstr(change.location) - .as_ref(), - ) - .first_or_text_plain() - .type_() - == mime_guess::mime::TEXT; - if let Some(Ok(diff)) = is_text_file - .then(|| change.event.diff()) - .flatten() - { - let mut nl = 0; - let counts = diff.line_counts(); - nl += counts.insertions as usize - + counts.removals as usize; - lines.added += counts.insertions as usize; - lines.removed += counts.removals as usize; - if let Some(c) = lines_counter.as_ref() { - c.fetch_add(nl, Ordering::SeqCst); - } - } - } - } - } - } - } - Ok::<_, Infallible>(Default::default()) - })?; - out.push((commit_idx, files, lines)); - } - } - Ok(out) - } - }) - }) - .collect::>(); - (Some(tx), stat_workers) + let (tx, threads) = spawn_tree_delta_threads( + scope, + threads, + line_stats, + repo.clone(), + stats_counters.clone().expect("counters are set"), + ); + (Some(tx), threads) }) .unwrap_or_default(); @@ -278,24 +143,31 @@ where let mut skipped_merge_commits = 0; const CHUNK_SIZE: usize = 50; let mut chunk = Vec::with_capacity(CHUNK_SIZE); - let commit_iter = interrupt::Iter::new( - commit_id.ancestors(|oid, buf| { - progress.inc(); - repo.objects.find(oid, buf).map(|obj| { - tx.send((commit_idx, obj.data.to_owned())).ok(); - if let Some((tx_tree, first_parent, commit)) = tx_tree_id.as_ref().and_then(|tx| { - let mut parents = gix::objs::CommitRefIter::from_bytes(obj.data).parent_ids(); - let res = parents + let mut commit_iter = commit_id.ancestors(|oid, buf| repo.objects.find_commit_iter(oid, buf)); + let mut is_shallow = false; + while let Some(c) = commit_iter.next() { + progress.inc(); + if gix::interrupt::is_triggered() { + bail!("Cancelled by user"); + } + match c { + Ok(c) => { + tx.send((commit_idx, commit_iter.commit_data().to_owned())).ok(); + let tree_delta_info = tx_tree_id.as_ref().and_then(|tx| { + let mut parents = c.parent_ids.into_iter(); + parents .next() - .map(|first_parent| (tx, Some(first_parent), oid.to_owned())); - match parents.next() { - Some(_) => { - skipped_merge_commits += 1; - None - } - None => res, - } - }) { + .map(|first_parent| (tx, Some(first_parent), c.id.to_owned())) + .filter(|_| { + if parents.next().is_some() { + skipped_merge_commits += 1; + false + } else { + true + } + }) + }); + if let Some((tx_tree, first_parent, commit)) = tree_delta_info { if chunk.len() == CHUNK_SIZE { tx_tree .send(std::mem::replace(&mut chunk, Vec::with_capacity(CHUNK_SIZE))) @@ -304,16 +176,8 @@ where chunk.push((commit_idx, first_parent, commit)) } } - commit_idx = commit_idx.checked_add(1).expect("less then 4 billion commits"); - gix::objs::CommitRefIter::from_bytes(obj.data) - }) - }), - || anyhow!("Cancelled by user"), - ); - let mut is_shallow = false; - for c in commit_iter { - match c? { - Ok(c) => c, + commit_idx += 1; + } Err(gix::traverse::commit::ancestors::Error::FindExisting { .. }) => { is_shallow = true; break; @@ -328,9 +192,9 @@ where progress.show_throughput(start); drop(progress); - let stats_by_commit_idx = match stat_progress { - Some(mut progress) => { - progress.set_max(Some(commit_idx as usize - skipped_merge_commits)); + let stats_by_commit_idx = match stats_progresses { + Some((mut stat_progress, change_progress, line_progress)) => { + stat_progress.set_max(Some(commit_idx as usize - skipped_merge_commits)); let mut stats = Vec::new(); for handle in stat_threads { stats.extend(handle.join().expect("no panic")?); @@ -339,20 +203,16 @@ where } } stats.sort_by_key(|t| t.0); - progress.show_throughput(start); + stat_progress.show_throughput(start); + change_progress.show_throughput(start); + line_progress.show_throughput(start); stats } None => Vec::new(), }; - if let Some(progress) = change_progress { - progress.show_throughput(start); - } - if let Some(progress) = lines_progress { - progress.show_throughput(start); - } Ok(( - commit_thread.join().expect("no panic")?, + extract_signatures.join().expect("no panic")?, stats_by_commit_idx, is_shallow, skipped_merge_commits, @@ -476,4 +336,5 @@ mod core; use self::core::{deduplicate_identities, estimate_hours, HOURS_PER_WORKDAY}; mod util; -use util::{add_lines, remove_lines, FileStats, LineStats, WorkByEmail, WorkByPerson}; +use crate::hours::core::spawn_tree_delta_threads; +use util::{CommitIdx, FileStats, LineStats, WorkByEmail, WorkByPerson}; diff --git a/gitoxide-core/src/hours/util.rs b/gitoxide-core/src/hours/util.rs index 5738efd8ac3..ef94b5de513 100644 --- a/gitoxide-core/src/hours/util.rs +++ b/gitoxide-core/src/hours/util.rs @@ -155,6 +155,9 @@ impl LineStats { } } +/// An index able to address any commit +pub type CommitIdx = u32; + pub fn add_lines(line_stats: bool, lines_counter: Option<&AtomicUsize>, mut lines: &mut LineStats, id: gix::Id<'_>) { if let Some(Ok(blob)) = line_stats.then(|| id.object()) { let nl = blob.data.lines_with_terminator().count(); diff --git a/gitoxide-core/src/pack/create.rs b/gitoxide-core/src/pack/create.rs index 9d046440570..0d58301bc2a 100644 --- a/gitoxide-core/src/pack/create.rs +++ b/gitoxide-core/src/pack/create.rs @@ -141,7 +141,7 @@ where let handle = handle.clone(); move |oid, buf| handle.find_commit_iter(oid, buf).map(|t| t.0) }) - .map(|res| res.map_err(Into::into)) + .map(|res| res.map_err(Into::into).map(|c| c.id)) .inspect(move |_| progress.inc()), ); (handle, iter) diff --git a/gitoxide-core/src/query/engine/update.rs b/gitoxide-core/src/query/engine/update.rs index c4ec3c5dc2b..a9bcf389984 100644 --- a/gitoxide-core/src/query/engine/update.rs +++ b/gitoxide-core/src/query/engine/update.rs @@ -346,7 +346,7 @@ pub fn update( ); let mut commits = Vec::new(); for c in commit_iter { - match c? { + match c?.map(|c| c.id) { Ok(c) => { if known_commits.binary_search(&c).is_err() { commits.push(c); diff --git a/gitoxide-core/src/repository/attributes/query.rs b/gitoxide-core/src/repository/attributes/query.rs index 730c0fbfec3..a3f5d2c9196 100644 --- a/gitoxide-core/src/repository/attributes/query.rs +++ b/gitoxide-core/src/repository/attributes/query.rs @@ -1,5 +1,3 @@ -use gix::odb::FindExt; - use crate::OutputFormat; pub struct Options { @@ -63,47 +61,8 @@ pub(crate) mod function { } } -pub(crate) enum Index { - Shared(gix::worktree::Index), - Owned(gix::index::File), -} - -impl std::ops::Deref for Index { - type Target = gix::index::File; - - fn deref(&self) -> &Self::Target { - match self { - Index::Shared(i) => i, - Index::Owned(i) => i, - } - } -} - -impl Index { - pub fn into_owned(self) -> gix::index::File { - match self { - Index::Shared(i) => gix::index::File::clone(&i), - Index::Owned(i) => i, - } - } -} - -pub(crate) fn index_on_demand(repo: &gix::Repository) -> anyhow::Result { - Ok(match repo.index() { - Ok(index) => Index::Shared(index), - Err(gix::worktree::open_index::Error::IndexFile(_)) => { - let tree = repo.head_commit()?.tree_id()?; - Index::Owned(gix::index::File::from_state( - gix::index::State::from_tree(&tree, |oid, buf| repo.objects.find_tree_iter(oid, buf).ok())?, - repo.git_dir().join("index"), - )) - } - Err(err) => return Err(err.into()), - }) -} - pub(crate) fn attributes_cache(repo: &gix::Repository) -> anyhow::Result { - let index = index_on_demand(repo)?; + let index = repo.index_or_load_from_head()?; Ok(repo.attributes( &index, if repo.is_bare() { diff --git a/gitoxide-core/src/repository/attributes/validate_baseline.rs b/gitoxide-core/src/repository/attributes/validate_baseline.rs index acef1e0345b..afffce85c67 100644 --- a/gitoxide-core/src/repository/attributes/validate_baseline.rs +++ b/gitoxide-core/src/repository/attributes/validate_baseline.rs @@ -21,10 +21,7 @@ pub(crate) mod function { use gix::{odb::FindExt, Progress}; use crate::{ - repository::attributes::{ - query::{attributes_cache, index_on_demand}, - validate_baseline::Options, - }, + repository::attributes::{query::attributes_cache, validate_baseline::Options}, OutputFormat, }; @@ -59,7 +56,7 @@ pub(crate) mod function { let repo = repo.clone(); let num_entries = &mut num_entries; move || -> anyhow::Result<_> { - let index = index_on_demand(&repo)?.into_owned(); + let index = repo.index_or_load_from_head()?.into_owned(); let (entries, path_backing) = index.into_parts().0.into_entries(); *num_entries = Some(entries.len()); let iter = Box::new(entries.into_iter().map(move |e| { diff --git a/gitoxide-core/src/repository/index/entries.rs b/gitoxide-core/src/repository/index/entries.rs index 29bed5a0081..c64559020e3 100644 --- a/gitoxide-core/src/repository/index/entries.rs +++ b/gitoxide-core/src/repository/index/entries.rs @@ -22,10 +22,7 @@ pub(crate) mod function { use gix::odb::FindExt; - use crate::repository::{ - attributes::query::index_on_demand, - index::entries::{Attributes, Options}, - }; + use crate::repository::index::entries::{Attributes, Options}; pub fn entries( repo: gix::Repository, @@ -38,7 +35,7 @@ pub(crate) mod function { }: Options, ) -> anyhow::Result<()> { use crate::OutputFormat::*; - let index = index_on_demand(&repo)?; + let index = repo.index_or_load_from_head()?; let mut cache = attributes .map(|attrs| { repo.attributes( diff --git a/gitoxide-core/src/repository/revision/list.rs b/gitoxide-core/src/repository/revision/list.rs index cce6c5bbc2d..b8a7330cdc5 100644 --- a/gitoxide-core/src/repository/revision/list.rs +++ b/gitoxide-core/src/repository/revision/list.rs @@ -27,7 +27,7 @@ pub fn list( .id .attach(&repo); for commit in commit_id.ancestors().all()? { - writeln!(out, "{}", commit?.to_hex())?; + writeln!(out, "{}", commit?.id().to_hex())?; } Ok(()) } diff --git a/gix-attributes/src/state.rs b/gix-attributes/src/state.rs index 27ce2a2479e..ffc0bce41db 100644 --- a/gix-attributes/src/state.rs +++ b/gix-attributes/src/state.rs @@ -69,6 +69,16 @@ impl StateRef<'_> { pub fn is_unspecified(&self) -> bool { matches!(self, StateRef::Unspecified) } + + /// Return `true` if the associated attribute was set with `attr`. Note that this will also be `true` if a value is assigned. + pub fn is_set(&self) -> bool { + matches!(self, StateRef::Set | StateRef::Value(_)) + } + + /// Return `true` if the associated attribute was set with `-attr` to specifically remove it. + pub fn is_unset(&self) -> bool { + matches!(self, StateRef::Unset) + } } /// Initialization diff --git a/gix-diff/tests/tree/mod.rs b/gix-diff/tests/tree/mod.rs index 836269a4543..5b42de3f1be 100644 --- a/gix-diff/tests/tree/mod.rs +++ b/gix-diff/tests/tree/mod.rs @@ -151,13 +151,13 @@ mod changes { .map(|c| { use gix_odb::FindExt; ( - db.find_commit(c, &mut buf) + db.find_commit(c.id, &mut buf) .unwrap() .message .trim() .to_str_lossy() .into_owned(), - c, + c.id, ) }) .rev() diff --git a/gix-negotiate/src/consecutive.rs b/gix-negotiate/src/consecutive.rs index e8fb90e1605..f213cd94982 100644 --- a/gix-negotiate/src/consecutive.rs +++ b/gix-negotiate/src/consecutive.rs @@ -57,7 +57,7 @@ impl Algorithm { self.non_common_revs -= 1; } } - while let Some((id, generation)) = queue.pop() { + while let Some((id, generation)) = queue.pop_value() { if graph .get(&id) .map_or(true, |commit| !commit.data.flags.contains(Flags::SEEN)) @@ -106,7 +106,7 @@ impl Negotiator for Algorithm { fn next_have(&mut self, graph: &mut crate::Graph<'_>) -> Option> { loop { - let id = self.revs.pop().filter(|_| self.non_common_revs != 0)?; + let id = self.revs.pop_value().filter(|_| self.non_common_revs != 0)?; let commit = graph.get_mut(&id).expect("it was added to the graph by now"); let flags = &mut commit.data.flags; *flags |= Flags::POPPED; diff --git a/gix-negotiate/src/skipping.rs b/gix-negotiate/src/skipping.rs index c1a21bf8d28..06841b68797 100644 --- a/gix-negotiate/src/skipping.rs +++ b/gix-negotiate/src/skipping.rs @@ -42,7 +42,7 @@ impl Algorithm { .filter(|_| !is_common) { let mut queue = gix_revision::PriorityQueue::from_iter(Some((commit.commit_time, id))); - while let Some(id) = queue.pop() { + while let Some(id) = queue.pop_value() { if let Some(commit) = graph.try_lookup_or_insert_commit(id, |entry| { if !entry.flags.contains(Flags::POPPED) { self.non_common_revs -= 1; @@ -135,7 +135,7 @@ impl Negotiator for Algorithm { fn next_have(&mut self, graph: &mut crate::Graph<'_>) -> Option> { loop { - let id = self.revs.pop().filter(|_| self.non_common_revs != 0)?; + let id = self.revs.pop_value().filter(|_| self.non_common_revs != 0)?; let commit = graph.get_mut(&id).expect("it was added to the graph by now"); commit.data.flags |= Flags::POPPED; diff --git a/gix-pack/tests/pack/data/output/count_and_entries.rs b/gix-pack/tests/pack/data/output/count_and_entries.rs index b54421e5cdd..9277d53ffd1 100644 --- a/gix-pack/tests/pack/data/output/count_and_entries.rs +++ b/gix-pack/tests/pack/data/output/count_and_entries.rs @@ -246,6 +246,7 @@ fn traversals() -> crate::Result { move |oid, buf| db.find_commit_iter(oid, buf).map(|t| t.0) }) .map(Result::unwrap) + .map(|c| c.id) .collect::>(); if let Some(take) = take { commits.resize(take, gix_hash::Kind::Sha1.null()); diff --git a/gix-revision/Cargo.toml b/gix-revision/Cargo.toml index 894005d6ae2..eec341e17f2 100644 --- a/gix-revision/Cargo.toml +++ b/gix-revision/Cargo.toml @@ -21,17 +21,17 @@ gix-hash = { version = "^0.11.2", path = "../gix-hash" } gix-object = { version = "^0.30.0", path = "../gix-object" } gix-date = { version = "^0.5.1", path = "../gix-date" } gix-hashtable = { version = "^0.2.1", path = "../gix-hashtable" } -gix-commitgraph = { version = "^0.16.0", path = "../gix-commitgraph" } +gix-revwalk = { version = "^0.1.0", path = "../gix-revwalk" } bstr = { version = "1.3.0", default-features = false, features = ["std"]} thiserror = "1.0.26" serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } -smallvec = "1.10.0" document-features = { version = "0.2.1", optional = true } [dev-dependencies] gix-odb = { path = "../gix-odb" } gix-testtools = { path = "../tests/tools" } +gix-commitgraph = { path = "../gix-commitgraph" } [package.metadata.docs.rs] all-features = true diff --git a/gix-revision/src/describe.rs b/gix-revision/src/describe.rs index 745e8c1c31e..2c16364777e 100644 --- a/gix-revision/src/describe.rs +++ b/gix-revision/src/describe.rs @@ -200,7 +200,7 @@ pub(crate) mod function { graph.clear(); graph.insert(commit.to_owned(), 0u32); - while let Some(commit) = queue.pop() { + while let Some(commit) = queue.pop_value() { commits_seen += 1; let flags = if let Some(name) = name_by_oid.get(&commit) { if candidates.len() < max_candidates { @@ -324,7 +324,7 @@ pub(crate) mod function { first_parent: bool, ) -> Result { let mut commits_seen = 0; - while let Some(commit) = queue.pop() { + while let Some(commit) = queue.pop_value() { commits_seen += 1; let flags = graph[&commit]; if (flags & best_candidate.identity_bit) == best_candidate.identity_bit { diff --git a/gix-revision/src/lib.rs b/gix-revision/src/lib.rs index dcc92cd6529..31d22768cce 100644 --- a/gix-revision/src/lib.rs +++ b/gix-revision/src/lib.rs @@ -16,46 +16,4 @@ pub use describe::function::describe; pub mod spec; pub use spec::types::Spec; -/// A graph of commits which additionally allows to associate data with commits. -/// -/// It starts empty, but each access may fill it with commit information. -/// Note that the traversal can be accelerated if a [commit-graph][gix_commitgraph::Graph] is also made available. -/// -/// ### About replacements -/// -/// Object replacements is an object database feature to substitute one object with another. We assume that this is transparently -/// implemented by the `find` function that returns objects. Also we assume that the commitgraph as been written with replacements -/// active to provide a consistent view. -/// -/// ### Odb or `find` configuration -/// -/// The `find` handle should be setup to *quickly determine if an object exists or not* to assure quick operation *on shallow repositories*. -/// This typically means that it should not re-read the odb if there is an object miss. -/// -/// Most usage of the Graph will benefit from fast ODB lookups, so setting up an object cache will be beneficial. If that's not the case, -/// the method docs will inform about that. -/// -/// Additionally, and only if `T` is [`Commit`][graph::Commit], there is *no need for an object cache* as we keep track of -/// everything related to commit traversal in our own hashmap. -pub struct Graph<'find, T> { - /// A way to resolve a commit from the object database. - find: Box>, - /// A way to speedup commit access, essentially a multi-file commit database. - cache: Option, - /// The set of cached commits that we have seen once, along with data associated with them. - set: gix_hashtable::HashMap, - /// A buffer for writing commit data into. - buf: Vec, - /// Another buffer we typically use to store parents. - parent_buf: Vec, -} -/// -pub mod graph; - -/// A utility type implementing a queue which can be used to automatically sort data by its time in ascending order. -/// -/// Note that the performance of this queue is very relevant to overall algorithm performance of many graph-walking algorithms, -/// and as it stands our implementation is about 6% slower in practice, probably also depending on the size of the stored data. -#[derive(Default)] -pub struct PriorityQueue(std::collections::BinaryHeap>); -mod queue; +pub use gix_revwalk::{graph, Graph, PriorityQueue}; diff --git a/gix-revision/tests/graph/mod.rs b/gix-revision/tests/graph/mod.rs deleted file mode 100644 index 3b03dee2f66..00000000000 --- a/gix-revision/tests/graph/mod.rs +++ /dev/null @@ -1,8 +0,0 @@ -#[test] -fn size_of_commit() { - assert_eq!( - std::mem::size_of::>(), - 48, - "We might see quite a lot of these, so they shouldn't grow unexpectedly" - ) -} diff --git a/gix-revision/tests/revision.rs b/gix-revision/tests/revision.rs index 1b469d3e908..71d63cb4057 100644 --- a/gix-revision/tests/revision.rs +++ b/gix-revision/tests/revision.rs @@ -1,5 +1,4 @@ mod describe; -mod graph; mod spec; pub type Result = std::result::Result>; diff --git a/gix-revwalk/Cargo.toml b/gix-revwalk/Cargo.toml new file mode 100644 index 00000000000..d9bea9a7851 --- /dev/null +++ b/gix-revwalk/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "gix-revwalk" +version = "0.1.0" +repository = "https://github.com/Byron/gitoxide" +license = "MIT/Apache-2.0" +description = "A crate providing utilities for walking the revision graph" +authors = ["Sebastian Thiel "] +edition = "2021" +include = ["src/**/*", "LICENSE-*", "CHANGELOG.md"] +rust-version = "1.64" + +[lib] +doctest = false + +[dependencies] +gix-hash = { version = "^0.11.2", path = "../gix-hash" } +gix-object = { version = "^0.30.0", path = "../gix-object" } +gix-hashtable = { version = "^0.2.1", path = "../gix-hashtable" } +gix-commitgraph = { version = "^0.16.0", path = "../gix-commitgraph" } + +thiserror = "1.0.26" +smallvec = "1.10.0" diff --git a/gix-revwalk/LICENSE-APACHE b/gix-revwalk/LICENSE-APACHE new file mode 120000 index 00000000000..965b606f331 --- /dev/null +++ b/gix-revwalk/LICENSE-APACHE @@ -0,0 +1 @@ +../LICENSE-APACHE \ No newline at end of file diff --git a/gix-revwalk/LICENSE-MIT b/gix-revwalk/LICENSE-MIT new file mode 120000 index 00000000000..76219eb72e8 --- /dev/null +++ b/gix-revwalk/LICENSE-MIT @@ -0,0 +1 @@ +../LICENSE-MIT \ No newline at end of file diff --git a/gix-revision/src/graph/commit.rs b/gix-revwalk/src/graph/commit.rs similarity index 100% rename from gix-revision/src/graph/commit.rs rename to gix-revwalk/src/graph/commit.rs diff --git a/gix-revision/src/graph/errors.rs b/gix-revwalk/src/graph/errors.rs similarity index 100% rename from gix-revision/src/graph/errors.rs rename to gix-revwalk/src/graph/errors.rs diff --git a/gix-revision/src/graph/mod.rs b/gix-revwalk/src/graph/mod.rs similarity index 96% rename from gix-revision/src/graph/mod.rs rename to gix-revwalk/src/graph/mod.rs index 016356cc1ef..cf7e1629e51 100644 --- a/gix-revision/src/graph/mod.rs +++ b/gix-revwalk/src/graph/mod.rs @@ -173,6 +173,9 @@ impl<'find, T: Default> Graph<'find, Commit> { /// `update_data(data)` gets run either on existing or on new data. /// /// Note that none of the data updates happen if `id` didn't exist. + /// + /// If only commit data is desired without the need for attaching custom data, use + /// [`try_lookup(id).to_owned()`][Graph::try_lookup()] instead. pub fn try_lookup_or_insert_commit( &mut self, id: gix_hash::ObjectId, @@ -190,6 +193,9 @@ impl<'find, T> Graph<'find, T> { /// Return the commit when done. /// /// Note that none of the data updates happen if `id` didn't exist. + /// + /// If only commit data is desired without the need for attaching custom data, use + /// [`try_lookup(id)`][Graph::try_lookup()] instead. pub fn try_lookup_or_insert_default( &mut self, id: gix_hash::ObjectId, @@ -296,6 +302,8 @@ where } /// A commit that provides access to graph-related information, on demand. +/// +/// The owned version of this type is called [`Commit`] and can be obtained by calling [`LazyCommit::to_owned()`]. pub struct LazyCommit<'graph> { backing: Either<&'graph [u8], (&'graph gix_commitgraph::Graph, gix_commitgraph::Position)>, } diff --git a/gix-revwalk/src/lib.rs b/gix-revwalk/src/lib.rs new file mode 100644 index 00000000000..44c5ca3e704 --- /dev/null +++ b/gix-revwalk/src/lib.rs @@ -0,0 +1,49 @@ +//! Utility types for traversing the git commit-graph. +//! +//! This crate considers itself very much *plumbing* and is meant for consumption by other plumbing crates. +#![deny(missing_docs, rust_2018_idioms)] +#![forbid(unsafe_code)] + +/// A graph of commits which additionally allows to associate data with commits. +/// +/// It starts empty, but each access may fill it with commit information. +/// Note that the traversal can be accelerated if a [commit-graph][gix_commitgraph::Graph] is also made available. +/// +/// ### About replacements +/// +/// Object replacements is an object database feature to substitute one object with another. We assume that this is transparently +/// implemented by the `find` function that returns objects. Also we assume that the commitgraph as been written with replacements +/// active to provide a consistent view. +/// +/// ### Odb or `find` configuration +/// +/// The `find` handle should be setup to *quickly determine if an object exists or not* to assure quick operation *on shallow repositories*. +/// This typically means that it should not re-read the odb if there is an object miss. +/// +/// Most usage of the Graph will benefit from fast ODB lookups, so setting up an object cache will be beneficial. If that's not the case, +/// the method docs will inform about that. +/// +/// Additionally, and only if `T` is [`Commit`][graph::Commit], there is *no need for an object cache* as we keep track of +/// everything related to commit traversal in our own hashmap. +pub struct Graph<'find, T> { + /// A way to resolve a commit from the object database. + find: Box>, + /// A way to speedup commit access, essentially a multi-file commit database. + cache: Option, + /// The set of cached commits that we have seen once, along with data associated with them. + set: gix_hashtable::HashMap, + /// A buffer for writing commit data into. + buf: Vec, + /// Another buffer we typically use to store parents. + parent_buf: Vec, +} +/// +pub mod graph; + +/// A utility type implementing a queue which can be used to automatically sort data by its time in ascending order. +/// +/// Note that the performance of this queue is very relevant to overall algorithm performance of many graph-walking algorithms, +/// and as it stands our implementation is about 6% slower in practice, probably also depending on the size of the stored data. +#[derive(Default)] +pub struct PriorityQueue(std::collections::BinaryHeap>); +mod queue; diff --git a/gix-revision/src/queue.rs b/gix-revwalk/src/queue.rs similarity index 85% rename from gix-revision/src/queue.rs rename to gix-revwalk/src/queue.rs index fec235d57fd..1c79389650a 100644 --- a/gix-revision/src/queue.rs +++ b/gix-revwalk/src/queue.rs @@ -72,11 +72,16 @@ impl PriorityQueue { self.0.push(Item { key, value }); } - /// Pop the highest-priority item off the queue. - pub fn pop(&mut self) -> Option { + /// Pop the highest-priority item value off the queue. + pub fn pop_value(&mut self) -> Option { self.0.pop().map(|t| t.value) } + /// Pop the highest-priority item key and value off the queue. + pub fn pop(&mut self) -> Option<(K, T)> { + self.0.pop().map(|t| (t.key, t.value)) + } + /// Iterate all items ordered from highest to lowest priority. pub fn iter_unordered(&self) -> impl Iterator { self.0.iter().map(|t| &t.value) @@ -92,10 +97,15 @@ impl PriorityQueue { self.0.is_empty() } - /// Returns the greatest item `(K, T)` tuple, as ordered by `K` if the queue is not empty. + /// Returns the greatest item `(K, T)` tuple, as ordered by `K`, if the queue is not empty, without removing it. pub fn peek(&self) -> Option<(&K, &T)> { self.0.peek().map(|e| (&e.key, &e.value)) } + + /// Drop all items from the queue, without changing its capacity. + pub fn clear(&mut self) { + self.0.clear() + } } impl FromIterator<(K, T)> for PriorityQueue { diff --git a/gix-revwalk/tests/revwalk.rs b/gix-revwalk/tests/revwalk.rs new file mode 100644 index 00000000000..d76f6cff57a --- /dev/null +++ b/gix-revwalk/tests/revwalk.rs @@ -0,0 +1,12 @@ +mod graph { + mod commit { + #[test] + fn size_of_commit() { + assert_eq!( + std::mem::size_of::>(), + 48, + "We might see quite a lot of these, so they shouldn't grow unexpectedly" + ) + } + } +} diff --git a/gix-traverse/Cargo.toml b/gix-traverse/Cargo.toml index 816ab817010..dbfb816bd38 100644 --- a/gix-traverse/Cargo.toml +++ b/gix-traverse/Cargo.toml @@ -17,4 +17,7 @@ doctest = false gix-hash = { version = "^0.11.2", path = "../gix-hash" } gix-object = { version = "^0.30.0", path = "../gix-object" } gix-hashtable = { version = "^0.2.1", path = "../gix-hashtable" } +gix-revwalk = { version = "^0.1.0", path = "../gix-revwalk" } +gix-commitgraph = { version = "^0.16.0", path = "../gix-commitgraph" } +smallvec = "1.10.0" thiserror = "1.0.32" diff --git a/gix-traverse/src/commit.rs b/gix-traverse/src/commit.rs index 95d48842a9a..ec1f9a3caa0 100644 --- a/gix-traverse/src/commit.rs +++ b/gix-traverse/src/commit.rs @@ -1,6 +1,9 @@ +use smallvec::SmallVec; + /// An iterator over the ancestors one or more starting commits pub struct Ancestors { find: Find, + cache: Option, predicate: Predicate, state: StateMut, parents: Parents, @@ -18,15 +21,36 @@ pub enum Parents { } /// Specify how to sort commits during traversal. +/// +/// ### Sample History +/// +/// The following history will be referred to for explaining how the sort order works, with the number denoting the commit timestamp +/// (*their X-alignment doesn't matter*). +/// +/// ```text +/// ---1----2----4----7 <- second parent of 8 +/// \ \ +/// 3----5----6----8--- +/// ``` + #[derive(Default, Debug, Copy, Clone)] pub enum Sorting { /// Commits are sorted as they are mentioned in the commit graph. + /// + /// In the *sample history* the order would be `8, 6, 7, 5, 4, 3, 2, 1` + /// + /// ### Note + /// + /// This is not to be confused with `git log/rev-list --topo-order`, which is notably different from + /// as it avoids overlapping branches. #[default] - Topological, + BreadthFirst, /// Commits are sorted by their commit time in descending order, that is newest first. /// /// The sorting applies to all currently queued commit ids and thus is full. /// + /// In the *sample history* the order would be `8, 7, 6, 5, 4, 3, 2, 1` + /// /// # Performance /// /// This mode benefits greatly from having an object_cache in `find()` @@ -36,25 +60,44 @@ pub enum Sorting { /// a given time, stopping the iteration once no younger commits is queued to be traversed. /// /// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache. + /// + /// In the *sample history* and a cut-off date of 4, the returned list of commits would be `8, 7, 6, 4` ByCommitTimeNewestFirstCutoffOlderThan { /// The amount of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time. time_in_seconds_since_epoch: u32, }, } +/// The collection of parent ids we saw as part of the iteration. +/// +/// Note that this list is truncated if [`Parents::First`] was used. +pub type ParentIds = SmallVec<[gix_hash::ObjectId; 1]>; + +/// Information about a commit that we obtained naturally as part of the iteration. +#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)] +pub struct Info { + /// The id of the commit. + pub id: gix_hash::ObjectId, + /// All parent ids we have encountered. Note that these will be at most one if [`Parents::First`] is enabled. + pub parent_ids: ParentIds, + /// The time at which the commit was created. It's only `Some(_)` if sorting is not [`Sorting::BreadthFirst`], as the walk + /// needs to require the commit-date. + pub commit_time: Option, +} + /// pub mod ancestors { + use smallvec::SmallVec; use std::{ borrow::{Borrow, BorrowMut}, collections::VecDeque, - iter::FromIterator, }; use gix_hash::{oid, ObjectId}; use gix_hashtable::HashSet; use gix_object::CommitRefIter; - use crate::commit::{Ancestors, Parents, Sorting}; + use crate::commit::{collect_parents, Ancestors, Either, Info, ParentIds, Parents, Sorting}; /// The error is part of the item returned by the [Ancestors] iterator. #[derive(Debug, thiserror::Error)] @@ -72,31 +115,38 @@ pub mod ancestors { type TimeInSeconds = u32; /// The state used and potentially shared by multiple graph traversals. - #[derive(Default, Clone)] + #[derive(Clone)] pub struct State { - next: VecDeque<(ObjectId, TimeInSeconds)>, + next: VecDeque, + queue: gix_revwalk::PriorityQueue, buf: Vec, seen: HashSet, parents_buf: Vec, + parent_ids: SmallVec<[(ObjectId, u64); 2]>, + } + + impl Default for State { + fn default() -> Self { + State { + next: Default::default(), + queue: gix_revwalk::PriorityQueue::new(), + buf: vec![], + seen: Default::default(), + parents_buf: vec![], + parent_ids: Default::default(), + } + } } impl State { fn clear(&mut self) { self.next.clear(); + self.queue.clear(); self.buf.clear(); self.seen.clear(); } } - /// Builder - impl Ancestors { - /// Change our commit parent handling mode to the given one. - pub fn parents(mut self, mode: Parents) -> Self { - self.parents = mode; - self - } - } - /// Builder impl Ancestors where @@ -107,32 +157,61 @@ pub mod ancestors { /// Set the sorting method, either topological or by author date pub fn sorting(mut self, sorting: Sorting) -> Result { self.sorting = sorting; - if !matches!(self.sorting, Sorting::Topological) { - let mut cutoff_time_storage = self.sorting.cutoff_time().map(|cot| (cot, Vec::new())); - let state = self.state.borrow_mut(); - for (commit_id, commit_time) in &mut state.next { - let commit_iter = (self.find)(commit_id, &mut state.buf).map_err(|err| Error::FindExisting { - oid: *commit_id, - source: err.into(), - })?; - let time = commit_iter.committer()?.time.seconds_since_unix_epoch; - match &mut cutoff_time_storage { - Some((cutoff_time, storage)) if time >= *cutoff_time => { - storage.push((*commit_id, time)); + match self.sorting { + Sorting::BreadthFirst => { + self.queue_to_vecdeque(); + } + Sorting::ByCommitTimeNewestFirst | Sorting::ByCommitTimeNewestFirstCutoffOlderThan { .. } => { + let cutoff_time = self.sorting.cutoff_time(); + let state = self.state.borrow_mut(); + for commit_id in state.next.drain(..) { + let commit_iter = + (self.find)(&commit_id, &mut state.buf).map_err(|err| Error::FindExisting { + oid: commit_id, + source: err.into(), + })?; + let time = commit_iter.committer()?.time.seconds_since_unix_epoch; + match cutoff_time { + Some(cutoff_time) if time >= cutoff_time => { + state.queue.insert(time, commit_id); + } + Some(_) => {} + None => { + state.queue.insert(time, commit_id); + } } - Some(_) => {} - None => *commit_time = time, } } - let mut v = match cutoff_time_storage { - Some((_, storage)) => storage, - None => Vec::from_iter(std::mem::take(&mut state.next).into_iter()), - }; - v.sort_by(|a, b| a.1.cmp(&b.1).reverse()); - state.next = v.into(); } Ok(self) } + + /// Change our commit parent handling mode to the given one. + pub fn parents(mut self, mode: Parents) -> Self { + self.parents = mode; + if matches!(self.parents, Parents::First) { + self.queue_to_vecdeque(); + } + self + } + + /// Set the commitgraph as `cache` to greatly accelerate any traversal. + /// + /// The cache will be used if possible, but we will fall-back without error to using the object + /// database for commit lookup. If the cache is corrupt, we will fall back to the object database as well. + pub fn commit_graph(mut self, cache: Option) -> Self { + self.cache = cache; + self + } + + fn queue_to_vecdeque(&mut self) { + let state = self.state.borrow_mut(); + state.next.extend( + std::mem::replace(&mut state.queue, gix_revwalk::PriorityQueue::new()) + .into_iter_unordered() + .map(|(_time, id)| id), + ); + } } /// Initialization @@ -191,12 +270,13 @@ pub mod ancestors { for tip in tips.map(Into::into) { let was_inserted = state.seen.insert(tip); if was_inserted && predicate(&tip) { - state.next.push_back((tip, 0)); + state.next.push_back(tip); } } } Self { find, + cache: None, predicate, state, parents: Default::default(), @@ -213,6 +293,11 @@ pub mod ancestors { pub fn commit_iter(&self) -> CommitRefIter<'_> { CommitRefIter::from_bytes(&self.state.borrow().buf) } + + /// Return the current commits data. + pub fn commit_data(&self) -> &[u8] { + &self.state.borrow().buf + } } impl Iterator for Ancestors @@ -222,14 +307,14 @@ pub mod ancestors { StateMut: BorrowMut, E: std::error::Error + Send + Sync + 'static, { - type Item = Result; + type Item = Result; fn next(&mut self) -> Option { if matches!(self.parents, Parents::First) { self.next_by_topology() } else { match self.sorting { - Sorting::Topological => self.next_by_topology(), + Sorting::BreadthFirst => self.next_by_topology(), Sorting::ByCommitTimeNewestFirst => self.next_by_commit_date(None), Sorting::ByCommitTimeNewestFirstCutoffOlderThan { time_in_seconds_since_epoch, @@ -259,26 +344,41 @@ pub mod ancestors { StateMut: BorrowMut, E: std::error::Error + Send + Sync + 'static, { - fn next_by_commit_date(&mut self, cutoff_older_than: Option) -> Option> { + fn next_by_commit_date(&mut self, cutoff_older_than: Option) -> Option> { let state = self.state.borrow_mut(); - let (oid, _commit_time) = state.next.pop_front()?; - match (self.find)(&oid, &mut state.buf) { - Ok(commit_iter) => { - let mut count = 0; + let (commit_time, oid) = state.queue.pop()?; + let mut parents: ParentIds = Default::default(); + match super::find(self.cache.as_ref(), &mut self.find, &oid, &mut state.buf) { + Ok(Either::CachedCommit(commit)) => { + if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) { + // drop corrupt caches and try again with ODB + self.cache = None; + return self.next_by_commit_date(cutoff_older_than); + } + for (id, commit_time) in state.parent_ids.drain(..) { + parents.push(id); + let was_inserted = state.seen.insert(id); + if !(was_inserted && (self.predicate)(&id)) { + continue; + } + + let parent_commit_time = commit_time as u32; // TODO: fix this once we have 64 bit date support + match cutoff_older_than { + Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue, + Some(_) | None => state.queue.insert(parent_commit_time, id), + } + } + } + Ok(Either::CommitRefIter(commit_iter)) => { for token in commit_iter { - count += 1; - let is_first = count == 1; match token { Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, Ok(gix_object::commit::ref_iter::Token::Parent { id }) => { + parents.push(id); let was_inserted = state.seen.insert(id); if !(was_inserted && (self.predicate)(&id)) { - if is_first && matches!(self.parents, Parents::First) { - break; - } else { - continue; - } + continue; } let parent = (self.find)(id.as_ref(), &mut state.parents_buf).ok(); @@ -291,21 +391,9 @@ pub mod ancestors { }) .unwrap_or_default(); - let pos = match state.next.binary_search_by(|c| c.1.cmp(&parent_commit_time).reverse()) - { - Ok(_) => None, - Err(pos) => Some(pos), - }; match cutoff_older_than { Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue, - Some(_) | None => match pos { - Some(pos) => state.next.insert(pos, (id, parent_commit_time)), - None => state.next.push_back((id, parent_commit_time)), - }, - } - - if is_first && matches!(self.parents, Parents::First) { - break; + Some(_) | None => state.queue.insert(parent_commit_time, id), } } Ok(_unused_token) => break, @@ -320,7 +408,11 @@ pub mod ancestors { })) } } - Some(Ok(oid)) + Some(Ok(Info { + id: oid, + parent_ids: parents, + commit_time: Some(commit_time as u64), + })) } } @@ -332,18 +424,38 @@ pub mod ancestors { StateMut: BorrowMut, E: std::error::Error + Send + Sync + 'static, { - fn next_by_topology(&mut self) -> Option> { + fn next_by_topology(&mut self) -> Option> { let state = self.state.borrow_mut(); - let (oid, _commit_time) = state.next.pop_front()?; - match (self.find)(&oid, &mut state.buf) { - Ok(commit_iter) => { + let oid = state.next.pop_front()?; + let mut parents: ParentIds = Default::default(); + match super::find(self.cache.as_ref(), &mut self.find, &oid, &mut state.buf) { + Ok(Either::CachedCommit(commit)) => { + if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) { + // drop corrupt caches and try again with ODB + self.cache = None; + return self.next_by_topology(); + } + + for (id, _commit_time) in state.parent_ids.drain(..) { + parents.push(id); + let was_inserted = state.seen.insert(id); + if was_inserted && (self.predicate)(&id) { + state.next.push_back(id); + } + if matches!(self.parents, Parents::First) { + break; + } + } + } + Ok(Either::CommitRefIter(commit_iter)) => { for token in commit_iter { match token { Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, Ok(gix_object::commit::ref_iter::Token::Parent { id }) => { + parents.push(id); let was_inserted = state.seen.insert(id); if was_inserted && (self.predicate)(&id) { - state.next.push_back((id, 0)); + state.next.push_back(id); } if matches!(self.parents, Parents::First) { break; @@ -361,7 +473,51 @@ pub mod ancestors { })) } } - Some(Ok(oid)) + Some(Ok(Info { + id: oid, + parent_ids: parents, + commit_time: None, + })) } } } + +enum Either<'buf, 'cache> { + CommitRefIter(gix_object::CommitRefIter<'buf>), + CachedCommit(gix_commitgraph::file::Commit<'cache>), +} + +fn collect_parents( + dest: &mut SmallVec<[(gix_hash::ObjectId, u64); 2]>, + cache: Option<&gix_commitgraph::Graph>, + parents: gix_commitgraph::file::commit::Parents<'_>, +) -> bool { + dest.clear(); + let cache = cache.as_ref().expect("parents iter is available, backed by `cache`"); + for parent_id in parents { + match parent_id { + Ok(pos) => dest.push({ + let parent = cache.commit_at(pos); + (parent.id().to_owned(), parent.committer_timestamp()) + }), + Err(_err) => return false, + } + } + true +} + +fn find<'cache, 'buf, Find, E>( + cache: Option<&'cache gix_commitgraph::Graph>, + mut find: Find, + id: &gix_hash::oid, + buf: &'buf mut Vec, +) -> Result, E> +where + Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E>, + E: std::error::Error + Send + Sync + 'static, +{ + match cache.and_then(|cache| cache.commit_by_id(id).map(Either::CachedCommit)) { + Some(c) => Ok(c), + None => find(id, buf).map(Either::CommitRefIter), + } +} diff --git a/gix-traverse/tests/Cargo.toml b/gix-traverse/tests/Cargo.toml index 9b7838da2fc..3b50a811b49 100644 --- a/gix-traverse/tests/Cargo.toml +++ b/gix-traverse/tests/Cargo.toml @@ -19,3 +19,4 @@ gix-testtools = { path = "../../tests/tools" } gix-odb = { path = "../../gix-odb" } gix-hash = { path = "../../gix-hash" } gix-object = { path = "../../gix-object" } +gix-commitgraph = { path = "../../gix-commitgraph" } diff --git a/gix-traverse/tests/commit/mod.rs b/gix-traverse/tests/commit/mod.rs index 961940db1c6..e4e0d8ff6a6 100644 --- a/gix-traverse/tests/commit/mod.rs +++ b/gix-traverse/tests/commit/mod.rs @@ -7,6 +7,7 @@ mod ancestor { struct TraversalAssertion<'a> { init_script: &'a str, + repo_name: &'a str, tips: &'a [&'a str], expected: &'a [&'a str], mode: commit::Parents, @@ -15,8 +16,13 @@ mod ancestor { impl<'a> TraversalAssertion<'a> { fn new(init_script: &'a str, tips: &'a [&'a str], expected: &'a [&'a str]) -> Self { + Self::new_at(init_script, "", tips, expected) + } + + fn new_at(init_script: &'a str, repo_name: &'a str, tips: &'a [&'a str], expected: &'a [&'a str]) -> Self { TraversalAssertion { init_script, + repo_name, tips, expected, mode: Default::default(), @@ -38,7 +44,7 @@ mod ancestor { impl TraversalAssertion<'_> { fn setup(&self) -> crate::Result<(gix_odb::Handle, Vec, Vec)> { let dir = gix_testtools::scripted_fixture_read_only_standalone(self.init_script)?; - let store = gix_odb::at(dir.join(".git").join("objects"))?; + let store = gix_odb::at(dir.join(self.repo_name).join(".git").join("objects"))?; let tips: Vec<_> = self.tips.iter().copied().map(hex_to_id).collect(); let expected: Vec = tips .clone() @@ -47,227 +53,380 @@ mod ancestor { .collect(); Ok((store, tips, expected)) } - fn check_with_predicate(&mut self, predicate: impl FnMut(&oid) -> bool) -> crate::Result<()> { + + fn setup_commitgraph(&self, store: &gix_odb::Store, use_graph: bool) -> Option { + use_graph + .then(|| gix_commitgraph::at(store.path().join("info"))) + .transpose() + .expect("graph can be loaded if it exists") + } + + fn check_with_predicate(&mut self, predicate: impl FnMut(&oid) -> bool + Clone) -> crate::Result<()> { let (store, tips, expected) = self.setup()?; - let oids: Result, _> = commit::Ancestors::filtered( - tips, - commit::ancestors::State::default(), - move |oid, buf| store.find_commit_iter(oid, buf).map(|t| t.0), - predicate, - ) - .sorting(self.sorting)? - .parents(self.mode) - .collect(); + for use_commitgraph in [false, true] { + let oids = commit::Ancestors::filtered( + tips.clone(), + commit::ancestors::State::default(), + |oid, buf| store.find_commit_iter(oid, buf).map(|t| t.0), + predicate.clone(), + ) + .sorting(self.sorting)? + .parents(self.mode) + .commit_graph(self.setup_commitgraph(store.store_ref(), use_commitgraph)) + .map(|res| res.map(|info| info.id)) + .collect::, _>>()?; - assert_eq!(oids?, expected); + assert_eq!(oids, expected); + } Ok(()) } fn check(&self) -> crate::Result { let (store, tips, expected) = self.setup()?; - let oids: Result, _> = - commit::Ancestors::new(tips, commit::ancestors::State::default(), move |oid, buf| { + + for use_commitgraph in [false, true] { + let oids = commit::Ancestors::new(tips.clone(), commit::ancestors::State::default(), |oid, buf| { store.find_commit_iter(oid, buf).map(|t| t.0) }) .sorting(self.sorting)? .parents(self.mode) - .collect(); - assert_eq!(oids?, expected); + .commit_graph(self.setup_commitgraph(store.store_ref(), use_commitgraph)) + .map(|res| res.map(|info| info.id)) + .collect::, _>>()?; + assert_eq!(oids, expected); + } Ok(()) } } - #[test] - fn linear_history_no_branch() -> crate::Result { - TraversalAssertion::new( - "make_traversal_repo_for_commits.sh", - &["9556057aee5abb06912922e9f26c46386a816822"], - &[ - "17d78c64cef6c33a10a604573fd2c429e477fd63", - "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", - "134385f6d781b7e97062102c6a483440bfda2a03", - ], - ) - .check() - } + mod different_date_intermixed { + use crate::commit::ancestor::TraversalAssertion; + use gix_traverse::commit::Sorting; - #[test] - fn simple_branch_with_merge() -> crate::Result { - TraversalAssertion::new( - "make_traversal_repo_for_commits.sh", - &["01ec18a3ebf2855708ad3c9d244306bc1fae3e9b"], - &[ - "efd9a841189668f1bab5b8ebade9cd0a1b139a37", - "ce2e8ffaa9608a26f7b21afc1db89cadb54fd353", - "9556057aee5abb06912922e9f26c46386a816822", - "9152eeee2328073cf23dcf8e90c949170b711659", - "17d78c64cef6c33a10a604573fd2c429e477fd63", - "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", - "134385f6d781b7e97062102c6a483440bfda2a03", - ], - ) - .check() - } + #[test] + fn head_breadth_first() -> crate::Result { + TraversalAssertion::new_at( + "make_repos.sh", + "intermixed", + &["58912d92944087dcb09dca79cdd2a937cc158bed"], /* merge */ + // This is very different from what git does as it keeps commits together, + // whereas we spread them out breadth-first. + &[ + "2dce37be587e07caef8c4a5ab60b423b13a8536a", /* c3 */ + "0f6632a5a7d81417488b86692b729e49c1b73056", /* b1c2 */ + "a9c28710e058af4e5163699960234adb9fb2abc7", /* b2c2 */ + "ad33ff2d0c4fc77d56b5fbff6f86f332fe792d83", /* c2 */ + "77fd3c6832c0cd542f7a39f3af9250c3268db979", /* b1c1 */ + "b648f955b930ca95352fae6f22cb593ee0244b27", /* b2c1 */ + "65d6af66f60b8e39fd1ba6a1423178831e764ec5", /* c1 */ + ], + ) + .check() + } - #[test] - fn simple_branch_first_parent_only() -> crate::Result { - TraversalAssertion::new( - "make_traversal_repo_for_commits.sh", - &["01ec18a3ebf2855708ad3c9d244306bc1fae3e9b"], - &[ - "efd9a841189668f1bab5b8ebade9cd0a1b139a37", - "9556057aee5abb06912922e9f26c46386a816822", - "17d78c64cef6c33a10a604573fd2c429e477fd63", - "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", - "134385f6d781b7e97062102c6a483440bfda2a03", - ], - ) - .with_parents(commit::Parents::First) - .check() + #[test] + fn head_date_order() -> crate::Result { + TraversalAssertion::new_at( + "make_repos.sh", + "intermixed", + &["58912d92944087dcb09dca79cdd2a937cc158bed"], /* merge */ + // This is exactly what git shows. + &[ + "2dce37be587e07caef8c4a5ab60b423b13a8536a", /* c3 */ + "0f6632a5a7d81417488b86692b729e49c1b73056", /* b1c2 */ + "a9c28710e058af4e5163699960234adb9fb2abc7", /* b2c2 */ + "77fd3c6832c0cd542f7a39f3af9250c3268db979", /* b1c1 */ + "b648f955b930ca95352fae6f22cb593ee0244b27", /* b2c1 */ + "ad33ff2d0c4fc77d56b5fbff6f86f332fe792d83", /* c2 */ + "65d6af66f60b8e39fd1ba6a1423178831e764ec5", /* c1 */ + ], + ) + .with_sorting(Sorting::ByCommitTimeNewestFirst) + .check() + } } - #[test] - fn multiple_tips() -> crate::Result { - TraversalAssertion::new( - "make_traversal_repo_for_commits.sh", - &[ - "01ec18a3ebf2855708ad3c9d244306bc1fae3e9b", - "9556057aee5abb06912922e9f26c46386a816822", - ], - &[ - "efd9a841189668f1bab5b8ebade9cd0a1b139a37", - "ce2e8ffaa9608a26f7b21afc1db89cadb54fd353", - "17d78c64cef6c33a10a604573fd2c429e477fd63", - "9152eeee2328073cf23dcf8e90c949170b711659", - "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", - "134385f6d781b7e97062102c6a483440bfda2a03", - ], - ) - .check() - } + mod different_date { + use crate::commit::ancestor::TraversalAssertion; + use gix_traverse::commit::Sorting; - #[test] - fn filtered_commit_does_not_block_ancestors_reachable_from_another_commit() -> crate::Result { - // I don't see a use case for the predicate returning false for a commit but return true for - // at least one of its ancestors, so this test is kind of dubious. But we do want - // `Ancestors` to not eagerly blacklist all of a commit's ancestors when blacklisting that - // one commit, and this test happens to check that. - TraversalAssertion::new( - "make_traversal_repo_for_commits.sh", - &["01ec18a3ebf2855708ad3c9d244306bc1fae3e9b"], - &[ - "efd9a841189668f1bab5b8ebade9cd0a1b139a37", - "ce2e8ffaa9608a26f7b21afc1db89cadb54fd353", - "9556057aee5abb06912922e9f26c46386a816822", - "17d78c64cef6c33a10a604573fd2c429e477fd63", - "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", - "134385f6d781b7e97062102c6a483440bfda2a03", - ], - ) - .check_with_predicate(|id| id != hex_to_id("9152eeee2328073cf23dcf8e90c949170b711659")) - } + #[test] + fn head_breadth_first() -> crate::Result { + TraversalAssertion::new_at( + "make_repos.sh", + "simple", + &["f49838d84281c3988eeadd988d97dd358c9f9dc4"], /* merge */ + // This is very different from what git does as it keeps commits together, + // whereas we spread them out breadth-first. + &[ + "0edb95c0c0d9933d88f532ec08fcd405d0eee882", /* c5 */ + "66a309480201c4157b0eae86da69f2d606aadbe7", /* b1c2 */ + "48e8dac19508f4238f06c8de2b10301ce64a641c", /* b2c2 */ + "8cb5f13b66ce52a49399a2c49f537ee2b812369c", /* c4 */ + "80947acb398362d8236fcb8bf0f8a9dac640583f", /* b1c1 */ + "cb6a6befc0a852ac74d74e0354e0f004af29cb79", /* b2c1 */ + "33aa07785dd667c0196064e3be3c51dd9b4744ef", /* c3 */ + "ad33ff2d0c4fc77d56b5fbff6f86f332fe792d83", /* c2 */ + "65d6af66f60b8e39fd1ba6a1423178831e764ec5", /* c1 */ + ], + ) + .check() + } - #[test] - fn predicate_only_called_once_even_if_fork_point() -> crate::Result { - // The `self.seen` check should come before the `self.predicate` check, as we don't know how - // expensive calling `self.predicate` may be. - let mut seen = false; - TraversalAssertion::new( - "make_traversal_repo_for_commits.sh", - &["01ec18a3ebf2855708ad3c9d244306bc1fae3e9b"], - &[ - "efd9a841189668f1bab5b8ebade9cd0a1b139a37", - "ce2e8ffaa9608a26f7b21afc1db89cadb54fd353", - "9152eeee2328073cf23dcf8e90c949170b711659", - ], - ) - .check_with_predicate(move |id| { - if id == hex_to_id("9556057aee5abb06912922e9f26c46386a816822") { - assert!(!seen); - seen = true; - false - } else { - true - } - }) + #[test] + fn head_date_order() -> crate::Result { + TraversalAssertion::new_at( + "make_repos.sh", + "simple", + &["f49838d84281c3988eeadd988d97dd358c9f9dc4"], /* merge */ + // This is exactly what git shows. + &[ + "0edb95c0c0d9933d88f532ec08fcd405d0eee882", /* c5 */ + "66a309480201c4157b0eae86da69f2d606aadbe7", /* b1c2 */ + "80947acb398362d8236fcb8bf0f8a9dac640583f", /* b1c1 */ + "48e8dac19508f4238f06c8de2b10301ce64a641c", /* b2c2 */ + "cb6a6befc0a852ac74d74e0354e0f004af29cb79", /* b2c1 */ + "8cb5f13b66ce52a49399a2c49f537ee2b812369c", /* c4 */ + "33aa07785dd667c0196064e3be3c51dd9b4744ef", /* c3 */ + "ad33ff2d0c4fc77d56b5fbff6f86f332fe792d83", /* c2 */ + "65d6af66f60b8e39fd1ba6a1423178831e764ec5", /* c1 */ + ], + ) + .with_sorting(Sorting::ByCommitTimeNewestFirst) + .check() + } } - #[test] - fn graph_sorted_commits() -> crate::Result { - TraversalAssertion::new( - "make_traversal_repo_for_commits_with_dates.sh", - &["288e509293165cb5630d08f4185bdf2445bf6170"], - &[ - "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", - "bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac", - "134385f6d781b7e97062102c6a483440bfda2a03", - ], - ) - .check() - } + /// Same dates are somewhat special as they show how sorting-details on priority queues affects ordering + mod same_date { + use crate::commit::ancestor::TraversalAssertion; + use crate::hex_to_id; + use gix_traverse::commit::{Parents, Sorting}; - #[test] - fn committer_date_sorted_commits() -> crate::Result { - TraversalAssertion::new( - "make_traversal_repo_for_commits_with_dates.sh", - &["288e509293165cb5630d08f4185bdf2445bf6170"], - &[ - "bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac", - "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", - "134385f6d781b7e97062102c6a483440bfda2a03", - ], - ) - .with_sorting(commit::Sorting::ByCommitTimeNewestFirst) - .check() - } + #[test] + fn c4_breadth_first() -> crate::Result { + TraversalAssertion::new( + "make_traversal_repo_for_commits_same_date.sh", + &["9556057aee5abb06912922e9f26c46386a816822"], /* c4 */ + &[ + "17d78c64cef6c33a10a604573fd2c429e477fd63", /* c3 */ + "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", /* c2 */ + "134385f6d781b7e97062102c6a483440bfda2a03", /* c1 */ + ], + ) + .check() + } - #[test] - fn committer_date_sorted_commits_with_cutoff() -> crate::Result { - TraversalAssertion::new( - "make_traversal_repo_for_commits_with_dates.sh", - &["288e509293165cb5630d08f4185bdf2445bf6170"], - &["bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac"], - ) - .with_sorting(commit::Sorting::ByCommitTimeNewestFirstCutoffOlderThan { - time_in_seconds_since_epoch: 978393600, // =2001-01-02 00:00:00 +0000 - }) - .check() - } + #[test] + fn head_breadth_first() -> crate::Result { + TraversalAssertion::new( + "make_traversal_repo_for_commits_same_date.sh", + &["01ec18a3ebf2855708ad3c9d244306bc1fae3e9b"], /* m1b1 */ + // We always take the first parent first, then the second, and so on. + // Deviation: git for some reason displays b1c2 *before* c5, but I think it's better + // to have a strict parent order. + &[ + "efd9a841189668f1bab5b8ebade9cd0a1b139a37", /* c5 */ + "ce2e8ffaa9608a26f7b21afc1db89cadb54fd353", /* b1c2 */ + "9556057aee5abb06912922e9f26c46386a816822", /* c4 */ + "9152eeee2328073cf23dcf8e90c949170b711659", /* b1c1 */ + "17d78c64cef6c33a10a604573fd2c429e477fd63", /* c3 */ + "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", /* c2 */ + "134385f6d781b7e97062102c6a483440bfda2a03", /* c1 */ + ], + ) + .check() + } + + #[test] + fn head_date_order() -> crate::Result { + TraversalAssertion::new( + "make_traversal_repo_for_commits_same_date.sh", + &["01ec18a3ebf2855708ad3c9d244306bc1fae3e9b"], /* m1b1 */ + &[ + "efd9a841189668f1bab5b8ebade9cd0a1b139a37", /* c5 */ + "ce2e8ffaa9608a26f7b21afc1db89cadb54fd353", /* b1c2 */ + "9556057aee5abb06912922e9f26c46386a816822", /* c4 */ + "9152eeee2328073cf23dcf8e90c949170b711659", /* b1c1 */ + "17d78c64cef6c33a10a604573fd2c429e477fd63", /* c3 */ + "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", /* c2 */ + "134385f6d781b7e97062102c6a483440bfda2a03", /* c1 */ + ], + ) + .with_sorting(Sorting::ByCommitTimeNewestFirst) + .check() + } + + #[test] + fn head_first_parent_only_breadth_first() -> crate::Result { + TraversalAssertion::new( + "make_traversal_repo_for_commits_same_date.sh", + &["01ec18a3ebf2855708ad3c9d244306bc1fae3e9b"], /* m1b1 */ + &[ + "efd9a841189668f1bab5b8ebade9cd0a1b139a37", /* c5 */ + "9556057aee5abb06912922e9f26c46386a816822", /* c4 */ + "17d78c64cef6c33a10a604573fd2c429e477fd63", /* c3 */ + "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", /* c2 */ + "134385f6d781b7e97062102c6a483440bfda2a03", /* c1 */ + ], + ) + .with_parents(Parents::First) + .check() + } + + #[test] + fn head_c4_breadth_first() -> crate::Result { + TraversalAssertion::new( + "make_traversal_repo_for_commits_same_date.sh", + &[ + "01ec18a3ebf2855708ad3c9d244306bc1fae3e9b", /* m1b1 */ + "9556057aee5abb06912922e9f26c46386a816822", /* c4 */ + ], + &[ + "efd9a841189668f1bab5b8ebade9cd0a1b139a37", /* c5 */ + "ce2e8ffaa9608a26f7b21afc1db89cadb54fd353", /* b1c2 */ + "17d78c64cef6c33a10a604573fd2c429e477fd63", /* c3 */ + "9152eeee2328073cf23dcf8e90c949170b711659", /* b1c1 */ + "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", /* c2 */ + "134385f6d781b7e97062102c6a483440bfda2a03", /* c1 */ + ], + ) + .check() + } + + #[test] + fn filtered_commit_does_not_block_ancestors_reachable_from_another_commit() -> crate::Result { + // I don't see a use case for the predicate returning false for a commit but return true for + // at least one of its ancestors, so this test is kind of dubious. But we do want + // `Ancestors` to not eagerly blacklist all of a commit's ancestors when blacklisting that + // one commit, and this test happens to check that. + TraversalAssertion::new( + "make_traversal_repo_for_commits_same_date.sh", + &["01ec18a3ebf2855708ad3c9d244306bc1fae3e9b"], /* m1b1 */ + &[ + "efd9a841189668f1bab5b8ebade9cd0a1b139a37", /* c5 */ + "ce2e8ffaa9608a26f7b21afc1db89cadb54fd353", /* b1c2 */ + "9556057aee5abb06912922e9f26c46386a816822", /* c4 */ + "17d78c64cef6c33a10a604573fd2c429e477fd63", /* c3 */ + "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", /* c2 */ + "134385f6d781b7e97062102c6a483440bfda2a03", /* c1 */ + ], + ) + .check_with_predicate(|id| id != hex_to_id("9152eeee2328073cf23dcf8e90c949170b711659")) + } - #[test] - fn committer_date_sorted_commits_with_cutoff_is_applied_to_starting_position() -> crate::Result { - let dir = - gix_testtools::scripted_fixture_read_only_standalone("make_traversal_repo_for_commits_with_dates.sh")?; - let store = gix_odb::at(dir.join(".git").join("objects"))?; - let iter = commit::Ancestors::new( - Some(hex_to_id("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7")), - commit::ancestors::State::default(), - move |oid, buf| store.find_commit_iter(oid, buf).map(|t| t.0), - ) - .sorting(commit::Sorting::ByCommitTimeNewestFirstCutoffOlderThan { - time_in_seconds_since_epoch: 978393600, // =2001-01-02 00:00:00 +0000 - })?; - assert_eq!( - iter.count(), - 0, - "initial tips that don't pass cutoff value are not returned either" - ); - Ok(()) + #[test] + fn predicate_only_called_once_even_if_fork_point() -> crate::Result { + // The `self.seen` check should come before the `self.predicate` check, as we don't know how + // expensive calling `self.predicate` may be. + let mut seen = false; + TraversalAssertion::new( + "make_traversal_repo_for_commits_same_date.sh", + &["01ec18a3ebf2855708ad3c9d244306bc1fae3e9b"], /* m1b1 */ + &[ + "efd9a841189668f1bab5b8ebade9cd0a1b139a37", /* c5 */ + "ce2e8ffaa9608a26f7b21afc1db89cadb54fd353", /* b1c2 */ + "9152eeee2328073cf23dcf8e90c949170b711659", /* b1c1 */ + ], + ) + .check_with_predicate(move |id| { + if id == hex_to_id("9556057aee5abb06912922e9f26c46386a816822") { + assert!(!seen); + seen = true; + false + } else { + true + } + }) + } } - #[test] - fn committer_date_sorted_commits_parents_only() -> crate::Result { - TraversalAssertion::new( - "make_traversal_repo_for_commits_with_dates.sh", - &["288e509293165cb5630d08f4185bdf2445bf6170"], - &[ - "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", - "134385f6d781b7e97062102c6a483440bfda2a03", - ], - ) - .with_sorting(commit::Sorting::ByCommitTimeNewestFirst) - .with_parents(commit::Parents::First) - .check() + /// Some dates adjusted to be a year apart, but still 'c1' and 'c2' with the same date. + mod adjusted_dates { + use crate::commit::ancestor::TraversalAssertion; + use crate::hex_to_id; + use gix_odb::FindExt; + use gix_traverse::commit::{ancestors, Ancestors, Parents, Sorting}; + + #[test] + fn head_breadth_first() -> crate::Result { + TraversalAssertion::new( + "make_traversal_repo_for_commits_with_dates.sh", + &["288e509293165cb5630d08f4185bdf2445bf6170"], /* m1b1 */ + // Here `git` also shows `b1c1` first, making topo-order similar to date order for some reason, + // even though c2 *is* the first parent. + &[ + "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", /* c2 */ + "bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac", /* b1c1 */ + "134385f6d781b7e97062102c6a483440bfda2a03", /* c1 */ + ], + ) + .check() + } + + #[test] + fn head_date_order() -> crate::Result { + TraversalAssertion::new( + "make_traversal_repo_for_commits_with_dates.sh", + &["288e509293165cb5630d08f4185bdf2445bf6170"], /* m1b1 */ + &[ + "bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac", /* b1c1 */ + "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", /* c2 */ + "134385f6d781b7e97062102c6a483440bfda2a03", /* c1 */ + ], + ) + .with_sorting(Sorting::ByCommitTimeNewestFirst) + .check() + } + + #[test] + fn head_date_order_with_cutoff() -> crate::Result { + TraversalAssertion::new( + "make_traversal_repo_for_commits_with_dates.sh", + &["288e509293165cb5630d08f4185bdf2445bf6170"], /* m1b1 */ + &["bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac"], /* b1c1 */ + ) + .with_sorting(Sorting::ByCommitTimeNewestFirstCutoffOlderThan { + time_in_seconds_since_epoch: 978393600, // =2001-01-02 00:00:00 +0000 + }) + .check() + } + + #[test] + fn date_order_with_cutoff_is_applied_to_starting_position() -> crate::Result { + let dir = + gix_testtools::scripted_fixture_read_only_standalone("make_traversal_repo_for_commits_with_dates.sh")?; + let store = gix_odb::at(dir.join(".git").join("objects"))?; + let iter = Ancestors::new( + Some(hex_to_id("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7" /* c2 */)), + ancestors::State::default(), + move |oid, buf| store.find_commit_iter(oid, buf), + ) + .sorting(Sorting::ByCommitTimeNewestFirstCutoffOlderThan { + time_in_seconds_since_epoch: 978393600, // =2001-01-02 00:00:00 +0000 + })?; + assert_eq!( + iter.count(), + 0, + "initial tips that don't pass cutoff value are not returned either" + ); + Ok(()) + } + + #[test] + fn head_date_order_first_parent_only() -> crate::Result { + TraversalAssertion::new( + "make_traversal_repo_for_commits_with_dates.sh", + &["288e509293165cb5630d08f4185bdf2445bf6170"], /* m1b1 */ + &[ + "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", /* c2 */ + "134385f6d781b7e97062102c6a483440bfda2a03", /* c1 */ + ], + ) + .with_sorting(Sorting::ByCommitTimeNewestFirst) + .with_parents(Parents::First) + .check() + } } } diff --git a/gix-traverse/tests/fixtures/generated-archives/make_repos.tar.xz b/gix-traverse/tests/fixtures/generated-archives/make_repos.tar.xz new file mode 100644 index 00000000000..14a9ad30277 --- /dev/null +++ b/gix-traverse/tests/fixtures/generated-archives/make_repos.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b73994d6fafbf957a5271c17085c6b4c6a72e845d6f15f8b1cda3c6a11367b01 +size 13252 diff --git a/gix-traverse/tests/fixtures/generated-archives/make_traversal_repo_for_commits.tar.xz b/gix-traverse/tests/fixtures/generated-archives/make_traversal_repo_for_commits.tar.xz deleted file mode 100644 index 3b3aa46d928..00000000000 --- a/gix-traverse/tests/fixtures/generated-archives/make_traversal_repo_for_commits.tar.xz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b27ea2072a10e4d86f848575b30e308cefd02d07c8add95904013f9759adf256 -size 11304 diff --git a/gix-traverse/tests/fixtures/generated-archives/make_traversal_repo_for_commits_same_date.tar.xz b/gix-traverse/tests/fixtures/generated-archives/make_traversal_repo_for_commits_same_date.tar.xz new file mode 100644 index 00000000000..f85404918b0 --- /dev/null +++ b/gix-traverse/tests/fixtures/generated-archives/make_traversal_repo_for_commits_same_date.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8336802a1d127fa751a87f01c744e37aee03c90929c13c352d8b884f00992833 +size 10988 diff --git a/gix-traverse/tests/fixtures/generated-archives/make_traversal_repo_for_commits_with_dates.tar.xz b/gix-traverse/tests/fixtures/generated-archives/make_traversal_repo_for_commits_with_dates.tar.xz index 36ed2e1aaad..1c6a7720424 100644 --- a/gix-traverse/tests/fixtures/generated-archives/make_traversal_repo_for_commits_with_dates.tar.xz +++ b/gix-traverse/tests/fixtures/generated-archives/make_traversal_repo_for_commits_with_dates.tar.xz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3f19abf0d05a12c2934f5b479a565afbf348cdaf9c05fb99d7b5c7d225eeb125 -size 10512 +oid sha256:2898513038dbd39a50856789cea432ddc48e8bf545e33dc5af394f7834cdb857 +size 10520 diff --git a/gix-traverse/tests/fixtures/make_repos.sh b/gix-traverse/tests/fixtures/make_repos.sh new file mode 100644 index 00000000000..ec35266c2e5 --- /dev/null +++ b/gix-traverse/tests/fixtures/make_repos.sh @@ -0,0 +1,78 @@ +#!/bin/bash +set -eu -o pipefail + +function tick () { + if test -z "${tick+set}" + then + tick=1112911993 + else + tick=$(($tick + 60)) + fi + GIT_COMMITTER_DATE="$tick -0700" + GIT_AUTHOR_DATE="$tick -0700" + export GIT_COMMITTER_DATE GIT_AUTHOR_DATE +} + +tick +function commit() { + local message=${1:?first argument is the commit message} + tick + git commit --allow-empty -m "$message" +} + +function optimize() { + git commit-graph write --no-progress --reachable + git repack -adq +} + +(git init simple && cd simple + git config merge.ff false + + git checkout -q -b main + commit c1 + commit c2 + commit c3 + commit c4 + + git checkout -q -b branch1 + git checkout -q -b branch2 + commit b2c1 + commit b2c2 + + git checkout branch1 + commit b1c1 + commit b1c2 + + git checkout -q main + commit c5 + git merge branch1 branch2 -m merge + + optimize +) + +(git init intermixed && cd intermixed + git config merge.ff false + + git checkout -q -b main + commit c1 + commit c2 + + git checkout -q -b branch1 + git checkout -q -b branch2 + commit b2c1 + + git checkout branch1 + commit b1c1 + + git checkout branch2 + commit b2c2 + + git checkout branch1 + commit b1c2 + + git checkout -q main + commit c3 + git merge branch1 branch2 -m merge + + optimize +) diff --git a/gix-traverse/tests/fixtures/make_traversal_repo_for_commits.sh b/gix-traverse/tests/fixtures/make_traversal_repo_for_commits_same_date.sh similarity index 73% rename from gix-traverse/tests/fixtures/make_traversal_repo_for_commits.sh rename to gix-traverse/tests/fixtures/make_traversal_repo_for_commits_same_date.sh index fe7d265f0d3..d9e6e60e601 100755 --- a/gix-traverse/tests/fixtures/make_traversal_repo_for_commits.sh +++ b/gix-traverse/tests/fixtures/make_traversal_repo_for_commits_same_date.sh @@ -1,6 +1,8 @@ #!/bin/bash set -eu -o pipefail +# all commits have the same date as it's set by `gix-testtools` to a fixed value. + git init -q git config merge.ff false @@ -17,3 +19,6 @@ git commit -q --allow-empty -m b1c2 git checkout -q main git commit -q --allow-empty -m c5 git merge branch1 -m m1b1 + +git commit-graph write --no-progress --reachable +git repack -adq diff --git a/gix-traverse/tests/fixtures/make_traversal_repo_for_commits_with_dates.sh b/gix-traverse/tests/fixtures/make_traversal_repo_for_commits_with_dates.sh index bb557a416c0..f80a769f43d 100755 --- a/gix-traverse/tests/fixtures/make_traversal_repo_for_commits_with_dates.sh +++ b/gix-traverse/tests/fixtures/make_traversal_repo_for_commits_with_dates.sh @@ -18,3 +18,6 @@ GIT_COMMITTER_DATE="2000-01-02 00:00:00 +0000" git commit -q --allow-empty -m c2 # Commit from branch1 made in 2001 merged in 2002 GIT_COMMITTER_DATE="2002-01-02 00:00:00 +0000" git merge branch1 -m m1b1 #288e509293165cb5630d08f4185bdf2445bf6170- + +git commit-graph write --no-progress --reachable +git repack -adq diff --git a/gix/examples/stats.rs b/gix/examples/stats.rs index f4c500ac8f2..7d8dc3dc592 100644 --- a/gix/examples/stats.rs +++ b/gix/examples/stats.rs @@ -1,37 +1,39 @@ +use gix::prelude::ObjectIdExt; use gix::Reference; +use std::io::Write; fn main() -> Result<(), Box> { let mut repo = gix::discover(".")?; println!("Repo: {}", repo.work_dir().unwrap_or_else(|| repo.git_dir()).display()); - let mut max_commit_size = 0; - let mut avg_commit_size = 0; + let mut max_parents = 0; + let mut avg_parents = 0; repo.object_cache_size(32 * 1024); - let commit_ids = repo + let mut most_recent_commit_id = None; + let num_commits = repo .head()? .into_fully_peeled_id() - .ok_or("There are no commits - nothing to do here.")?? + .ok_or("Cannot provide meaningful stats on empty repos")?? .ancestors() .all()? - .inspect(|id| { - if let Ok(Ok(object)) = id.as_ref().map(|id| id.object()) { - avg_commit_size += object.data.len(); - if object.data.len() > max_commit_size { - max_commit_size = object.data.len(); - } + .map_while(Result::ok) + .inspect(|commit| { + if most_recent_commit_id.is_none() { + most_recent_commit_id = Some(commit.id); } + avg_parents += commit.parent_ids.len(); + max_parents = max_parents.max(commit.parent_ids.len()); }) - .collect::, _>>()?; - println!("Num Commits: {}", commit_ids.len()); - println!("Max commit Size: {max_commit_size}"); - println!("Avg commit Size: {}", avg_commit_size / commit_ids.len()); - assert!(!commit_ids.is_empty(), "checked that before"); + .count(); + println!("Num Commits: {num_commits}"); + println!("Max parents: {max_parents}"); + println!("Avg parents: {}", avg_parents / num_commits); - let last_commit_id = &commit_ids[0]; println!("Most recent commit message"); - let object = last_commit_id.object()?; + let object = most_recent_commit_id.expect("already checked").attach(&repo).object()?; let commit = object.into_commit(); println!("{}", commit.message_raw()?); + std::io::stdout().flush()?; let tree = commit.tree()?; diff --git a/gix/src/remote/connection/fetch/negotiate.rs b/gix/src/remote/connection/fetch/negotiate.rs index e94461bab09..fde2e2b2ba0 100644 --- a/gix/src/remote/connection/fetch/negotiate.rs +++ b/gix/src/remote/connection/fetch/negotiate.rs @@ -234,7 +234,7 @@ fn mark_recent_complete_commits( .peek() .and_then(|(commit_time, id)| (commit_time >= &cutoff).then_some(*id)) { - queue.pop(); + queue.pop_value(); let commit = graph.get(&id).expect("definitely set when adding tips or parents"); for parent_id in commit.parents.clone() { let mut was_complete = false; diff --git a/gix/src/remote/connection/fetch/receive_pack.rs b/gix/src/remote/connection/fetch/receive_pack.rs index 7837a9d3a33..820614647af 100644 --- a/gix/src/remote/connection/fetch/receive_pack.rs +++ b/gix/src/remote/connection/fetch/receive_pack.rs @@ -131,7 +131,7 @@ where r.objects.unset_object_cache(); r }; - let mut graph = graph_repo.commit_graph(); + let mut graph = graph_repo.revision_graph(); let action = negotiate::mark_complete_and_common_ref( &graph_repo, negotiator.deref_mut(), diff --git a/gix/src/remote/connection/fetch/update_refs/mod.rs b/gix/src/remote/connection/fetch/update_refs/mod.rs index 9534906720a..fde53c75cad 100644 --- a/gix/src/remote/connection/fetch/update_refs/mod.rs +++ b/gix/src/remote/connection/fetch/update_refs/mod.rs @@ -140,7 +140,7 @@ pub(crate) fn update( ); match ancestors { Ok(mut ancestors) => { - ancestors.any(|cid| cid.map_or(false, |cid| cid == local_id)) + ancestors.any(|cid| cid.map_or(false, |c| c.id == local_id)) } Err(_) => { force = true; diff --git a/gix/src/repository/graph.rs b/gix/src/repository/graph.rs index a1f6c7f89a5..a87fb8f4c56 100644 --- a/gix/src/repository/graph.rs +++ b/gix/src/repository/graph.rs @@ -11,7 +11,7 @@ impl crate::Repository { /// /// Note that the [Graph][gix_revision::Graph] can be sensitive to various object database settings that may affect the performance /// of the commit walk. - pub fn commit_graph(&self) -> gix_revision::Graph<'_, T> { + pub fn revision_graph(&self) -> gix_revision::Graph<'_, T> { gix_revision::Graph::new( |id, buf| { self.objects @@ -21,4 +21,13 @@ impl crate::Repository { gix_commitgraph::at(self.objects.store_ref().path().join("info")).ok(), ) } + + /// Return a cache for commits and their graph structure, as managed by `git commit-graph`, for accelerating commit walks on + /// a low level. + /// + /// Note that [`revision_graph()`][crate::Repository::revision_graph()] should be preferred for general purpose walks that don't + /// rely on the actual commit cache to be present, while leveraging it if possible. + pub fn commit_graph(&self) -> Result { + gix_commitgraph::at(self.objects.store_ref().path().join("info")) + } } diff --git a/gix/src/repository/index.rs b/gix/src/repository/index.rs new file mode 100644 index 00000000000..ea68dbb08a1 --- /dev/null +++ b/gix/src/repository/index.rs @@ -0,0 +1,103 @@ +use crate::config::cache::util::ApplyLeniencyDefault; +use crate::repository::IndexPersistedOrInMemory; +use crate::worktree; +use gix_odb::FindExt; + +/// Index access +impl crate::Repository { + /// Open a new copy of the index file and decode it entirely. + /// + /// It will use the `index.threads` configuration key to learn how many threads to use. + /// Note that it may fail if there is no index. + pub fn open_index(&self) -> Result { + let thread_limit = self + .config + .resolved + .string("index", None, "threads") + .map(|value| crate::config::tree::Index::THREADS.try_into_index_threads(value)) + .transpose() + .with_lenient_default(self.config.lenient_config)?; + gix_index::File::at( + self.index_path(), + self.object_hash(), + gix_index::decode::Options { + thread_limit, + min_extension_block_in_bytes_for_threading: 0, + expected_checksum: None, + }, + ) + .map_err(Into::into) + } + + /// Return a shared worktree index which is updated automatically if the in-memory snapshot has become stale as the underlying file + /// on disk has changed. + /// + /// The index file is shared across all clones of this repository. + pub fn index(&self) -> Result { + self.index + .recent_snapshot( + || self.index_path().metadata().and_then(|m| m.modified()).ok(), + || { + self.open_index().map(Some).or_else(|err| match err { + worktree::open_index::Error::IndexFile(gix_index::file::init::Error::Io(err)) + if err.kind() == std::io::ErrorKind::NotFound => + { + Ok(None) + } + err => Err(err), + }) + }, + ) + .and_then(|opt| match opt { + Some(index) => Ok(index), + None => Err(worktree::open_index::Error::IndexFile( + gix_index::file::init::Error::Io(std::io::Error::new( + std::io::ErrorKind::NotFound, + format!("Could not find index file at {:?} for opening.", self.index_path()), + )), + )), + }) + } + + /// Open the persisted worktree index or generate it from the current `HEAD^{tree}` to live in-memory only. + /// + /// Use this method to get an index in any repository, even bare ones that don't have one naturally. + pub fn index_or_load_from_head( + &self, + ) -> Result { + Ok(match self.index() { + Ok(index) => IndexPersistedOrInMemory::Persisted(index), + Err(worktree::open_index::Error::IndexFile(_)) => { + let tree = self.head_commit()?.tree_id()?; + IndexPersistedOrInMemory::InMemory(gix_index::File::from_state( + gix_index::State::from_tree(&tree, |oid, buf| self.objects.find_tree_iter(oid, buf).ok())?, + self.git_dir().join("index"), + )) + } + Err(err) => return Err(err.into()), + }) + } +} + +impl std::ops::Deref for IndexPersistedOrInMemory { + type Target = gix_index::File; + + fn deref(&self) -> &Self::Target { + match self { + IndexPersistedOrInMemory::Persisted(i) => i, + IndexPersistedOrInMemory::InMemory(i) => i, + } + } +} + +impl IndexPersistedOrInMemory { + /// Consume this instance and turn it into an owned index file. + /// + /// Note that this will cause the persisted index to be cloned, which would happen whenever the repository has a worktree. + pub fn into_owned(self) -> gix_index::File { + match self { + IndexPersistedOrInMemory::Persisted(i) => gix_index::File::clone(&i), + IndexPersistedOrInMemory::InMemory(i) => i, + } + } +} diff --git a/gix/src/repository/mod.rs b/gix/src/repository/mod.rs index f8a51e8d05f..c915d8cb969 100644 --- a/gix/src/repository/mod.rs +++ b/gix/src/repository/mod.rs @@ -40,6 +40,7 @@ mod excludes; mod graph; pub(crate) mod identity; mod impls; +mod index; mod init; mod kind; mod location; @@ -52,3 +53,30 @@ mod snapshots; mod state; mod thread_safe; mod worktree; + +/// A type to represent an index which either was loaded from disk as it was persisted there, or created on the fly in memory. +pub enum IndexPersistedOrInMemory { + /// The index as loaded from disk, and shared across clones of the owning `Repository`. + Persisted(crate::worktree::Index), + /// A temporary index as created from the `HEAD^{tree}`, with the file path set to the place where it would be stored naturally. + /// + /// Note that unless saved explicitly, it will not persist. + InMemory(gix_index::File), +} + +/// +pub mod index_or_load_from_head { + /// The error returned by [`Repository::index_or_load_from_head()`][crate::Repository::index_or_load_from_head()]. + #[derive(thiserror::Error, Debug)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + HeadCommit(#[from] crate::reference::head_commit::Error), + #[error(transparent)] + TreeId(#[from] gix_object::decode::Error), + #[error(transparent)] + TraverseTree(#[from] gix_traverse::tree::breadthfirst::Error), + #[error(transparent)] + OpenIndex(#[from] crate::worktree::open_index::Error), + } +} diff --git a/gix/src/repository/worktree.rs b/gix/src/repository/worktree.rs index f522a3f18b1..756b848fb4d 100644 --- a/gix/src/repository/worktree.rs +++ b/gix/src/repository/worktree.rs @@ -1,4 +1,4 @@ -use crate::{config::cache::util::ApplyLeniencyDefault, worktree, Worktree}; +use crate::{worktree, Worktree}; /// Interact with individual worktrees and their information. impl crate::Repository { @@ -49,58 +49,4 @@ impl crate::Repository { pub fn is_bare(&self) -> bool { self.config.is_bare && self.work_dir().is_none() } - - /// Open a new copy of the index file and decode it entirely. - /// - /// It will use the `index.threads` configuration key to learn how many threads to use. - /// Note that it may fail if there is no index. - pub fn open_index(&self) -> Result { - let thread_limit = self - .config - .resolved - .string("index", None, "threads") - .map(|value| crate::config::tree::Index::THREADS.try_into_index_threads(value)) - .transpose() - .with_lenient_default(self.config.lenient_config)?; - gix_index::File::at( - self.index_path(), - self.object_hash(), - gix_index::decode::Options { - thread_limit, - min_extension_block_in_bytes_for_threading: 0, - expected_checksum: None, - }, - ) - .map_err(Into::into) - } - - /// Return a shared worktree index which is updated automatically if the in-memory snapshot has become stale as the underlying file - /// on disk has changed. - /// - /// The index file is shared across all clones of this repository. - pub fn index(&self) -> Result { - self.index - .recent_snapshot( - || self.index_path().metadata().and_then(|m| m.modified()).ok(), - || { - self.open_index().map(Some).or_else(|err| match err { - worktree::open_index::Error::IndexFile(gix_index::file::init::Error::Io(err)) - if err.kind() == std::io::ErrorKind::NotFound => - { - Ok(None) - } - err => Err(err), - }) - }, - ) - .and_then(|opt| match opt { - Some(index) => Ok(index), - None => Err(worktree::open_index::Error::IndexFile( - gix_index::file::init::Error::Io(std::io::Error::new( - std::io::ErrorKind::NotFound, - format!("Could not find index file at {:?} for opening.", self.index_path()), - )), - )), - }) - } } diff --git a/gix/src/revision/spec/parse/delegate/navigate.rs b/gix/src/revision/spec/parse/delegate/navigate.rs index f6e08536873..1b1c8323b6e 100644 --- a/gix/src/revision/spec/parse/delegate/navigate.rs +++ b/gix/src/revision/spec/parse/delegate/navigate.rs @@ -65,7 +65,7 @@ impl<'repo> delegate::Navigate for Delegate<'repo> { .filter_map(Result::ok) .next() { - Some(id) => replacements.push((*obj, id.detach())), + Some(commit) => replacements.push((*obj, commit.id)), None => errors.push(( *obj, Error::AncestorOutOfRange { @@ -193,8 +193,8 @@ impl<'repo> delegate::Navigate for Delegate<'repo> { let mut matched = false; let mut count = 0; let commits = iter.map(|res| { - res.map_err(Error::from).and_then(|commit_id| { - commit_id.object().map_err(Error::from).map(|obj| obj.into_commit()) + res.map_err(Error::from).and_then(|commit| { + commit.id().object().map_err(Error::from).map(|obj| obj.into_commit()) }) }); for commit in commits { @@ -250,8 +250,8 @@ impl<'repo> delegate::Navigate for Delegate<'repo> { let mut matched = false; let mut count = 0; let commits = iter.map(|res| { - res.map_err(Error::from).and_then(|commit_id| { - commit_id.object().map_err(Error::from).map(|obj| obj.into_commit()) + res.map_err(Error::from).and_then(|commit| { + commit.id().object().map_err(Error::from).map(|obj| obj.into_commit()) }) }); for commit in commits { diff --git a/gix/src/revision/walk.rs b/gix/src/revision/walk.rs index 9c545d0d43c..f9a21d66674 100644 --- a/gix/src/revision/walk.rs +++ b/gix/src/revision/walk.rs @@ -1,6 +1,7 @@ use gix_hash::ObjectId; use gix_odb::FindExt; +use crate::ext::ObjectIdExt; use crate::{revision, Repository}; /// The error returned by [`Platform::all()`]. @@ -13,13 +14,85 @@ pub enum Error { ShallowCommits(#[from] crate::shallow::open::Error), } +/// Information about a commit that we obtained naturally as part of the iteration. +#[derive(Debug, Clone)] +pub struct Info<'repo> { + /// The detached id of the commit. + pub id: gix_hash::ObjectId, + /// All parent ids we have encountered. Note that these will be at most one if [`Parents::First`][gix_traverse::commit::Parents::First] is enabled. + pub parent_ids: gix_traverse::commit::ParentIds, + /// The time at which the commit was created. It's only `Some(_)` if sorting is not [`Sorting::BreadthFirst`][gix_traverse::commit::Sorting::BreadthFirst], + /// as the walk needs to require the commit-date. + pub commit_time: Option, + + repo: &'repo Repository, +} + +/// Access +impl<'repo> Info<'repo> { + /// Provide an attached version of our [`id`][Info::id] field. + pub fn id(&self) -> crate::Id<'repo> { + self.id.attach(self.repo) + } + + /// Read the whole object from the object database. + /// + /// Note that this is an expensive operation which shouldn't be performed unless one needs more than parent ids + /// and commit time. + pub fn object(&self) -> Result, crate::object::find::existing::Error> { + Ok(self.id().object()?.into_commit()) + } + + /// Provide an iterator yielding attached versions of our [`parent_ids`][Info::parent_ids] field. + pub fn parent_ids(&self) -> impl Iterator> + '_ { + self.parent_ids.iter().map(|id| id.attach(self.repo)) + } + + /// Returns the commit-time of this commit. + /// + /// ### Panics + /// + /// If the iteration wasn't ordered by date. + pub fn commit_time(&self) -> u64 { + self.commit_time.expect("traversal involving date caused it to be set") + } +} + +/// Initialization and detachment +impl<'repo> Info<'repo> { + /// Create a new instance that represents `info`, but is attached to `repo` as well. + pub fn new(info: gix_traverse::commit::Info, repo: &'repo Repository) -> Self { + Info { + id: info.id, + parent_ids: info.parent_ids, + commit_time: info.commit_time, + repo, + } + } + /// Consume this instance and remove the reference to the underlying repository. + /// + /// This is useful for sending instances across threads, for example. + pub fn detach(self) -> gix_traverse::commit::Info { + gix_traverse::commit::Info { + id: self.id, + parent_ids: self.parent_ids, + commit_time: self.commit_time, + } + } +} + /// A platform to traverse the revision graph by adding starting points as well as points which shouldn't be crossed, /// returned by [`Repository::rev_walk()`]. +/// +/// **Note that we automatically leverage the commitgraph data structure**, but if you know that additional information like +/// author or commit messages will be required of *all* commits traversed here, it should be better to avoid trying to load it +/// by [turning commit-graph support off][Platform::use_commit_graph()]. This certainly is a micro-optimization though. pub struct Platform<'repo> { pub(crate) repo: &'repo Repository, pub(crate) tips: Vec, pub(crate) sorting: gix_traverse::commit::Sorting, pub(crate) parents: gix_traverse::commit::Parents, + pub(crate) use_commit_graph: bool, } impl<'repo> Platform<'repo> { @@ -29,13 +102,14 @@ impl<'repo> Platform<'repo> { tips: tips.into_iter().map(Into::into).collect(), sorting: Default::default(), parents: Default::default(), + use_commit_graph: true, } } } /// Create-time builder methods impl<'repo> Platform<'repo> { - /// Set the sort mode for commits to the given value. The default is to order by topology. + /// Set the sort mode for commits to the given value. The default is to order topologically breadth-first. pub fn sorting(mut self, sorting: gix_traverse::commit::Sorting) -> Self { self.sorting = sorting; self @@ -46,6 +120,15 @@ impl<'repo> Platform<'repo> { self.parents = gix_traverse::commit::Parents::First; self } + + /// Allow using the commitgraph, if present, if `toggle` is `true`, or disallow it with `false`. + /// + /// Note that the commitgraph will be used by default, and that errors just lead to falling back to the object database, + /// it's treated as a cache. + pub fn use_commit_graph(mut self, toggle: bool) -> Self { + self.use_commit_graph = toggle; + self + } } /// Produce the iterator @@ -63,6 +146,7 @@ impl<'repo> Platform<'repo> { tips, sorting, parents, + use_commit_graph, } = self; Ok(revision::Walk { repo, @@ -100,7 +184,8 @@ impl<'repo> Platform<'repo> { }, ) .sorting(sorting)? - .parents(parents), + .parents(parents) + .commit_graph(use_commit_graph.then(|| self.repo.commit_graph().ok()).flatten()), ), }) } @@ -116,20 +201,21 @@ impl<'repo> Platform<'repo> { } pub(crate) mod iter { - use crate::{ext::ObjectIdExt, Id}; - /// The iterator returned by [`crate::revision::walk::Platform::all()`]. pub struct Walk<'repo> { pub(crate) repo: &'repo crate::Repository, - pub(crate) inner: - Box> + 'repo>, + pub(crate) inner: Box< + dyn Iterator> + 'repo, + >, } impl<'repo> Iterator for Walk<'repo> { - type Item = Result, gix_traverse::commit::ancestors::Error>; + type Item = Result, gix_traverse::commit::ancestors::Error>; fn next(&mut self) -> Option { - self.inner.next().map(|res| res.map(|id| id.attach(self.repo))) + self.inner + .next() + .map(|res| res.map(|info| super::Info::new(info, self.repo))) } } } diff --git a/gix/tests/fixtures/generated-archives/make_repo_with_fork_and_dates.tar.xz b/gix/tests/fixtures/generated-archives/make_repo_with_fork_and_dates.tar.xz index 5e9a4e24677..1c6a7720424 100644 --- a/gix/tests/fixtures/generated-archives/make_repo_with_fork_and_dates.tar.xz +++ b/gix/tests/fixtures/generated-archives/make_repo_with_fork_and_dates.tar.xz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:91b1dd658c06390db23c8aa1fbf4942c83ed31e05eab07e0f7596c94a8a8fc07 -size 10496 +oid sha256:2898513038dbd39a50856789cea432ddc48e8bf545e33dc5af394f7834cdb857 +size 10520 diff --git a/gix/tests/fixtures/make_repo_with_fork_and_dates.sh b/gix/tests/fixtures/make_repo_with_fork_and_dates.sh index bb557a416c0..f80a769f43d 100644 --- a/gix/tests/fixtures/make_repo_with_fork_and_dates.sh +++ b/gix/tests/fixtures/make_repo_with_fork_and_dates.sh @@ -18,3 +18,6 @@ GIT_COMMITTER_DATE="2000-01-02 00:00:00 +0000" git commit -q --allow-empty -m c2 # Commit from branch1 made in 2001 merged in 2002 GIT_COMMITTER_DATE="2002-01-02 00:00:00 +0000" git merge branch1 -m m1b1 #288e509293165cb5630d08f4185bdf2445bf6170- + +git commit-graph write --no-progress --reachable +git repack -adq diff --git a/gix/tests/id/mod.rs b/gix/tests/id/mod.rs index 1cf1c474399..c5dffdf09ae 100644 --- a/gix/tests/id/mod.rs +++ b/gix/tests/id/mod.rs @@ -75,13 +75,18 @@ mod ancestors { fn all() -> crate::Result { let repo = crate::repo("make_repo_with_fork_and_dates.sh")?.to_thread_local(); let head = repo.head()?.into_fully_peeled_id().expect("born")?; - let commits_graph_order = head.ancestors().all()?.collect::, _>>()?; + let commits_graph_order = head + .ancestors() + .all()? + .map(|c| c.map(|c| c.detach())) + .collect::, _>>()?; assert_eq!(commits_graph_order.len(), 4, "need a specific amount of commits"); let commits_by_commit_date = head .ancestors() .sorting(commit::Sorting::ByCommitTimeNewestFirst) .all()? + .map(|c| c.map(|c| c.detach())) .collect::, _>>()?; assert_eq!( commits_by_commit_date.len(), @@ -106,27 +111,31 @@ mod ancestors { let repo = crate::repo("make_repo_with_fork_and_dates.sh")?.to_thread_local(); let head = repo.head()?.into_fully_peeled_id().expect("born")?; - for sorting in [ - commit::Sorting::Topological, - commit::Sorting::ByCommitTimeNewestFirst, - commit::Sorting::ByCommitTimeNewestFirstCutoffOlderThan { - time_in_seconds_since_epoch: 0, - }, - ] { - let commits_graph_order = head - .ancestors() - .sorting(sorting) - .selected(|id| { - let _assert_lifetime_works = &repo; // assure we can use repo here. - id != hex_to_id("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7") - && id != hex_to_id("bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac") - })? - .collect::, _>>()?; - assert_eq!( - commits_graph_order, - &[hex_to_id("288e509293165cb5630d08f4185bdf2445bf6170")], - "we ignore all but the first" - ); + for use_commit_graph in [false, true] { + for sorting in [ + commit::Sorting::BreadthFirst, + commit::Sorting::ByCommitTimeNewestFirst, + commit::Sorting::ByCommitTimeNewestFirstCutoffOlderThan { + time_in_seconds_since_epoch: 0, + }, + ] { + let commits_graph_order = head + .ancestors() + .sorting(sorting) + .use_commit_graph(use_commit_graph) + .selected(|id| { + let _assert_lifetime_works = &repo; // assure we can use repo here. + id != hex_to_id("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7") + && id != hex_to_id("bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac") + })? + .map(|c| c.map(|c| c.id)) + .collect::, _>>()?; + assert_eq!( + commits_graph_order, + &[hex_to_id("288e509293165cb5630d08f4185bdf2445bf6170")], + "we ignore all but the first" + ); + } } Ok(()) } diff --git a/gix/tests/repository/object.rs b/gix/tests/repository/object.rs index aeb3dd4036b..05b7602b731 100644 --- a/gix/tests/repository/object.rs +++ b/gix/tests/repository/object.rs @@ -125,15 +125,18 @@ mod find { } for commit_id in repo.head()?.peeled()?.id().expect("born").ancestors().all()? { let commit = commit_id?; - assert_eq!(commit.object()?.kind, gix_object::Kind::Commit); + assert_eq!(commit.id().object()?.kind, gix_object::Kind::Commit); if round == 2 { assert_eq!( - commit.object()?.kind, + commit.id().object()?.kind, gix_object::Kind::Commit, "repeated request triggers cache and doesn't fail" ); } - assert_eq!(commit.try_object()?.expect("exists").kind, gix_object::Kind::Commit,); + assert_eq!( + commit.id().try_object()?.expect("exists").kind, + gix_object::Kind::Commit, + ); } } Ok(()) diff --git a/gix/tests/repository/shallow.rs b/gix/tests/repository/shallow.rs index 2cb93ac4446..db7f111a080 100644 --- a/gix/tests/repository/shallow.rs +++ b/gix/tests/repository/shallow.rs @@ -9,7 +9,12 @@ fn no() -> crate::Result { let repo = named_subrepo_opts("make_shallow_repo.sh", name, crate::restricted())?; assert!(!repo.is_shallow()); assert!(repo.shallow_commits()?.is_none()); - let commits: Vec<_> = repo.head_id()?.ancestors().all()?.collect::>()?; + let commits: Vec<_> = repo + .head_id()? + .ancestors() + .all()? + .map(|c| c.map(|c| c.id)) + .collect::>()?; let expected = if name == "base" { vec![ hex_to_id("30887839de28edf7ab66c860e5c58b4d445f6b12"), @@ -49,7 +54,12 @@ mod traverse { fn boundary_is_detected_triggering_no_error() -> crate::Result { for name in ["shallow.git", "shallow"] { let repo = named_subrepo_opts("make_shallow_repo.sh", name, crate::restricted())?; - let commits: Vec<_> = repo.head_id()?.ancestors().all()?.collect::>()?; + let commits: Vec<_> = repo + .head_id()? + .ancestors() + .all()? + .map(|c| c.map(|c| c.id)) + .collect::>()?; assert_eq!(commits, [hex_to_id("30887839de28edf7ab66c860e5c58b4d445f6b12")]); } Ok(()) @@ -78,6 +88,7 @@ mod traverse { .ancestors() .sorting(Sorting::ByCommitTimeNewestFirst) .all()? + .map(|c| c.map(|c| c.id)) .collect::>()?; assert_eq!( commits,