From 724601d474ce4acd30ee482e77fd96be2fe3425b Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Mon, 13 Jan 2025 15:59:58 +0100
Subject: [PATCH 1/2] fix: improve handling of overflows when there are more
 packs than we can hold.

Internally, there is a statically allocated vec which holds opened packs and
indices. When reconciling the disk-state with what's currently loaded, it was
possible to encounter more files than the slotmap could hold, leaving it in an
invalid state.

The system now generously trashes existing slots to be able to load more recent
ones, which seems to help in this situation. It probably still leads to strange
situations where not all objects can be read.
---
 gix-odb/src/lib.rs                            |  2 +-
 gix-odb/src/store_impls/dynamic/load_index.rs | 12 +++++++++---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/gix-odb/src/lib.rs b/gix-odb/src/lib.rs
index ca91fa7a185..e108e8f4df4 100644
--- a/gix-odb/src/lib.rs
+++ b/gix-odb/src/lib.rs
@@ -119,7 +119,7 @@ pub struct Store {
     /// The below state acts like a slot-map with each slot is mutable when the write lock is held, but readable independently of it.
     /// This allows multiple file to be loaded concurrently if there is multiple handles requesting to load packs or additional indices.
-    /// The map is static and cannot typically change.
+    /// The map is static and cannot change.
     /// It's read often and changed rarely.
     pub(crate) files: Vec,
diff --git a/gix-odb/src/store_impls/dynamic/load_index.rs b/gix-odb/src/store_impls/dynamic/load_index.rs
index a83381976cb..72594f8f552 100644
--- a/gix-odb/src/store_impls/dynamic/load_index.rs
+++ b/gix-odb/src/store_impls/dynamic/load_index.rs
@@ -266,7 +266,7 @@ impl super::Store {
                     Option::as_ref(&files_guard).expect("slot is set or we wouldn't know it points to this file");
                 if index_info.is_multi_index() && files.mtime() != mtime {
                     // we have a changed multi-pack index. We can't just change the existing slot as it may alter slot indices
-                    // that are currently available. Instead we have to move what's there into a new slot, along with the changes,
+                    // that are currently available. Instead, we have to move what's there into a new slot, along with the changes,
                     // and later free the slot or dispose of the index in the slot (like we do for removed/missing files).
                     index_paths_to_add.push_back((index_info, mtime, Some(slot_idx)));
                     // If the current slot is loaded, the soon-to-be copied multi-index path will be loaded as well.
@@ -304,6 +304,12 @@ impl super::Store {
                         needed: index_paths_to_add.len() + 1, /*the one currently popped off*/
                     });
                 }
+                // Don't allow duplicate indices, we need a 1:1 mapping.
+                if new_slot_map_indices.contains(&next_possibly_free_index) {
+                    next_possibly_free_index = (next_possibly_free_index + 1) % self.files.len();
+                    num_indices_checked += 1;
+                    continue 'increment_slot_index;
+                }
                 let slot_index = next_possibly_free_index;
                 let slot = &self.files[slot_index];
                 next_possibly_free_index = (next_possibly_free_index + 1) % self.files.len();
@@ -502,7 +508,7 @@ impl super::Store {
         }
         // Unlike libgit2, do not sort by modification date, but by size and put the biggest indices first. That way
         // the chance to hit an object should be higher. We leave it to the handle to sort by LRU.
-        // Git itself doesn't change the order which may safe time, but we want it to be stable which also helps some tests.
+        // Git itself doesn't change the order which may save time, but we want it to be stable which also helps some tests.
         // NOTE: this will work well for well-packed repos or those using geometric repacking, but force us to open a lot
         // of files when dealing with new objects, as there is no notion of recency here as would be with unmaintained
         // repositories. Different algorithms should be provided, like newest packs first, and possibly a mix of both
@@ -512,7 +518,7 @@ impl super::Store {
         Ok(indices_by_modification_time)
     }

-    /// returns Ok if the copy could happen because dest-slot was actually free or disposable , and Some(true) if it was empty
+    /// returns `Ok(dest_slot_was_empty)` if the copy could happen because dest-slot was actually free or disposable.
     #[allow(clippy::too_many_arguments)]
     fn try_set_index_slot(
         lock: &parking_lot::MutexGuard<'_, ()>,

From dbf079f4b70db01ae4f850796ae6006d45d3f99c Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Thu, 9 Jan 2025 08:28:27 +0100
Subject: [PATCH 2/2] reproduce issue with 'too many packs' for slotmap

---
 gix/src/commit.rs                          |  8 +++
 .../connection/fetch/update_refs/mod.rs    |  2 +
 .../connection/fetch/update_refs/update.rs |  2 +
 gix/tests/gix/remote/fetch.rs              | 68 +++++++++++++++++++
 4 files changed, 80 insertions(+)

diff --git a/gix/src/commit.rs b/gix/src/commit.rs
index ad191f64a4a..d9c8d635ba6 100644
--- a/gix/src/commit.rs
+++ b/gix/src/commit.rs
@@ -1,6 +1,8 @@
 //!
 #![allow(clippy::empty_docs)]

+use std::convert::Infallible;
+
 /// An empty array of a type usable with the `gix::easy` API to help declaring no parents should be used
 pub const NO_PARENT_IDS: [gix_hash::ObjectId; 0] = [];
@@ -22,6 +24,12 @@ pub enum Error {
     ReferenceEdit(#[from] crate::reference::edit::Error),
 }

+impl From<Infallible> for Error {
+    fn from(_value: Infallible) -> Self {
+        unreachable!("cannot be invoked")
+    }
+}
+
 ///
 #[cfg(feature = "revision")]
 pub mod describe {
diff --git a/gix/src/remote/connection/fetch/update_refs/mod.rs b/gix/src/remote/connection/fetch/update_refs/mod.rs
index bb3c4f5e379..640b028865e 100644
--- a/gix/src/remote/connection/fetch/update_refs/mod.rs
+++ b/gix/src/remote/connection/fetch/update_refs/mod.rs
@@ -102,6 +102,8 @@ pub(crate) fn update(
                 let update = if is_implicit_tag {
                     Mode::ImplicitTagNotSentByRemote.into()
                 } else {
+                    // Assure the ODB is not to blame for the missing object.
+                    repo.try_find_object(remote_id)?;
                     Mode::RejectedSourceObjectNotFound { id: remote_id.into() }.into()
                 };
                 updates.push(update);
diff --git a/gix/src/remote/connection/fetch/update_refs/update.rs b/gix/src/remote/connection/fetch/update_refs/update.rs
index 741fc12f5ba..f5adb348e2a 100644
--- a/gix/src/remote/connection/fetch/update_refs/update.rs
+++ b/gix/src/remote/connection/fetch/update_refs/update.rs
@@ -23,6 +23,8 @@ mod error {
         PeelToId(#[from] crate::reference::peel::Error),
         #[error("Failed to follow a symbolic reference to assure worktree isn't affected")]
         FollowSymref(#[from] gix_ref::file::find::existing::Error),
+        #[error(transparent)]
+        FindObject(#[from] crate::object::find::Error),
     }
 }
diff --git a/gix/tests/gix/remote/fetch.rs b/gix/tests/gix/remote/fetch.rs
index aa4d75f6241..00221bf95d4 100644
--- a/gix/tests/gix/remote/fetch.rs
+++ b/gix/tests/gix/remote/fetch.rs
@@ -85,6 +85,74 @@ mod blocking_and_async_io {
         try_repo_rw(name).unwrap()
     }

+    #[test]
+    #[cfg(feature = "blocking-network-client")]
+    fn fetch_more_packs_than_can_be_handled() -> gix_testtools::Result {
+        use gix::config::tree::User;
+        use gix::interrupt::IS_INTERRUPTED;
+        use gix_odb::store::init::Slots;
+        use gix_testtools::tempfile;
+        fn create_empty_commit(repo: &gix::Repository) -> anyhow::Result<()> {
+            let name = repo.head_name()?.expect("no detached head");
+            repo.commit(
+                name.as_bstr(),
+                "empty",
+                gix::hash::ObjectId::empty_tree(repo.object_hash()),
+                repo.try_find_reference(name.as_ref())?.map(|r| r.id()),
+            )?;
+            Ok(())
+        }
+        for max_packs in 1..=3 {
+            let remote_dir = tempfile::tempdir()?;
+            let mut remote_repo = gix::init_bare(remote_dir.path())?;
+            {
+                let mut config = remote_repo.config_snapshot_mut();
+                config.set_value(&User::NAME, "author")?;
+                config.set_value(&User::EMAIL, "email@example.com")?;
+            }
+            create_empty_commit(&remote_repo)?;
+
+            let local_dir = tempfile::tempdir()?;
+            let (local_repo, _) = gix::clone::PrepareFetch::new(
+                remote_repo.path(),
+                local_dir.path(),
+                gix::create::Kind::Bare,
+                Default::default(),
+                gix::open::Options::isolated().object_store_slots(Slots::Given(max_packs)),
+            )?
+            .fetch_only(gix::progress::Discard, &IS_INTERRUPTED)?;
+
+            let remote = local_repo
+                .branch_remote(
+                    local_repo.head_ref()?.expect("branch available").name().shorten(),
+                    Fetch,
+                )
+                .expect("remote is configured after clone")?;
+            for _round_to_create_pack in 1..12 {
+                create_empty_commit(&remote_repo)?;
+                match remote
+                    .connect(Fetch)?
+                    .prepare_fetch(gix::progress::Discard, Default::default())?
+                    .receive(gix::progress::Discard, &IS_INTERRUPTED)
+                {
+                    Ok(out) => {
+                        for local_tracking_branch_name in out.ref_map.mappings.into_iter().filter_map(|m| m.local) {
+                            let r = local_repo.find_reference(&local_tracking_branch_name)?;
+                            r.id()
+                                .object()
+                                .expect("object should be present after fetching, triggering pack refreshes works");
+                            local_repo.head_ref()?.unwrap().set_target_id(r.id(), "post fetch")?;
+                        }
+                    }
+                    Err(err) => assert!(err
+                        .to_string()
+                        .starts_with("The slotmap turned out to be too small with ")),
+                }
+            }
+        }
+        Ok(())
+    }
+
     #[test]
     #[cfg(feature = "blocking-network-client")]
     #[allow(clippy::result_large_err)]