Skip to content

Commit 14dfcf0

Browse files
committed
feat: add tree::Editor
With it, it's easy to alter existing trees or build entirely new ones efficiently.
1 parent 71bf808 commit 14dfcf0

File tree

11 files changed

+1238
-277
lines changed

11 files changed

+1238
-277
lines changed

Cargo.lock

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gix-object/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ gix-features = { version = "^0.38.2", path = "../gix-features", features = [
4141
"progress",
4242
] }
4343
gix-hash = { version = "^0.14.2", path = "../gix-hash" }
44+
gix-hashtable = { version = "^0.5.2", path = "../gix-hashtable" }
4445
gix-validate = { version = "^0.9.0", path = "../gix-validate" }
4546
gix-actor = { version = "^0.32.0", path = "../gix-actor" }
4647
gix-date = { version = "^0.9.0", path = "../gix-date" }
@@ -64,6 +65,8 @@ document-features = { version = "0.2.0", optional = true }
6465
criterion = "0.5.1"
6566
pretty_assertions = "1.0.0"
6667
gix-testtools = { path = "../tests/tools" }
68+
gix-odb = { path = "../gix-odb" }
69+
termtree = "0.5.1"
6770

6871
[package.metadata.docs.rs]
6972
all-features = true

gix-object/src/tree/editor.rs

Lines changed: 313 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,313 @@
1+
use crate::tree::EntryKind;
2+
use crate::{tree, Tree};
3+
use bstr::{BStr, BString, ByteSlice, ByteVec};
4+
use gix_hash::ObjectId;
5+
use gix_hashtable::hash_map::Entry;
6+
use std::cmp::Ordering;
7+
8+
/// The state needed to apply edits instantly to in-memory trees.
9+
///
10+
/// It's made so that each tree is looked at in the object database at most once, and held in memory for
11+
/// all edits until everything is flushed to write all changed trees.
12+
///
13+
/// The editor is optimized to edit existing trees, but can deal with building entirely new trees as well
14+
/// with some penalties.
15+
///
16+
/// ### Note
17+
///
18+
/// For reasons of efficiency, internally a SHA1 based hashmap is used to avoid having to store full paths
19+
/// to each edited tree. The chance of collision is low, but could be engineered to overwrite or write into
20+
/// an unintended tree.
21+
#[doc(alias = "TreeUpdateBuilder", alias = "git2")]
22+
pub struct Editor<'a> {
23+
/// A way to lookup trees.
24+
find: &'a dyn crate::FindExt,
25+
/// All trees we currently hold in memory. Each of these may change while adding and removing entries.
26+
/// null-object-ids mark tree-entries whose value we don't know yet, they are placeholders that will be
27+
/// dropped when writing at the latest.
28+
trees: gix_hashtable::HashMap<ObjectId, Tree>,
29+
/// A buffer to build up paths when finding the tree to edit.
30+
path_buf: BString,
31+
/// Our buffer for storing tree-data in, right before decoding it.
32+
tree_buf: Vec<u8>,
33+
}
34+
35+
/// Lifecycle
36+
impl<'a> Editor<'a> {
37+
/// Create a new editor that uses `root` as base for all edits. Use `find` to lookup existing
38+
/// trees when edits are made. Each tree will only be looked-up once and then edited in place from
39+
/// that point on.
40+
pub fn new(root: Tree, find: &'a dyn crate::FindExt) -> Self {
41+
Editor {
42+
find,
43+
trees: gix_hashtable::HashMap::from_iter(Some((empty_path_hash(), root))),
44+
path_buf: Vec::with_capacity(256).into(),
45+
tree_buf: Vec::with_capacity(512),
46+
}
47+
}
48+
}
49+
50+
/// Operations
51+
impl<'a> Editor<'a> {
52+
/// Write the entire in-memory state of all changed trees (and only changed trees) to `out`.
53+
/// Note that the returned object id *can* be the empty tree if everything was removed or if nothing
54+
/// was added to the tree.
55+
///
56+
/// The last call to `out` will be the changed root tree, whose object-id will also be returned.
57+
/// `out` is free to do any kind of additional validation, like to assure that all entries in the tree exist.
58+
/// We don't assure that as there is no validation that inserted entries are valid object ids.
59+
///
60+
/// Future calls to [`upsert`](Self::upsert) or similar will keep working on the last seen state of the
61+
/// just-written root-tree.
62+
/// If this is not desired, use [set_root()](Self::set_root()).
63+
pub fn write<E>(&mut self, mut out: impl FnMut(&Tree) -> Result<ObjectId, E>) -> Result<ObjectId, E> {
64+
assert_ne!(self.trees.len(), 0, "there is at least the root tree");
65+
66+
// back is for children, front is for parents.
67+
let mut parents = vec![(
68+
None::<usize>,
69+
BString::default(),
70+
self.trees
71+
.remove(&empty_path_hash())
72+
.expect("root tree is always present"),
73+
)];
74+
let mut children = Vec::new();
75+
while let Some((parent_idx, mut rela_path, mut tree)) = children.pop().or_else(|| parents.pop()) {
76+
let mut all_entries_unchanged_or_written = true;
77+
for entry in &tree.entries {
78+
if entry.mode.is_tree() {
79+
let prev_len = push_path_component(&mut rela_path, &entry.filename);
80+
if let Some(sub_tree) = self.trees.remove(&path_hash(&rela_path)) {
81+
all_entries_unchanged_or_written = false;
82+
let next_parent_idx = parents.len();
83+
children.push((Some(next_parent_idx), rela_path.clone(), sub_tree));
84+
}
85+
rela_path.truncate(prev_len);
86+
}
87+
}
88+
if all_entries_unchanged_or_written {
89+
tree.entries.retain(|e| !e.oid.is_null());
90+
if let Some((_, _, parent_to_adjust)) =
91+
parent_idx.map(|idx| parents.get_mut(idx).expect("always present, pointing towards zero"))
92+
{
93+
let name = filename(rela_path.as_bstr());
94+
let entry_idx = parent_to_adjust
95+
.entries
96+
.binary_search_by(|e| cmp_entry_with_name(e, name, true))
97+
.expect("the parent always knows us by name");
98+
if tree.entries.is_empty() {
99+
parent_to_adjust.entries.remove(entry_idx);
100+
} else {
101+
parent_to_adjust.entries[entry_idx].oid = out(&tree)?;
102+
}
103+
} else if parents.is_empty() {
104+
debug_assert!(children.is_empty(), "we consume children before parents");
105+
debug_assert!(rela_path.is_empty(), "this should always be the root tree");
106+
107+
// There may be left-over trees if they are replaced with blobs for example.
108+
let root_tree_id = out(&tree)?;
109+
self.trees.clear();
110+
self.trees.insert(empty_path_hash(), tree);
111+
return Ok(root_tree_id);
112+
} else if !tree.entries.is_empty() {
113+
out(&tree)?;
114+
}
115+
} else {
116+
parents.push((parent_idx, rela_path, tree));
117+
}
118+
}
119+
120+
unreachable!("we exit as soon as everything is consumed")
121+
}
122+
123+
/// Remove the entry at `rela_path`, loading all trees on the path accordingly.
124+
/// It's no error if the entry doesn't exist, or if `rela_path` doesn't lead to an existing entry at all.
125+
pub fn remove<I, C>(&mut self, rela_path: I) -> Result<&mut Self, crate::find::existing_object::Error>
126+
where
127+
I: IntoIterator<Item = C>,
128+
C: AsRef<BStr>,
129+
{
130+
self.upsert_or_remove(rela_path, None)
131+
}
132+
133+
/// Insert a new entry of `kind` with `id` at `rela_path`, an iterator over each path component in the tree,
134+
/// like `a/b/c`. Names are matched case-sensitively.
135+
///
136+
/// Existing leaf-entries will be overwritten unconditionally, and it is assumed that `id` is available in the object database
137+
/// or will be made available at a later point to assure the integrity of the produced tree.
138+
///
139+
/// Intermediate trees will be created if they don't exist in the object database, otherwise they will be loaded and entries
140+
/// will be inserted into them instead.
141+
///
142+
/// Note that `id` can be [null](ObjectId::null()) to create a placeholder. These will not be written, and paths leading
143+
/// through them will not be considered a problem.
144+
///
145+
/// `id` can also be an empty tree, along with [the respective `kind`](EntryKind::Tree), even though that's normally not allowed
146+
/// in Git trees.
147+
pub fn upsert<I, C>(
148+
&mut self,
149+
rela_path: I,
150+
kind: EntryKind,
151+
id: ObjectId,
152+
) -> Result<&mut Self, crate::find::existing_object::Error>
153+
where
154+
I: IntoIterator<Item = C>,
155+
C: AsRef<BStr>,
156+
{
157+
self.upsert_or_remove(rela_path, Some((kind, id)))
158+
}
159+
160+
fn upsert_or_remove<I, C>(
161+
&mut self,
162+
rela_path: I,
163+
kind_and_id: Option<(EntryKind, ObjectId)>,
164+
) -> Result<&mut Self, crate::find::existing_object::Error>
165+
where
166+
I: IntoIterator<Item = C>,
167+
C: AsRef<BStr>,
168+
{
169+
let mut cursor = self.trees.get_mut(&empty_path_hash()).expect("root is always present");
170+
self.path_buf.clear();
171+
let mut rela_path = rela_path.into_iter().peekable();
172+
let new_kind_is_tree = kind_and_id.map_or(false, |(kind, _)| kind == EntryKind::Tree);
173+
while let Some(name) = rela_path.next() {
174+
let name = name.as_ref();
175+
let is_last = rela_path.peek().is_none();
176+
let mut needs_sorting = false;
177+
let current_level_must_be_tree = !is_last || new_kind_is_tree;
178+
let check_type_change = |entry: &tree::Entry| entry.mode.is_tree() != current_level_must_be_tree;
179+
let tree_to_lookup = match cursor
180+
.entries
181+
.binary_search_by(|e| cmp_entry_with_name(e, name, false))
182+
.or_else(|file_insertion_idx| {
183+
cursor
184+
.entries
185+
.binary_search_by(|e| cmp_entry_with_name(e, name, true))
186+
.map_err(|dir_insertion_index| {
187+
if current_level_must_be_tree {
188+
dir_insertion_index
189+
} else {
190+
file_insertion_idx
191+
}
192+
})
193+
}) {
194+
Ok(idx) => {
195+
match kind_and_id {
196+
None => {
197+
if is_last {
198+
cursor.entries.remove(idx);
199+
break;
200+
} else {
201+
let entry = &cursor.entries[idx];
202+
if entry.mode.is_tree() {
203+
Some(entry.oid)
204+
} else {
205+
break;
206+
}
207+
}
208+
}
209+
Some((kind, id)) => {
210+
let entry = &mut cursor.entries[idx];
211+
if is_last {
212+
// unconditionally overwrite what's there.
213+
entry.oid = id;
214+
needs_sorting = check_type_change(entry);
215+
entry.mode = kind.into();
216+
None
217+
} else if entry.mode.is_tree() {
218+
// Possibly lookup the existing tree on our way down the path.
219+
Some(entry.oid)
220+
} else {
221+
// it is no tree, but we are traversing a path, so turn it into one.
222+
entry.oid = id.kind().null();
223+
needs_sorting = check_type_change(entry);
224+
entry.mode = EntryKind::Tree.into();
225+
None
226+
}
227+
}
228+
}
229+
}
230+
Err(insertion_idx) => match kind_and_id {
231+
None => break,
232+
Some((kind, id)) => {
233+
cursor.entries.insert(
234+
insertion_idx,
235+
tree::Entry {
236+
filename: name.into(),
237+
mode: if is_last { kind.into() } else { EntryKind::Tree.into() },
238+
oid: if is_last { id } else { id.kind().null() },
239+
},
240+
);
241+
if is_last {
242+
break;
243+
}
244+
None
245+
}
246+
},
247+
};
248+
if needs_sorting {
249+
cursor.entries.sort();
250+
}
251+
if is_last {
252+
break;
253+
}
254+
push_path_component(&mut self.path_buf, name);
255+
let path_id = path_hash(&self.path_buf);
256+
cursor = match self.trees.entry(path_id) {
257+
Entry::Occupied(e) => e.into_mut(),
258+
Entry::Vacant(e) => e.insert(
259+
if let Some(tree_id) = tree_to_lookup.filter(|tree_id| !tree_id.is_empty_tree()) {
260+
self.find.find_tree(&tree_id, &mut self.tree_buf)?.into()
261+
} else {
262+
Tree::default()
263+
},
264+
),
265+
};
266+
}
267+
Ok(self)
268+
}
269+
270+
/// Set the root tree of the modification to `root`, assuring it has a well-known state.
271+
///
272+
/// Note that this erases all previous edits.
273+
///
274+
/// This is useful if the same editor is re-used for various trees.
275+
pub fn set_root(&mut self, root: Tree) -> &mut Self {
276+
self.trees.clear();
277+
self.trees.insert(empty_path_hash(), root);
278+
self
279+
}
280+
}
281+
282+
/// Compare the entry `a` with `filename`, with `is_tree` telling if `filename` should be treated as the
/// name of a tree.
///
/// The common prefix of both names decides if it differs. Otherwise the next byte of each name is compared,
/// where a tree whose name ends there is treated as if it was followed by `/`, and a non-tree whose
/// name ends there sorts before anything else.
fn cmp_entry_with_name(a: &tree::Entry, filename: &BStr, is_tree: bool) -> Ordering {
    let shared = a.filename.len().min(filename.len());
    match a.filename[..shared].cmp(&filename[..shared]) {
        Ordering::Equal => {
            let lhs = a
                .filename
                .get(shared)
                .copied()
                .or_else(|| a.mode.is_tree().then_some(b'/'));
            let rhs = filename.get(shared).copied().or_else(|| is_tree.then_some(b'/'));
            lhs.cmp(&rhs)
        }
        unequal => unequal,
    }
}
290+
291+
/// Return everything after the last `/` in `path`, or all of `path` if it contains no `/`.
fn filename(path: &BStr) -> &BStr {
    match path.rfind_byte(b'/') {
        Some(slash_pos) => &path[slash_pos + 1..],
        None => path,
    }
}
294+
295+
fn empty_path_hash() -> ObjectId {
296+
gix_features::hash::hasher(gix_hash::Kind::Sha1).digest().into()
297+
}
298+
299+
/// The SHA1 digest of the bytes of `path`, which serves as key for the tree at that path.
fn path_hash(path: &[u8]) -> ObjectId {
    let mut sha1 = gix_features::hash::hasher(gix_hash::Kind::Sha1);
    sha1.update(path);
    let digest = sha1.digest();
    digest.into()
}
304+
305+
fn push_path_component(base: &mut BString, component: &[u8]) -> usize {
306+
let prev_len = base.len();
307+
debug_assert!(base.last() != Some(&b'/'));
308+
if !base.is_empty() {
309+
base.push_byte(b'/');
310+
}
311+
base.push_str(component);
312+
prev_len
313+
}

gix-object/src/tree/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ use crate::{
55
tree,
66
};
77

8+
mod editor;
9+
pub use editor::Editor;
10+
811
mod ref_iter;
912
///
1013
pub mod write;

gix-object/src/tree/write.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,12 @@ impl crate::WriteTo for Tree {
2727
/// Serialize this tree to `out` in the git internal format.
2828
fn write_to(&self, out: &mut dyn io::Write) -> io::Result<()> {
2929
debug_assert_eq!(
30+
&self.entries,
3031
&{
3132
let mut entries_sorted = self.entries.clone();
3233
entries_sorted.sort();
3334
entries_sorted
3435
},
35-
&self.entries,
3636
"entries for serialization must be sorted by filename"
3737
);
3838
let mut buf = Default::default();

0 commit comments

Comments
 (0)