Skip to content

Commit ca5294a

Browse files
committed
feat: add InMemoryPassThrough implementation.
An implementation of `Header`, `Write` and `Find` that can optionally write everything to an in-memory store and, if enabled, also read objects back from there. That way it can present a consistent view of objects from two locations.
1 parent b279957 commit ca5294a

File tree

6 files changed

+379
-0
lines changed

6 files changed

+379
-0
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gix-odb/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ serde = ["dep:serde", "gix-hash/serde", "gix-object/serde", "gix-pack/serde"]
2121

2222
[dependencies]
2323
gix-features = { version = "^0.38.2", path = "../gix-features", features = ["rustsha1", "walkdir", "zlib", "crc32"] }
24+
gix-hashtable = { version = "^0.5.2", path = "../gix-hashtable" }
2425
gix-hash = { version = "^0.14.2", path = "../gix-hash" }
2526
gix-date = { version = "^0.9.0", path = "../gix-date" }
2627
gix-path = { version = "^0.10.10", path = "../gix-path" }

gix-odb/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ pub fn sink(object_hash: gix_hash::Kind) -> Sink {
6666
}
6767
}
6868

69+
///
70+
pub mod memory;
71+
6972
mod sink;
7073

7174
///

gix-odb/src/memory.rs

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
use crate::find::Header;
2+
use crate::Cache;
3+
use gix_object::Data;
4+
use std::cell::RefCell;
5+
use std::ops::{Deref, DerefMut};
6+
use std::rc::Rc;
7+
use std::sync::Arc;
8+
9+
/// An object database to read from any implementation but write to memory.
10+
/// Previously written objects can be returned from memory upon query, which makes the view of objects consistent.
11+
/// In-Memory objects can be disabled by [taking out its storage](Proxy::take_object_memory). From there in-memory
12+
/// object can also be persisted one by one.
13+
///
14+
/// It's possible to turn off the memory by removing it from the instance.
15+
pub struct Proxy<T> {
16+
/// The actual odb implementation
17+
inner: T,
18+
/// The kind of hash to produce when writing new objects.
19+
object_hash: gix_hash::Kind,
20+
/// The storage for in-memory objects.
21+
/// If `None`, the proxy will always read from and write-through to `inner`.
22+
memory: Option<RefCell<Storage>>,
23+
}
24+
25+
/// Lifecycle
26+
impl<T> Proxy<T> {
27+
/// Create a new instance using `odb` as actual object provider, with an empty in-memory store for
28+
/// objects that are to be written.
29+
/// Use `object_hash` to determine the kind of hash to produce when writing new objects.
30+
pub fn new(odb: T, object_hash: gix_hash::Kind) -> Proxy<T> {
31+
Proxy {
32+
inner: odb,
33+
object_hash,
34+
memory: Some(Default::default()),
35+
}
36+
}
37+
38+
/// Turn ourselves into our inner object database, while deallocating objects stored in memory.
39+
pub fn into_inner(self) -> T {
40+
self.inner
41+
}
42+
43+
/// Strip object memory off this instance, which means that writes will go through to the inner object database
44+
/// right away.
45+
/// This mode makes the proxy fully transparent.
46+
pub fn with_write_passthrough(mut self) -> Self {
47+
self.memory.take();
48+
self
49+
}
50+
}
51+
52+
impl Proxy<Cache<crate::store::Handle<Arc<crate::Store>>>> {
    /// Does nothing but return `self`, as the contained handle is backed by an `Arc` already.
    pub fn into_arc(self) -> std::io::Result<Self> {
        Ok(self)
    }
}
58+
59+
impl Proxy<Cache<crate::store::Handle<Rc<crate::Store>>>> {
60+
/// Create an entirely new instance, but with the in-memory objects moving between them.
61+
pub fn into_arc(self) -> std::io::Result<Proxy<Cache<crate::store::Handle<Arc<crate::Store>>>>> {
62+
Ok(Proxy {
63+
inner: self.inner.into_arc()?,
64+
object_hash: self.object_hash,
65+
memory: self.memory,
66+
})
67+
}
68+
}
69+
70+
impl From<crate::Handle> for Proxy<crate::Handle> {
    /// Wrap `odb` in a proxy that hashes new objects with the hash kind of the handle's store.
    fn from(odb: crate::Handle) -> Self {
        // Read the hash kind first, as `odb` is moved into the proxy right after.
        let hash_kind = odb.store.object_hash;
        Self::new(odb, hash_kind)
    }
}
76+
77+
/// Memory Access
78+
impl<T> Proxy<T> {
79+
/// Take all the objects in memory so far, with the memory storage itself and return it.
80+
///
81+
/// The instance will remain in a state where it won't be able to store objects in memory at all,
82+
/// they will now be stored in the underlying object database.
83+
/// This mode makes the proxy fully transparent.
84+
///
85+
/// To avoid that, use [`reset_object_memory()`](Self::reset_object_memory()) or return the storage
86+
/// using [`set_object_memory()`](Self::set_object_memory()).
87+
pub fn take_object_memory(&mut self) -> Option<Storage> {
88+
self.memory.take().map(RefCell::into_inner)
89+
}
90+
91+
/// Set the object storage to contain only `new` objects, and return whichever objects were there previously.
92+
pub fn set_object_memory(&mut self, new: Storage) -> Option<Storage> {
93+
let previous = self.take_object_memory();
94+
self.memory = Some(RefCell::new(new));
95+
previous
96+
}
97+
98+
/// If objects aren't written to memory yet, this will happen after the call.
99+
///
100+
/// Otherwise, no change will be performed.
101+
pub fn enable_object_memory(&mut self) -> &mut Self {
102+
if self.memory.is_none() {
103+
self.memory = Some(Default::default());
104+
}
105+
self
106+
}
107+
108+
/// Reset the internal storage to be empty, and return the previous storage, with all objects
109+
/// it contained.
110+
///
111+
/// Note that this does nothing if this instance didn't contain object memory in the first place.
112+
/// In that case, set it explicitly.
113+
pub fn reset_object_memory(&self) -> Option<Storage> {
114+
self.memory.as_ref().map(|m| std::mem::take(&mut *m.borrow_mut()))
115+
}
116+
117+
/// Return the amount of objects currently stored in memory.
118+
pub fn num_objects_in_memory(&self) -> usize {
119+
self.memory.as_ref().map_or(0, |m| m.borrow().len())
120+
}
121+
}
122+
123+
impl<T> Clone for Proxy<T>
124+
where
125+
T: Clone,
126+
{
127+
fn clone(&self) -> Self {
128+
Proxy {
129+
inner: self.inner.clone(),
130+
object_hash: self.object_hash,
131+
memory: self.memory.clone(),
132+
}
133+
}
134+
}
135+
136+
impl<T> gix_object::Find for Proxy<T>
137+
where
138+
T: gix_object::Find,
139+
{
140+
fn try_find<'a>(
141+
&self,
142+
id: &gix_hash::oid,
143+
buffer: &'a mut Vec<u8>,
144+
) -> Result<Option<Data<'a>>, gix_object::find::Error> {
145+
if let Some(map) = self.memory.as_ref() {
146+
let map = map.borrow();
147+
if let Some((kind, data)) = map.get(id) {
148+
buffer.clear();
149+
buffer.extend_from_slice(data);
150+
return Ok(Some(Data {
151+
kind: *kind,
152+
data: &*buffer,
153+
}));
154+
}
155+
}
156+
self.inner.try_find(id, buffer)
157+
}
158+
}
159+
160+
impl<T> gix_object::Exists for Proxy<T>
161+
where
162+
T: gix_object::Exists,
163+
{
164+
fn exists(&self, id: &gix_hash::oid) -> bool {
165+
self.memory.as_ref().map_or(false, |map| map.borrow().contains_key(id)) || self.inner.exists(id)
166+
}
167+
}
168+
169+
impl<T> crate::Header for Proxy<T>
170+
where
171+
T: crate::Header,
172+
{
173+
fn try_header(&self, id: &gix_hash::oid) -> Result<Option<Header>, gix_object::find::Error> {
174+
if let Some(map) = self.memory.as_ref() {
175+
let map = map.borrow();
176+
if let Some((kind, data)) = map.get(id) {
177+
return Ok(Some(Header::Loose {
178+
kind: *kind,
179+
size: data.len() as u64,
180+
}));
181+
}
182+
}
183+
self.inner.try_header(id)
184+
}
185+
}
186+
187+
impl<T> gix_object::FindHeader for Proxy<T>
188+
where
189+
T: gix_object::FindHeader,
190+
{
191+
fn try_header(&self, id: &gix_hash::oid) -> Result<Option<gix_object::Header>, gix_object::find::Error> {
192+
if let Some(map) = self.memory.as_ref() {
193+
let map = map.borrow();
194+
if let Some((kind, data)) = map.get(id) {
195+
return Ok(Some(gix_object::Header {
196+
kind: *kind,
197+
size: data.len() as u64,
198+
}));
199+
}
200+
}
201+
self.inner.try_header(id)
202+
}
203+
}
204+
205+
impl<T> crate::Write for Proxy<T>
206+
where
207+
T: crate::Write,
208+
{
209+
fn write_stream(
210+
&self,
211+
kind: gix_object::Kind,
212+
size: u64,
213+
from: &mut dyn std::io::Read,
214+
) -> Result<gix_hash::ObjectId, crate::write::Error> {
215+
let Some(map) = self.memory.as_ref() else {
216+
return self.inner.write_stream(kind, size, from);
217+
};
218+
219+
let mut buf = Vec::new();
220+
from.read_to_end(&mut buf)?;
221+
222+
let id = gix_object::compute_hash(self.object_hash, kind, &buf);
223+
map.borrow_mut().insert(id, (kind, buf));
224+
Ok(id)
225+
}
226+
}
227+
228+
impl<T> Deref for Proxy<T> {
229+
type Target = T;
230+
231+
fn deref(&self) -> &Self::Target {
232+
&self.inner
233+
}
234+
}
235+
236+
impl<T> DerefMut for Proxy<T> {
237+
fn deref_mut(&mut self) -> &mut Self::Target {
238+
&mut self.inner
239+
}
240+
}
241+
242+
/// A mapping between an object id and all data corresponding to an object, acting like a `HashMap<ObjectID, (Kind, Data)>`.
243+
#[derive(Default, Debug, Clone, Eq, PartialEq)]
244+
pub struct Storage(gix_hashtable::HashMap<gix_hash::ObjectId, (gix_object::Kind, Vec<u8>)>);
245+
246+
impl Deref for Storage {
247+
type Target = gix_hashtable::HashMap<gix_hash::ObjectId, (gix_object::Kind, Vec<u8>)>;
248+
249+
fn deref(&self) -> &Self::Target {
250+
&self.0
251+
}
252+
}
253+
254+
impl DerefMut for Storage {
255+
fn deref_mut(&mut self) -> &mut Self::Target {
256+
&mut self.0
257+
}
258+
}

0 commit comments

Comments
 (0)