Skip to content

Commit eeb584c

Browse files
committed
std: initial sketch of workcache, barely does anything.
1 parent 105a0b9 commit eeb584c

File tree

2 files changed

+312
-0
lines changed

2 files changed

+312
-0
lines changed

src/libstd/std.rc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ pub mod par;
8585
pub mod cmp;
8686
pub mod base64;
8787
pub mod rl;
88+
pub mod workcache;
8889

8990
#[cfg(unicode)]
9091
mod unicode;

src/libstd/workcache.rs

Lines changed: 311 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,311 @@
1+
extern mod std;
2+
3+
use core::cmp::Eq;
4+
use send_map::linear::LinearMap;
5+
use pipes::{recv, oneshot, PortOne, send_one};
6+
use either::{Right,Left,Either};
7+
8+
use std::json;
9+
use std::sha1;
10+
use std::serialization::{Serializer,Serializable,
11+
Deserializer,Deserializable,
12+
deserialize};
13+
14+
/**
15+
*
16+
* This is a loose clone of the fbuild build system, made a touch more
17+
* generic (not wired to special cases on files) and much less metaprogram-y
18+
* due to rust's comparative weakness there, relative to python.
19+
*
20+
* It's based around _imperative builds_ that happen to have some function
21+
* calls cached. That is, it's _just_ a mechanism for describing cached
22+
* functions. This makes it much simpler and smaller than a "build system"
23+
* that produces an IR and evaluates it. The evaluation order is normal
24+
* function calls. Some of them just return really quickly.
25+
*
26+
* A cached function consumes and produces a set of _works_. A work has a
27+
* name, a kind (that determines how the value is to be checked for
28+
* freshness) and a value. Works must also be (de)serializable. Some
29+
* examples of works:
30+
*
31+
* kind name value
32+
* ------------------------
33+
* cfg os linux
34+
* file foo.c <sha1>
35+
* url foo.com <etag>
36+
*
37+
* Works are conceptually single units, but we store them most of the time
38+
* in maps of the form (type,name) => value. These are WorkMaps.
39+
*
40+
* A cached function divides the works it's interested in up into inputs and
41+
* outputs, and subdivides those into declared (input and output) works and
42+
* discovered (input and output) works.
43+
*
44+
* A _declared_ input or output is one that is given to the workcache before
45+
* any work actually happens, in the "prep" phase. Even when a function's
46+
* work-doing part (the "exec" phase) never gets called, it has declared
47+
* inputs and outputs, which can be checked for freshness (and potentially
48+
* used to determine that the function can be skipped).
49+
*
50+
* The workcache checks _all_ works for freshness, but uses the set of
51+
* discovered outputs from the _previous_ exec (which it will re-discover
52+
* and re-record each time the exec phase runs).
53+
*
54+
* Therefore the discovered works cached in the db might be a
55+
* mis-approximation of the current discoverable works, but this is ok for
56+
* the following reason: we assume that if an artifact A changed from
57+
* depending on B,C,D to depending on B,C,D,E, then A itself changed (as
58+
* part of the change-in-dependencies), so we will be ok.
59+
*
60+
* Each function has a single discriminated output work called its _result_.
61+
* This is only different from other works in that it is returned, by value,
62+
* from a call to the cacheable function; the other output works are used in
63+
* passing to invalidate dependencies elsewhere in the cache, but do not
64+
* otherwise escape from a function invocation. Most functions only have one
65+
* output work anyways.
66+
*
67+
* A database (the central store of a workcache) stores a mapping:
68+
*
69+
* (fn_name,{declared_input}) => ({declared_output},{discovered_input},
70+
* {discovered_output},result)
71+
*
72+
*/
73+
74+
// Identifies a single work by its kind and its name. This is the key type
// of WorkMap.
struct WorkKey {
75+
kind: ~str,
76+
name: ~str
77+
}
78+
79+
// Feeds the key's bytes to the callback `f`: first those of `kind`, then
// those of `name`.
impl WorkKey: to_bytes::IterBytes {
80+
#[inline(always)]
81+
pure fn iter_bytes(lsb0: bool, f: to_bytes::Cb) {
82+
// `flag` records the most recent value returned by `f` while the
// bytes of `kind` are being visited.
let mut flag = true;
83+
self.kind.iter_bytes(lsb0, |bytes| {flag = f(bytes); flag});
84+
// If `f` returned false, do not visit `name` at all.
if !flag { return; }
85+
self.name.iter_bytes(lsb0, f);
86+
}
87+
}
88+
89+
// Constructor for WorkKey.
impl WorkKey {
90+
// Builds a WorkKey holding owned copies of `kind` and `name`.
static fn new(kind: &str, name: &str) -> WorkKey {
91+
WorkKey { kind: kind.to_owned(), name: name.to_owned() }
92+
}
93+
}
94+
95+
// Equality for WorkKey: two keys are equal only when both their `kind`
// and their `name` are equal.
impl WorkKey: core::cmp::Eq {
96+
pure fn eq(&self, other: &WorkKey) -> bool {
97+
self.kind == other.kind && self.name == other.name
98+
}
99+
// Written out as the logical negation of `eq`: the keys differ as soon
// as either field differs.
pure fn ne(&self, other: &WorkKey) -> bool {
100+
self.kind != other.kind || self.name != other.name
101+
}
102+
}
103+
104+
// Map from a work's key (kind, name) to that work's value, held as a string.
type WorkMap = LinearMap<WorkKey, ~str>;
105+
106+
// Placeholder for the workcache's central store. It currently carries no
// data beyond a unit field.
struct Database {
107+
// XXX: Fill in.
108+
a: ()
109+
}
110+
111+
// Lookup and store operations of the database. Both are stubs for now.
impl Database {
112+
// Looks up the cached entry for `_fn_name` with the given declared
// inputs. The tuple is destructured by Prep::exec as (declared outputs,
// discovered inputs, discovered outputs, serialized result).
// Not implemented yet: always returns None.
pure fn prepare(_fn_name: &str,
113+
_declared_inputs: &const WorkMap) ->
114+
Option<(WorkMap, WorkMap, WorkMap, ~str)> {
115+
// XXX: load
116+
None
117+
}
118+
// Records the works and the serialized result of one execution of
// `_fn_name`. Not implemented yet: the body is empty, so nothing is
// stored.
pure fn cache(_fn_name: &str,
119+
_declared_inputs: &WorkMap,
120+
_declared_outputs: &WorkMap,
121+
_discovered_inputs: &WorkMap,
122+
_discovered_outputs: &WorkMap,
123+
_result: &str) {
124+
// XXX: store
125+
}
126+
}
127+
128+
// Placeholder logger type. It currently carries no data beyond a unit field.
struct Logger {
129+
// XXX: Fill in
130+
a: ()
131+
}
132+
133+
// Shared state of a workcache: the database, a logger, a JSON configuration
// object and a table of freshness-checking closures.
struct Context {
134+
db: @Database,
135+
logger: @Logger,
136+
cfg: @json::Object,
137+
// NOTE(review): presumably keyed by work kind, with the closure deciding
// whether a (name, value) pair is still fresh -- confirm. Nothing in the
// visible code reads this map yet; Context::new creates it empty.
freshness: LinearMap<~str,~fn(&str,&str)->bool>
138+
}
139+
140+
// State gathered for one cached function before it runs: the owning
// context, the function's name, and the inputs and outputs declared so far.
struct Prep {
141+
ctxt: @Context,
142+
fn_name: ~str,
143+
declared_inputs: WorkMap,
144+
declared_outputs: WorkMap
145+
}
146+
147+
// Holds the inputs and outputs discovered while a cached function runs.
// Prep::exec creates one with empty maps and passes it to the exec closure.
struct Exec {
148+
discovered_inputs: WorkMap,
149+
discovered_outputs: WorkMap
150+
}
151+
152+
// Handle to the result of a cached function invocation.
struct Work<T:Send> {
153+
prep: @mut Prep,
154+
// Left(v): the value was obtained from the database in Prep::exec.
// Right(port): a spawned task will send (Exec, T) on this port.
// Wrapped in Option so that `unwrap` can swap the contents out.
res: Option<Either<T,PortOne<(Exec,T)>>>
155+
}
156+
157+
// Returns the SHA-1 result string of `t`'s JSON serialization.
fn digest<T:Serializable<json::Serializer>
158+
Deserializable<json::Deserializer>>(t: &T) -> ~str {
159+
let sha = sha1::sha1();
160+
// Serialize `t` as JSON into an in-memory string first.
let s = do io::with_str_writer |wr| {
161+
// XXX: sha1 should be a writer itself, shouldn't
162+
// go via strings.
163+
t.serialize(&json::Serializer(wr));
164+
};
165+
sha.input_str(s);
166+
sha.result_str()
167+
}
168+
169+
// Returns the SHA-1 result string of the contents of the file at `path`,
// which is read into memory in full as a string.
fn digest_file(path: &Path) -> ~str {
170+
let sha = sha1::sha1();
171+
let s = io::read_whole_file_str(path);
172+
// NOTE(review): `get_ref` presumably fails the task when the read
// returned an error -- confirm; there is no other error handling here.
sha.input_str(*s.get_ref());
173+
sha.result_str()
174+
}
175+
176+
// Construction of a Context and the entry point of the "prep" phase.
impl Context {
177+
178+
// Builds a Context from the given database, logger and configuration.
// The freshness table starts out empty.
static fn new(db: @Database, lg: @Logger,
179+
cfg: @json::Object) -> Context {
180+
Context {db: db, logger: lg, cfg: cfg, freshness: LinearMap()}
181+
}
182+
183+
// Starts the prep phase for the function called `fn_name`: creates a
// new Prep with empty declared input and output maps, passes it to
// `blk`, and returns the Work that `blk` produces.
fn prep<T:Send
184+
Serializable<json::Serializer>
185+
Deserializable<json::Deserializer>>(
186+
@self,
187+
fn_name:&str,
188+
blk: fn((@mut Prep))->Work<T>) -> Work<T> {
189+
let p = @mut Prep {ctxt: self,
190+
fn_name: fn_name.to_owned(),
191+
declared_inputs: LinearMap(),
192+
declared_outputs: LinearMap()};
193+
blk(p)
194+
}
195+
}
196+
197+
// Declaration of works during the prep phase, and launching of the exec
// phase.
impl Prep {
198+
// Records a declared input work: (kind, name) => val.
fn declare_input(&mut self, kind:&str, name:&str, val:&str) {
199+
self.declared_inputs.insert(WorkKey::new(kind, name),
200+
val.to_owned());
201+
}
202+
203+
// Records a declared output work: (kind, name) => val.
fn declare_output(&mut self, kind:&str, name:&str, val:&str) {
204+
self.declared_outputs.insert(WorkKey::new(kind, name),
205+
val.to_owned());
206+
}
207+
208+
// Produces a Work for this function. If the database returns a cached
// entry, the stored result is deserialized from JSON and returned as
// Left(value). Otherwise `blk` is run in a newly spawned task and the
// Work carries Right(port), on which the task sends (Exec, T).
fn exec<T:Send
209+
Serializable<json::Serializer>
210+
Deserializable<json::Deserializer>>(
211+
@mut self, blk: ~fn(&Exec) -> T) -> Work<T> {
212+
let cached = self.ctxt.db.prepare(self.fn_name,
213+
&self.declared_inputs);
214+
215+
match move cached {
216+
// No cached entry: fall through to running `blk` below.
None => (),
217+
Some((move _decl_out,
218+
move _disc_in,
219+
move _disc_out,
220+
move res)) => {
221+
// XXX: check deps for freshness, only return if fresh.
222+
// The cached works are ignored for now; only the serialized
// result `res` is used.
let v : T = do io::with_str_reader(res) |rdr| {
223+
let j = result::unwrap(json::from_reader(rdr));
224+
deserialize(&json::Deserializer(move j))
225+
};
226+
return Work::new(self, move Left(move v));
227+
}
228+
}
229+
230+
let (chan, port) = oneshot::init();
231+
232+
// NOTE(review): the channel is boxed inside an Option, presumably so
// that the spawned closure can take it out with swap_unwrap rather
// than moving out of a captured variable -- confirm.
let chan = ~mut Some(move chan);
233+
do task::spawn |move blk, move chan| {
234+
let exe = Exec { discovered_inputs: LinearMap(),
235+
discovered_outputs: LinearMap() };
236+
let chan = option::swap_unwrap(&mut *chan);
237+
let v = blk(&exe);
238+
// Send both the Exec and the computed value to whoever holds the
// port.
send_one(move chan, (move exe, move v));
239+
}
240+
241+
Work::new(self, move Right(move port))
242+
}
243+
}
244+
245+
// Constructor for Work.
impl<T:Send
246+
Serializable<json::Serializer>
247+
Deserializable<json::Deserializer>>
248+
Work<T> {
249+
// Wraps `p` and `e` in a Work; `e` is stored as Some(e) in `res`.
static fn new(p: @mut Prep, e: Either<T,PortOne<(Exec,T)>>) -> Work<T> {
250+
move Work { prep: p, res: Some(move e) }
251+
}
252+
}
253+
254+
// FIXME (#3724): movable self. This should be in impl Work.
// Consumes a Work and returns its value. A Left value is returned directly.
// For a Right port, this receives (Exec, T) from the port, serializes the
// value as JSON, passes everything to the database's `cache`, and then
// returns the value. Fails if `res` is None.
255+
fn unwrap<T:Send
256+
Serializable<json::Serializer>
257+
Deserializable<json::Deserializer>>(w: Work<T>) -> T {
258+
259+
let mut ww = move w;
260+
let mut s = None;
261+
262+
// Swap the result out of the Work, leaving None behind in `ww.res`.
ww.res <-> s;
263+
264+
match move s {
265+
None => fail,
266+
Some(Left(move v)) => move v,
267+
Some(Right(move port)) => {
268+
269+
let (exe, v) = match recv(move port) {
270+
oneshot::send(move data) => move data
271+
};
272+
273+
// Serialize the value as JSON so it can be handed to the
// database as a string.
let s = do io::with_str_writer |wr| {
274+
v.serialize(&json::Serializer(wr));
275+
};
276+
277+
ww.prep.ctxt.db.cache(ww.prep.fn_name,
278+
&ww.prep.declared_inputs,
279+
&ww.prep.declared_outputs,
280+
&exe.discovered_inputs,
281+
&exe.discovered_outputs,
282+
s);
283+
move v
284+
}
285+
}
286+
}
287+
288+
// Smoke test: writes a small C file, declares it as a "file" input keyed by
// its path with its digest as the value, runs gcc on it in the exec phase,
// and prints the output path returned through the Work.
#[test]
289+
fn test() {
290+
use io::WriterUtil;
291+
let db = @Database { a: () };
292+
let lg = @Logger { a: () };
293+
let cfg = @LinearMap();
294+
let cx = @Context::new(db, lg, cfg);
295+
let w:Work<~str> = do cx.prep("test1") |prep| {
296+
let pth = Path("foo.c");
297+
// NOTE(review): the inner block presumably limits the writer's
// lifetime so the file is closed before it is digested -- confirm.
{
298+
let file = io::file_writer(&pth, [io::Create]).get();
299+
file.write_str("void main() { }");
300+
}
301+
302+
prep.declare_input("file", pth.to_str(), digest_file(&pth));
303+
do prep.exec |_exe| {
304+
let out = Path("foo.o");
305+
run::run_program("gcc", [~"foo.c", ~"-o", out.to_str()]);
306+
move out.to_str()
307+
}
308+
};
309+
let s = unwrap(move w);
310+
io::println(s);
311+
}

0 commit comments

Comments
 (0)