Skip to content

Commit acd8e59

Browse files
committed
build-manifest: calculate checksums lazily and in parallel
This commit improves the way build-manifest calculates the checksums included in the manifest, speeding it up:

* Instead of calculating all the hashes beforehand and then using the ones we need, the manifest is first generated with placeholder hashes, and then a function walks through the manifest and calculates only the needed checksums.
* Calculating the checksums is now done in parallel with rayon, to better utilize all the available disk bandwidth.
* Calculating the checksums now uses the sha2 crate instead of the sha256sum CLI tool: this avoids the overhead of calling another process, but more importantly uses hardware acceleration whenever available (the CLI tool doesn't support it at all).
1 parent 0375ee8 commit acd8e59

File tree

4 files changed

+164
-29
lines changed

4 files changed

+164
-29
lines changed

Cargo.lock

Lines changed: 64 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,16 @@ dependencies = [
183183
"block-padding",
184184
"byte-tools",
185185
"byteorder",
186-
"generic-array",
186+
"generic-array 0.12.3",
187+
]
188+
189+
[[package]]
190+
name = "block-buffer"
191+
version = "0.9.0"
192+
source = "registry+https://github.com/rust-lang/crates.io-index"
193+
checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4"
194+
dependencies = [
195+
"generic-array 0.14.4",
187196
]
188197

189198
[[package]]
@@ -233,8 +242,11 @@ version = "0.1.0"
233242
dependencies = [
234243
"anyhow",
235244
"flate2",
245+
"hex 0.4.2",
246+
"rayon",
236247
"serde",
237248
"serde_json",
249+
"sha2",
238250
"tar",
239251
"toml",
240252
]
@@ -687,6 +699,12 @@ version = "0.8.0"
687699
source = "registry+https://github.com/rust-lang/crates.io-index"
688700
checksum = "9a21fa21941700a3cd8fcb4091f361a6a712fac632f85d9f487cc892045d55c6"
689701

702+
[[package]]
703+
name = "cpuid-bool"
704+
version = "0.1.2"
705+
source = "registry+https://github.com/rust-lang/crates.io-index"
706+
checksum = "8aebca1129a03dc6dc2b127edd729435bbc4a37e1d5f4d7513165089ceb02634"
707+
690708
[[package]]
691709
name = "crates-io"
692710
version = "0.31.1"
@@ -884,7 +902,16 @@ version = "0.8.1"
884902
source = "registry+https://github.com/rust-lang/crates.io-index"
885903
checksum = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5"
886904
dependencies = [
887-
"generic-array",
905+
"generic-array 0.12.3",
906+
]
907+
908+
[[package]]
909+
name = "digest"
910+
version = "0.9.0"
911+
source = "registry+https://github.com/rust-lang/crates.io-index"
912+
checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066"
913+
dependencies = [
914+
"generic-array 0.14.4",
888915
]
889916

890917
[[package]]
@@ -1166,6 +1193,16 @@ dependencies = [
11661193
"typenum",
11671194
]
11681195

1196+
[[package]]
1197+
name = "generic-array"
1198+
version = "0.14.4"
1199+
source = "registry+https://github.com/rust-lang/crates.io-index"
1200+
checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817"
1201+
dependencies = [
1202+
"typenum",
1203+
"version_check",
1204+
]
1205+
11691206
[[package]]
11701207
name = "getopts"
11711208
version = "0.2.21"
@@ -1844,9 +1881,9 @@ version = "0.8.0"
18441881
source = "registry+https://github.com/rust-lang/crates.io-index"
18451882
checksum = "a18af3dcaf2b0219366cdb4e2af65a6101457b415c3d1a5c71dd9c2b7c77b9c8"
18461883
dependencies = [
1847-
"block-buffer",
1848-
"digest",
1849-
"opaque-debug",
1884+
"block-buffer 0.7.3",
1885+
"digest 0.8.1",
1886+
"opaque-debug 0.2.3",
18501887
]
18511888

18521889
[[package]]
@@ -2106,6 +2143,12 @@ version = "0.2.3"
21062143
source = "registry+https://github.com/rust-lang/crates.io-index"
21072144
checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c"
21082145

2146+
[[package]]
2147+
name = "opaque-debug"
2148+
version = "0.3.0"
2149+
source = "registry+https://github.com/rust-lang/crates.io-index"
2150+
checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
2151+
21092152
[[package]]
21102153
name = "open"
21112154
version = "1.4.0"
@@ -4371,10 +4414,23 @@ version = "0.8.2"
43714414
source = "registry+https://github.com/rust-lang/crates.io-index"
43724415
checksum = "f7d94d0bede923b3cea61f3f1ff57ff8cdfd77b400fb8f9998949e0cf04163df"
43734416
dependencies = [
4374-
"block-buffer",
4375-
"digest",
4417+
"block-buffer 0.7.3",
4418+
"digest 0.8.1",
43764419
"fake-simd",
4377-
"opaque-debug",
4420+
"opaque-debug 0.2.3",
4421+
]
4422+
4423+
[[package]]
4424+
name = "sha2"
4425+
version = "0.9.1"
4426+
source = "registry+https://github.com/rust-lang/crates.io-index"
4427+
checksum = "2933378ddfeda7ea26f48c555bdad8bb446bf8a3d17832dc83e380d444cfb8c1"
4428+
dependencies = [
4429+
"block-buffer 0.9.0",
4430+
"cfg-if",
4431+
"cpuid-bool",
4432+
"digest 0.9.0",
4433+
"opaque-debug 0.3.0",
43784434
]
43794435

43804436
[[package]]

src/tools/build-manifest/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,6 @@ serde_json = "1.0"
1111
anyhow = "1.0.32"
1212
flate2 = "1.0.16"
1313
tar = "0.4.29"
14+
sha2 = "0.9.1"
15+
rayon = "1.3.1"
16+
hex = "0.4.2"

src/tools/build-manifest/src/main.rs

Lines changed: 61 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,19 @@
77
mod manifest;
88
mod versions;
99

10-
use crate::manifest::{Component, Manifest, Package, Rename, Target};
10+
use crate::manifest::{Component, FileHash, Manifest, Package, Rename, Target};
1111
use crate::versions::{PkgType, Versions};
12-
use std::collections::{BTreeMap, HashMap};
12+
use rayon::prelude::*;
13+
use sha2::Digest;
14+
use std::collections::{BTreeMap, HashMap, HashSet};
1315
use std::env;
16+
use std::error::Error;
1417
use std::fs::{self, File};
1518
use std::io::{self, Read, Write};
1619
use std::path::{Path, PathBuf};
1720
use std::process::{Command, Stdio};
21+
use std::sync::Mutex;
22+
use std::time::Instant;
1823

1924
static HOSTS: &[&str] = &[
2025
"aarch64-unknown-linux-gnu",
@@ -181,7 +186,6 @@ struct Builder {
181186

182187
input: PathBuf,
183188
output: PathBuf,
184-
digests: BTreeMap<String, String>,
185189
s3_address: String,
186190
date: String,
187191

@@ -223,7 +227,6 @@ fn main() {
223227

224228
input,
225229
output,
226-
digests: BTreeMap::new(),
227230
s3_address,
228231
date,
229232

@@ -236,7 +239,9 @@ fn main() {
236239
impl Builder {
237240
fn build(&mut self) {
238241
self.check_toolstate();
239-
self.digest_and_sign();
242+
if self.legacy {
243+
self.digest_and_sign();
244+
}
240245
let manifest = self.build_manifest();
241246

242247
let rust_version = self.versions.package_version(&PkgType::Rust).unwrap();
@@ -270,10 +275,9 @@ impl Builder {
270275
/// Hash all files, compute their signatures, and collect the hashes in `self.digests`.
271276
fn digest_and_sign(&mut self) {
272277
for file in t!(self.input.read_dir()).map(|e| t!(e).path()) {
273-
let filename = file.file_name().unwrap().to_str().unwrap();
274-
let digest = self.hash(&file);
278+
file.file_name().unwrap().to_str().unwrap();
279+
self.hash(&file);
275280
self.sign(&file);
276-
assert!(self.digests.insert(filename.to_string(), digest).is_none());
277281
}
278282
}
279283

@@ -289,6 +293,9 @@ impl Builder {
289293
self.add_profiles_to(&mut manifest);
290294
self.add_renames_to(&mut manifest);
291295
manifest.pkg.insert("rust".to_string(), self.rust_package(&manifest));
296+
297+
self.fill_missing_hashes(&mut manifest);
298+
292299
manifest
293300
}
294301

@@ -561,6 +568,41 @@ impl Builder {
561568
assert!(t!(child.wait()).success());
562569
}
563570

571+
fn fill_missing_hashes(&self, manifest: &mut Manifest) {
572+
// First collect all files that need hashes
573+
let mut need_hashes = HashSet::new();
574+
crate::manifest::visit_file_hashes(manifest, |file_hash| {
575+
if let FileHash::Missing(path) = file_hash {
576+
need_hashes.insert(path.clone());
577+
}
578+
});
579+
580+
let collected = Mutex::new(HashMap::new());
581+
let collection_start = Instant::now();
582+
println!(
583+
"collecting hashes for {} tarballs across {} threads",
584+
need_hashes.len(),
585+
rayon::current_num_threads().min(need_hashes.len()),
586+
);
587+
need_hashes.par_iter().for_each(|path| match fetch_hash(path) {
588+
Ok(hash) => {
589+
collected.lock().unwrap().insert(path, hash);
590+
}
591+
Err(err) => eprintln!("error while fetching the hash for {}: {}", path.display(), err),
592+
});
593+
let collected = collected.into_inner().unwrap();
594+
println!("collected {} hashes in {:.2?}", collected.len(), collection_start.elapsed());
595+
596+
crate::manifest::visit_file_hashes(manifest, |file_hash| {
597+
if let FileHash::Missing(path) = file_hash {
598+
match collected.get(path) {
599+
Some(hash) => *file_hash = FileHash::Present(hash.clone()),
600+
None => panic!("missing hash for file {}", path.display()),
601+
}
602+
}
603+
})
604+
}
605+
564606
fn write_channel_files(&self, channel_name: &str, manifest: &Manifest) {
565607
self.write(&toml::to_string(&manifest).unwrap(), channel_name, ".toml");
566608
self.write(&manifest.date, channel_name, "-date.txt");
@@ -574,7 +616,16 @@ impl Builder {
574616
fn write(&self, contents: &str, channel_name: &str, suffix: &str) {
575617
let dst = self.output.join(format!("channel-rust-{}{}", channel_name, suffix));
576618
t!(fs::write(&dst, contents));
577-
self.hash(&dst);
578-
self.sign(&dst);
619+
if self.legacy {
620+
self.hash(&dst);
621+
self.sign(&dst);
622+
}
579623
}
580624
}
625+
626+
fn fetch_hash(path: &Path) -> Result<String, Box<dyn Error>> {
627+
let mut file = File::open(path)?;
628+
let mut sha256 = sha2::Sha256::default();
629+
std::io::copy(&mut file, &mut sha256)?;
630+
Ok(hex::encode(sha256.finalize()))
631+
}

src/tools/build-manifest/src/manifest.rs

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::Builder;
2-
use serde::Serialize;
2+
use serde::{Serialize, Serializer};
33
use std::collections::BTreeMap;
44
use std::path::{Path, PathBuf};
55

@@ -29,9 +29,9 @@ pub(crate) struct Rename {
2929
pub(crate) struct Target {
3030
pub(crate) available: bool,
3131
pub(crate) url: Option<String>,
32-
pub(crate) hash: Option<String>,
32+
pub(crate) hash: Option<FileHash>,
3333
pub(crate) xz_url: Option<String>,
34-
pub(crate) xz_hash: Option<String>,
34+
pub(crate) xz_hash: Option<FileHash>,
3535
pub(crate) components: Option<Vec<Component>>,
3636
pub(crate) extensions: Option<Vec<Component>>,
3737
}
@@ -52,10 +52,10 @@ impl Target {
5252
extensions: None,
5353
// .gz
5454
url: gz.as_ref().map(|path| builder.url(path)),
55-
hash: gz.map(|path| Self::digest_of(builder, &path)),
55+
hash: gz.map(FileHash::Missing),
5656
// .xz
5757
xz_url: xz.as_ref().map(|path| builder.url(path)),
58-
xz_hash: xz.map(|path| Self::digest_of(builder, &path)),
58+
xz_hash: xz.map(FileHash::Missing),
5959
}
6060
}
6161

@@ -65,12 +65,6 @@ impl Target {
6565
if path.is_file() { Some(path) } else { None }
6666
}
6767

68-
fn digest_of(builder: &Builder, path: &Path) -> String {
69-
// TEMPORARY CODE -- DON'T REVIEW :)
70-
let file_name = path.file_name().unwrap().to_str().unwrap();
71-
builder.digests.get(file_name).unwrap().clone()
72-
}
73-
7468
pub(crate) fn unavailable() -> Self {
7569
Self::default()
7670
}
@@ -87,3 +81,34 @@ impl Component {
8781
Self { pkg: pkg.to_string(), target: target.to_string() }
8882
}
8983
}
84+
85+
#[allow(unused)]
86+
pub(crate) enum FileHash {
87+
Missing(PathBuf),
88+
Present(String),
89+
}
90+
91+
impl Serialize for FileHash {
92+
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
93+
match self {
94+
FileHash::Missing(path) => Err(serde::ser::Error::custom(format!(
95+
"can't serialize a missing hash for file {}",
96+
path.display()
97+
))),
98+
FileHash::Present(inner) => inner.serialize(serializer),
99+
}
100+
}
101+
}
102+
103+
pub(crate) fn visit_file_hashes(manifest: &mut Manifest, mut f: impl FnMut(&mut FileHash)) {
104+
for pkg in manifest.pkg.values_mut() {
105+
for target in pkg.target.values_mut() {
106+
if let Some(hash) = &mut target.hash {
107+
f(hash);
108+
}
109+
if let Some(hash) = &mut target.xz_hash {
110+
f(hash);
111+
}
112+
}
113+
}
114+
}

0 commit comments

Comments
 (0)