From 488da69ce927dc21fdf8f83c174b12c09e5acce0 Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Sun, 16 Apr 2023 21:31:33 +0200 Subject: [PATCH 1/4] bootstrap.py: Create cache before download This makes `_download_component_helper` "pure". --- src/bootstrap/bootstrap.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/bootstrap/bootstrap.py b/src/bootstrap/bootstrap.py index 025145244c491..31546d1951734 100644 --- a/src/bootstrap/bootstrap.py +++ b/src/bootstrap/bootstrap.py @@ -429,17 +429,24 @@ def download_toolchain(self): self.program_out_of_date(self.rustc_stamp(), key)): if os.path.exists(bin_root): shutil.rmtree(bin_root) + + key = self.stage0_compiler.date + cache_dst = os.path.join(self.build_dir, "cache") + rustc_cache = os.path.join(cache_dst, key) + if not os.path.exists(rustc_cache): + os.makedirs(rustc_cache) + tarball_suffix = '.tar.gz' if lzma is None else '.tar.xz' filename = "rust-std-{}-{}{}".format( rustc_channel, self.build, tarball_suffix) pattern = "rust-std-{}".format(self.build) - self._download_component_helper(filename, pattern, tarball_suffix) + self._download_component_helper(filename, pattern, tarball_suffix, rustc_cache) filename = "rustc-{}-{}{}".format(rustc_channel, self.build, tarball_suffix) - self._download_component_helper(filename, "rustc", tarball_suffix) + self._download_component_helper(filename, "rustc", tarball_suffix, rustc_cache) filename = "cargo-{}-{}{}".format(rustc_channel, self.build, tarball_suffix) - self._download_component_helper(filename, "cargo", tarball_suffix) + self._download_component_helper(filename, "cargo", tarball_suffix, rustc_cache) if self.should_fix_bins_and_dylibs(): self.fix_bin_or_dylib("{}/bin/cargo".format(bin_root)) @@ -455,13 +462,9 @@ def download_toolchain(self): rust_stamp.write(key) def _download_component_helper( - self, filename, pattern, tarball_suffix, + self, filename, pattern, tarball_suffix, 
rustc_cache, ): key = self.stage0_compiler.date - cache_dst = os.path.join(self.build_dir, "cache") - rustc_cache = os.path.join(cache_dst, key) - if not os.path.exists(rustc_cache): - os.makedirs(rustc_cache) tarball = os.path.join(rustc_cache, filename) if not os.path.exists(tarball): From fa4639195c3e3b010df68dcbc9f908dd57282894 Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Sun, 16 Apr 2023 21:34:55 +0200 Subject: [PATCH 2/4] bootstrap.py: Use loop for `_download_component_helper` --- src/bootstrap/bootstrap.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/bootstrap/bootstrap.py b/src/bootstrap/bootstrap.py index 31546d1951734..9e4c0d2b16799 100644 --- a/src/bootstrap/bootstrap.py +++ b/src/bootstrap/bootstrap.py @@ -437,16 +437,17 @@ def download_toolchain(self): os.makedirs(rustc_cache) tarball_suffix = '.tar.gz' if lzma is None else '.tar.xz' - filename = "rust-std-{}-{}{}".format( - rustc_channel, self.build, tarball_suffix) - pattern = "rust-std-{}".format(self.build) - self._download_component_helper(filename, pattern, tarball_suffix, rustc_cache) - filename = "rustc-{}-{}{}".format(rustc_channel, self.build, - tarball_suffix) - self._download_component_helper(filename, "rustc", tarball_suffix, rustc_cache) - filename = "cargo-{}-{}{}".format(rustc_channel, self.build, - tarball_suffix) - self._download_component_helper(filename, "cargo", tarball_suffix, rustc_cache) + + tarballs_to_download = [ + ("rust-std-{}-{}{}".format(rustc_channel, self.build, tarball_suffix), + "rust-std-{}".format(self.build)), + ("rustc-{}-{}{}".format(rustc_channel, self.build, tarball_suffix), "rustc"), + ("cargo-{}-{}{}".format(rustc_channel, self.build, tarball_suffix), "cargo"), + ] + + for filename, pattern in tarballs_to_download: + self._download_component_helper(filename, pattern, tarball_suffix, rustc_cache) + if self.should_fix_bins_and_dylibs(): 
self.fix_bin_or_dylib("{}/bin/cargo".format(bin_root)) From 46a25581aae5815382aa9291ec7071aafbfea7ba Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Sun, 16 Apr 2023 21:36:05 +0200 Subject: [PATCH 3/4] Extract variable --- src/bootstrap/bootstrap.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/bootstrap/bootstrap.py b/src/bootstrap/bootstrap.py index 9e4c0d2b16799..30613e47cbd1a 100644 --- a/src/bootstrap/bootstrap.py +++ b/src/bootstrap/bootstrap.py @@ -438,11 +438,12 @@ def download_toolchain(self): tarball_suffix = '.tar.gz' if lzma is None else '.tar.xz' + toolchain_suffix = "{}-{}{}".format(rustc_channel, self.build, tarball_suffix) + tarballs_to_download = [ - ("rust-std-{}-{}{}".format(rustc_channel, self.build, tarball_suffix), - "rust-std-{}".format(self.build)), - ("rustc-{}-{}{}".format(rustc_channel, self.build, tarball_suffix), "rustc"), - ("cargo-{}-{}{}".format(rustc_channel, self.build, tarball_suffix), "cargo"), + ("rust-std-{}".format(toolchain_suffix), "rust-std-{}".format(self.build)), + ("rustc-{}".format(toolchain_suffix), "rustc"), + ("cargo-{}".format(toolchain_suffix), "cargo"), ] for filename, pattern in tarballs_to_download: From a98968ee0e03b15ce8656d77f75ae664a0b2d6b5 Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Sun, 16 Apr 2023 22:03:09 +0200 Subject: [PATCH 4/4] Parallelize initial rust extraction This is quite slow and embarrassingly parallel, even in Python. This speeds up the initial bootstrap build by about 5-10s. 
--- src/bootstrap/bootstrap.py | 70 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 2 deletions(-) diff --git a/src/bootstrap/bootstrap.py b/src/bootstrap/bootstrap.py index 30613e47cbd1a..771dca51ede97 100644 --- a/src/bootstrap/bootstrap.py +++ b/src/bootstrap/bootstrap.py @@ -13,6 +13,7 @@ import tempfile from time import time +from multiprocessing import Pool, cpu_count try: import lzma @@ -392,6 +393,48 @@ def channel(self): return self.version + "-" + self.date +class DownloadInfo: + """A helper class that can be pickled into a parallel subprocess""" + + def __init__( + self, + base_download_url, + download_path, + bin_root, + tarball_path, + tarball_suffix, + checksums_sha256, + pattern, + verbose, + ): + self.base_download_url = base_download_url + self.download_path = download_path + self.bin_root = bin_root + self.tarball_path = tarball_path + self.tarball_suffix = tarball_suffix + self.checksums_sha256 = checksums_sha256 + self.pattern = pattern + self.verbose = verbose + +def download_component(download_info): + if not os.path.exists(download_info.tarball_path): + get( + download_info.base_download_url, + download_info.download_path, + download_info.tarball_path, + download_info.checksums_sha256, + verbose=download_info.verbose, + ) + +def unpack_component(download_info): + unpack( + download_info.tarball_path, + download_info.tarball_suffix, + download_info.bin_root, + match=download_info.pattern, + verbose=download_info.verbose, + ) + class RustBuild(object): """Provide all the methods required to build Rust""" def __init__(self): @@ -446,8 +489,31 @@ def download_toolchain(self): ("cargo-{}".format(toolchain_suffix), "cargo"), ] - for filename, pattern in tarballs_to_download: - self._download_component_helper(filename, pattern, tarball_suffix, rustc_cache) + tarballs_download_info = [ + DownloadInfo( + base_download_url=self.download_url, + download_path="dist/{}/{}".format(self.stage0_compiler.date, filename), + 
bin_root=self.bin_root(), + tarball_path=os.path.join(rustc_cache, filename), + tarball_suffix=tarball_suffix, + checksums_sha256=self.checksums_sha256, + pattern=pattern, + verbose=self.verbose, + ) + for filename, pattern in tarballs_to_download + ] + + # Download the components serially to show the progress bars properly. + for download_info in tarballs_download_info: + download_component(download_info) + + # Unpack the tarballs in parallel. + # In Python 2.7, Pool cannot be used as a context manager. + p = Pool(min(len(tarballs_download_info), cpu_count())) + try: + p.map(unpack_component, tarballs_download_info) + finally: + p.close() if self.should_fix_bins_and_dylibs(): self.fix_bin_or_dylib("{}/bin/cargo".format(bin_root))