From 4d0b5e5d72d8f44594750b15558bdd0827c4debb Mon Sep 17 00:00:00 2001 From: Adam Simpkins Date: Wed, 31 Jul 2019 20:53:07 -0700 Subject: [PATCH] move project hash computation to ManifestLoader Summary: Move code that computes project hashes to ManifestLoader. ManifestLoader is the only class that has all of the information necessary to compute the project hashes correctly. The ManifestLoader object can also cache previously computed hashes, so that we don't have to keep computing hashes for projects over and over again. Previously the `BuildOptions.compute_dirs()` function would end up re-computing hashes for all dependencies each time it was called. Reviewed By: strager Differential Revision: D16477401 fbshipit-source-id: ce03642114f91ce4f859f612e6b2e747cf1653be --- build/fbcode_builder/getdeps.py | 28 +++------ build/fbcode_builder/getdeps/buildopts.py | 74 +--------------------- build/fbcode_builder/getdeps/load.py | 75 +++++++++++++++++++++++ 3 files changed, 86 insertions(+), 91 deletions(-) diff --git a/build/fbcode_builder/getdeps.py b/build/fbcode_builder/getdeps.py index 90f4f95f5..345e125f3 100755 --- a/build/fbcode_builder/getdeps.py +++ b/build/fbcode_builder/getdeps.py @@ -153,9 +153,7 @@ class ShowInstDirCmd(SubCmd): manifests = [manifest] for m in manifests: - fetcher = loader.create_fetcher(m) - dirs = opts.compute_dirs(m, fetcher, loader) - inst_dir = dirs["inst_dir"] + inst_dir = loader.get_project_install_dir(m) print(inst_dir) def setup_parser(self, parser): @@ -234,9 +232,8 @@ class BuildCmd(SubCmd): if args.clean: fetcher.clean() - dirs = opts.compute_dirs(m, fetcher, loader) - build_dir = dirs["build_dir"] - inst_dir = dirs["inst_dir"] + build_dir = loader.get_project_build_dir(m) + inst_dir = loader.get_project_install_dir(m) if m == manifest or not args.no_deps: print("Assessing %s..." % m.name) @@ -244,11 +241,12 @@ class BuildCmd(SubCmd): reconfigure = change_status.build_changed() sources_changed = change_status.sources_changed() + project_hash = loader.get_project_hash(m) built_marker = os.path.join(inst_dir, ".built-by-getdeps") if os.path.exists(built_marker): with open(built_marker, "r") as f: built_hash = f.read().strip() - if built_hash != dirs["hash"]: + if built_hash != project_hash: # Some kind of inconsistency with a prior build, # let's run it again to be sure os.unlink(built_marker) @@ -263,7 +261,7 @@ class BuildCmd(SubCmd): builder.build(install_dirs, reconfigure=reconfigure) with open(built_marker, "w") as f: - f.write(dirs["hash"]) + f.write(project_hash) install_dirs.append(inst_dir) @@ -327,11 +325,7 @@ class FixupDeps(SubCmd): install_dirs = [] for m in projects: - fetcher = loader.create_fetcher(m) - - dirs = opts.compute_dirs(m, fetcher, loader) - inst_dir = dirs["inst_dir"] - + inst_dir = loader.get_project_install_dir(m) install_dirs.append(inst_dir) if m == manifest: @@ -383,11 +377,7 @@ class TestCmd(SubCmd): install_dirs = [] for m in projects: - fetcher = loader.create_fetcher(m) - - dirs = opts.compute_dirs(m, fetcher, loader) - build_dir = dirs["build_dir"] - inst_dir = dirs["inst_dir"] + inst_dir = loader.get_project_install_dir(m) if m == manifest or args.test_all: built_marker = os.path.join(inst_dir, ".built-by-getdeps") @@ -397,8 +387,10 @@ class TestCmd(SubCmd): # want to tackle that as part of adding build-for-test # support. return 1 + fetcher = loader.create_fetcher(m) src_dir = fetcher.get_src_dir() ctx = ctx_gen.get_context(m.name) + build_dir = loader.get_project_build_dir(m) builder = m.create_builder(opts, src_dir, build_dir, inst_dir, ctx) builder.run_tests(install_dirs, schedule_type=args.schedule_type) diff --git a/build/fbcode_builder/getdeps/buildopts.py b/build/fbcode_builder/getdeps/buildopts.py index 526d80975..f9b2dfac1 100644 --- a/build/fbcode_builder/getdeps/buildopts.py +++ b/build/fbcode_builder/getdeps/buildopts.py @@ -7,17 +7,15 @@ from __future__ import absolute_import, division, print_function, unicode_literals -import base64 import errno import glob -import hashlib import ntpath import os import subprocess import sys import tempfile -from .envfuncs import Env, add_path_entry, path_search +from .envfuncs import Env, add_path_entry from .manifest import ContextGenerator from .platform import HostType, is_windows @@ -148,76 +146,6 @@ class BuildOptions(object): } ) - def _compute_hash(self, hash_by_name, manifest, loader): - """ This recursive function computes a hash for a given manifest. - The hash takes into account some environmental factors on the - host machine and includes the hashes of its dependencies. - No caching of the computation is performed, which is theoretically - wasteful but the computation is fast enough that it is not required - to cache across multiple invocations. """ - - h = hash_by_name.get(manifest.name, None) - if h is not None: - return h - - hasher = hashlib.sha256() - # Some environmental and configuration things matter - env = {} - env["install_dir"] = self.install_dir - env["scratch_dir"] = self.scratch_dir - env["os"] = self.host_type.ostype - env["distro"] = self.host_type.distro - env["distro_vers"] = self.host_type.distrovers - for name in ["CXXFLAGS", "CPPFLAGS", "LDFLAGS", "CXX", "CC"]: - env[name] = os.environ.get(name) - for tool in ["cc", "c++", "gcc", "g++", "clang", "clang++"]: - env["tool-%s" % tool] = path_search(os.environ, tool) - - fetcher = loader.create_fetcher(manifest) - env["fetcher.hash"] = fetcher.hash() - - for name in sorted(env.keys()): - hasher.update(name.encode("utf-8")) - value = env.get(name) - if value is not None: - hasher.update(value.encode("utf-8")) - - ctx = loader.ctx_gen.get_context(manifest.name) - manifest.update_hash(hasher, ctx) - - dep_list = sorted(manifest.get_section_as_dict("dependencies", ctx).keys()) - for dep in dep_list: - dep_hash = self._compute_hash( - hash_by_name, loader.load_manifest(dep), loader - ) - hasher.update(dep_hash.encode("utf-8")) - - # Use base64 to represent the hash, rather than the simple hex digest, - # so that the string is shorter. Use the URL-safe encoding so that - # the hash can also be safely used as a filename component. - h = base64.urlsafe_b64encode(hasher.digest()).decode("ascii") - # ... and because cmd.exe is troublesome with `=` signs, nerf those. - # They tend to be padding characters at the end anyway, so we can - # safely discard them. - h = h.replace("=", "") - hash_by_name[manifest.name] = h - - return h - - def compute_dirs(self, manifest, fetcher, loader): - hash_by_name = {} - hash = self._compute_hash(hash_by_name, manifest, loader) - - if manifest.is_first_party_project(): - directory = manifest.name - else: - directory = "%s-%s" % (manifest.name, hash) - - build_dir = os.path.join(self.scratch_dir, "build", directory) - inst_dir = os.path.join(self.install_dir, directory) - - return {"build_dir": build_dir, "inst_dir": inst_dir, "hash": hash} - def compute_env_for_install_dirs(self, install_dirs, env=None): if env: env = env.copy() diff --git a/build/fbcode_builder/getdeps/load.py b/build/fbcode_builder/getdeps/load.py index bb5e6f6e2..9f0759c56 100644 --- a/build/fbcode_builder/getdeps/load.py +++ b/build/fbcode_builder/getdeps/load.py @@ -7,9 +7,12 @@ from __future__ import absolute_import, division, print_function, unicode_literals +import base64 import glob +import hashlib import os +from .envfuncs import path_search from .manifest import ManifestParser @@ -101,6 +104,7 @@ class ManifestLoader(object): self.manifests_by_name = {} self._loaded_all = False + self._project_hashes = {} def load_manifest(self, name): manifest = self.manifests_by_name.get(name) @@ -191,3 +195,74 @@ class ManifestLoader(object): def create_fetcher(self, manifest): ctx = self.ctx_gen.get_context(manifest.name) return manifest.create_fetcher(self.build_opts, ctx) + + def get_project_hash(self, manifest): + h = self._project_hashes.get(manifest.name) + if h is None: + h = self._compute_project_hash(manifest) + self._project_hashes[manifest.name] = h + return h + + def _compute_project_hash(self, manifest): + """ This recursive function computes a hash for a given manifest. + The hash takes into account some environmental factors on the + host machine and includes the hashes of its dependencies. + No caching of the computation is performed, which is theoretically + wasteful but the computation is fast enough that it is not required + to cache across multiple invocations. """ + hasher = hashlib.sha256() + # Some environmental and configuration things matter + env = {} + env["install_dir"] = self.build_opts.install_dir + env["scratch_dir"] = self.build_opts.scratch_dir + env["os"] = self.build_opts.host_type.ostype + env["distro"] = self.build_opts.host_type.distro + env["distro_vers"] = self.build_opts.host_type.distrovers + for name in ["CXXFLAGS", "CPPFLAGS", "LDFLAGS", "CXX", "CC"]: + env[name] = os.environ.get(name) + for tool in ["cc", "c++", "gcc", "g++", "clang", "clang++"]: + env["tool-%s" % tool] = path_search(os.environ, tool) + + fetcher = self.create_fetcher(manifest) + env["fetcher.hash"] = fetcher.hash() + + for name in sorted(env.keys()): + hasher.update(name.encode("utf-8")) + value = env.get(name) + if value is not None: + hasher.update(value.encode("utf-8")) + + ctx = self.ctx_gen.get_context(manifest.name) + manifest.update_hash(hasher, ctx) + + dep_list = sorted(manifest.get_section_as_dict("dependencies", ctx).keys()) + for dep in dep_list: + dep_manifest = self.load_manifest(dep) + dep_hash = self.get_project_hash(dep_manifest) + hasher.update(dep_hash.encode("utf-8")) + + # Use base64 to represent the hash, rather than the simple hex digest, + # so that the string is shorter. Use the URL-safe encoding so that + # the hash can also be safely used as a filename component. + h = base64.urlsafe_b64encode(hasher.digest()).decode("ascii") + # ... and because cmd.exe is troublesome with `=` signs, nerf those. + # They tend to be padding characters at the end anyway, so we can + # safely discard them. + h = h.replace("=", "") + + return h + + def _get_project_dir_name(self, manifest): + if manifest.is_first_party_project(): + return manifest.name + else: + project_hash = self.get_project_hash(manifest) + return "%s-%s" % (manifest.name, project_hash) + + def get_project_install_dir(self, manifest): + project_dir_name = self._get_project_dir_name(manifest) + return os.path.join(self.build_opts.install_dir, project_dir_name) + + def get_project_build_dir(self, manifest): + project_dir_name = self._get_project_dir_name(manifest) + return os.path.join(self.build_opts.scratch_dir, "build", project_dir_name)