fbcode_builder: getdeps: beef up hash computation
Summary:
Previously, a relatively lame hash was computed for the build directory, based on a hash of the source directory. That was good enough to get things off the ground, but in the interest of being able to cache the build outputs and safely invalidate them, we need a slightly more rigorous implementation.

This diff computes a hash based on the manifest contents and relevant environmental factors. The hash is used to name the build directory, which will ultimately be cached, e.g. using the travis/appveyor cache directory configuration, and by some other means for the FB internal CI.

The hash needs to be sufficient that it changes when the manifests change. We can tolerate a false positive change in the hash (it just means that a build will take longer), but cannot tolerate a false negative (which would result in an incorrect build).

Reviewed By: simpkins

Differential Revision: D14710332

fbshipit-source-id: ebc2e74eafc6f3305d4412a82195bc9fb9dfa615
Commit: 4f5544674a
Parent: 080f2c565f
Committed by: Facebook Github Bot
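For orientation, here is a condensed sketch of the scheme this diff implements. The helper below is illustrative only and not part of the change; the names mirror those in the diff.

    import base64
    import hashlib

    def build_dir_name(manifest_name, hash_inputs):
        # Hash the sorted key/value pairs (manifest config, toolchain
        # env vars, fetcher revision) so that any change to an input
        # changes the digest. None values contribute only their key.
        hasher = hashlib.sha256()
        for key in sorted(hash_inputs):
            hasher.update(key.encode("utf-8"))
            value = hash_inputs[key]
            if value is not None:
                hasher.update(value.encode("utf-8"))
        # URL-safe base64 with the `=` padding stripped, so the result
        # is usable as a directory name (and in cmd.exe), as in the diff.
        h = base64.urlsafe_b64encode(hasher.digest()).decode("ascii")
        h = h.replace("=", "")
        return "%s-%s" % (manifest_name, h)

    # A CI cache can then key on scratch/build/<this name>:
    print(build_dir_name("zstd", {"os": "linux", "CXX": None, "fetcher.hash": "abc123"}))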
@@ -145,7 +145,9 @@ class BuildCmd(SubCmd):
         manifest = load_project(opts, args.project)
 
         ctx = context_from_host_tuple()
+        print("Building on %s" % ctx)
         projects = manifests_in_dependency_order(opts, manifest, ctx)
+        manifests_by_name = {m.name: m for m in projects}
 
         # Accumulate the install directories so that the build steps
         # can find their dep installation
@@ -161,7 +163,7 @@ class BuildCmd(SubCmd):
             reconfigure = change_status.build_changed()
             sources_changed = change_status.sources_changed()
 
-            dirs = opts.compute_dirs(m, fetcher)
+            dirs = opts.compute_dirs(m, fetcher, manifests_by_name, ctx)
             build_dir = dirs["build_dir"]
             inst_dir = dirs["inst_dir"]
 
@@ -169,7 +171,7 @@ class BuildCmd(SubCmd):
             if os.path.exists(built_marker):
                 with open(built_marker, "r") as f:
                     built_hash = f.read().strip()
-                if built_hash != hash:
+                if built_hash != dirs["hash"]:
                     # Some kind of inconsistency with a prior build,
                     # let's run it again to be sure
                     os.unlink(built_marker)
@@ -213,6 +215,7 @@ class TestCmd(SubCmd):
 
         ctx = context_from_host_tuple()
         projects = manifests_in_dependency_order(opts, manifest, ctx)
+        manifests_by_name = {m.name: m for m in projects}
 
         # Accumulate the install directories so that the test steps
         # can find their dep installation
@@ -221,7 +224,7 @@ class TestCmd(SubCmd):
         for m in projects:
             fetcher = m.create_fetcher(opts, ctx)
 
-            dirs = opts.compute_dirs(m, fetcher)
+            dirs = opts.compute_dirs(m, fetcher, manifests_by_name, ctx)
             build_dir = dirs["build_dir"]
             inst_dir = dirs["inst_dir"]
 
@@ -7,11 +7,14 @@
 
 from __future__ import absolute_import, division, print_function, unicode_literals
 
+import base64
 import errno
+import hashlib
 import os
 import subprocess
 import tempfile
 
+from .envfuncs import path_search
 from .platform import HostType, is_windows
 
 
@@ -89,9 +92,70 @@ class BuildOptions(object):
     def is_linux(self):
         return self.host_type.is_linux()
 
-    def compute_dirs(self, manifest, fetcher):
-        hash = fetcher.hash()
-        directory = "%s-%s" % (manifest.name, hash)
+    def _compute_hash(self, hash_by_name, manifest, manifests_by_name, ctx):
+        """ This recursive function computes a hash for a given manifest.
+        The hash takes into account some environmental factors on the
+        host machine and includes the hashes of its dependencies.
+        No caching of the computation is performed, which is theoretically
+        wasteful but the computation is fast enough that it is not required
+        to cache across multiple invocations. """
+
+        h = hash_by_name.get(manifest.name, None)
+        if h is not None:
+            return h
+
+        hasher = hashlib.sha256()
+        # Some environmental and configuration things matter
+        env = {}
+        env["install_dir"] = self.install_dir
+        env["scratch_dir"] = self.scratch_dir
+        env["os"] = self.host_type.ostype
+        env["distro"] = self.host_type.distro
+        env["distro_vers"] = self.host_type.distrovers
+        for name in ["CXXFLAGS", "CPPFLAGS", "LDFLAGS", "CXX", "CC"]:
+            env[name] = os.environ.get(name)
+        for tool in ["cc", "c++", "gcc", "g++", "clang", "clang++"]:
+            env["tool-%s" % tool] = path_search(os.environ, tool)
+
+        fetcher = manifest.create_fetcher(self, ctx)
+        env["fetcher.hash"] = fetcher.hash()
+
+        for name in sorted(env.keys()):
+            hasher.update(name.encode("utf-8"))
+            value = env.get(name)
+            if value is not None:
+                hasher.update(value.encode("utf-8"))
+
+        manifest.update_hash(hasher, ctx)
+
+        dep_list = sorted(manifest.get_section_as_dict("dependencies", ctx).keys())
+        for dep in dep_list:
+            dep_hash = self._compute_hash(
+                hash_by_name, manifests_by_name[dep], manifests_by_name, ctx
+            )
+            hasher.update(dep_hash.encode("utf-8"))
+
+        # Use base64 to represent the hash, rather than the simple hex digest,
+        # so that the string is shorter.  Use the URL-safe encoding so that
+        # the hash can also be safely used as a filename component.
+        h = base64.urlsafe_b64encode(hasher.digest()).decode("ascii")
+        # ... and because cmd.exe is troublesome with `=` signs, nerf those.
+        # They tend to be padding characters at the end anyway, so we can
+        # safely discard them.
+        h = h.replace("=", "")
+        hash_by_name[manifest.name] = h
+
+        return h
+
+    def compute_dirs(self, manifest, fetcher, manifests_by_name, ctx):
+        hash_by_name = {}
+        hash = self._compute_hash(hash_by_name, manifest, manifests_by_name, ctx)
+
+        if manifest.is_first_party_project():
+            directory = manifest.name
+        else:
+            directory = "%s-%s" % (manifest.name, hash)
+
         build_dir = os.path.join(self.scratch_dir, "build", directory)
         inst_dir = os.path.join(self.scratch_dir, "installed", directory)
 
@@ -20,6 +20,7 @@ import time
 import zipfile
 
 from .copytree import prefetch_dir_if_eden
+from .envfuncs import Env
 from .platform import is_windows
 from .runcmd import run_cmd
 
 
@@ -114,7 +115,12 @@ class Fetcher(object):
         working copy.  For a git repo this is commit hash for the working
         copy.  For other Fetchers this should relate to the version of
         the code in the src dir.  The intent is that if a manifest
-        changes the version/rev of a project that the hash be different. """
+        changes the version/rev of a project that the hash be different.
+        Importantly, this should be computable without actually fetching
+        the code, as we want this to factor into a hash used to download
+        a pre-built version of the code, without having to first download
+        and extract its sources (eg: boost on windows is pretty painful).
+        """
         pass
 
     def get_src_dir(self):
@@ -216,13 +222,7 @@ class GitFetcher(Fetcher):
             run_cmd(["git", "clean", "-fxd"], cwd=self.repo_dir)
 
     def hash(self):
-        """ Returns a hash that identifies the version of the code in the
-        working copy """
-        return (
-            subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=self.repo_dir)
-            .strip()
-            .decode("utf-8")[0:6]
-        )
+        return self.rev
 
     def get_src_dir(self):
         return self.repo_dir
@@ -402,6 +402,31 @@ class ShipitPathMap(object):
         return change_status
 
 
+FBSOURCE_REPO_HASH = {}
+
+
+def get_fbsource_repo_hash(build_options):
+    """ Returns the hash for the fbsource repo.
+    Since we may have multiple first party projects to
+    hash, and because we don't mutate the repo, we cache
+    this hash in a global. """
+    global FBSOURCE_REPO_HASH
+    cached_hash = FBSOURCE_REPO_HASH.get(build_options.fbsource_dir)
+    if cached_hash:
+        return cached_hash
+
+    cmd = ["hg", "log", "-r.", "-T{node}"]
+    env = Env()
+    env.set("HGPLAIN", "1")
+    cached_hash = subprocess.check_output(
+        cmd, cwd=build_options.fbsource_dir, env=dict(env.items())
+    ).decode("ascii")
+
+    FBSOURCE_REPO_HASH[build_options.fbsource_dir] = cached_hash
+
+    return cached_hash
+
+
 class SimpleShipitTransformerFetcher(Fetcher):
     def __init__(self, build_options, manifest):
         self.build_options = build_options
@@ -426,7 +451,7 @@ class SimpleShipitTransformerFetcher(Fetcher):
         return mapping.mirror(self.build_options.fbsource_dir, self.repo_dir)
 
     def hash(self):
-        return "fbsource"  # FIXME: use the hash of the repo in this and the repo_dir
+        return get_fbsource_repo_hash(self.build_options)
 
     def get_src_dir(self):
         return self.repo_dir
@@ -493,13 +518,7 @@ class ShipitTransformerFetcher(Fetcher):
             raise
 
     def hash(self):
-        cmd = ["hg", "log", "-r.", "-T{node}"]
-        env = os.environ.copy()
-        env["HGPLAIN"] = "1"
-        fbsource_hash = subprocess.check_output(
-            cmd, cwd=self.build_options.fbsource_dir, env=env
-        )
-        return fbsource_hash[0:6]
+        return get_fbsource_repo_hash(self.build_options)
 
     def get_src_dir(self):
         return self.repo_dir
@@ -637,7 +656,7 @@ class ArchiveFetcher(Fetcher):
         return ChangeStatus(True)
 
     def hash(self):
-        return self.sha256[0:6]
+        return self.sha256
 
     def get_src_dir(self):
         return self.src_dir
@@ -271,6 +271,36 @@ class ManifestParser(object):
             d[field] = value
         return d
 
+    def update_hash(self, hasher, ctx):
+        """ Compute a hash over the configuration for the given
+        context.  The goal is for the hash to change if the config
+        for that context changes, but not if a change is made to
+        the config only for a different platform than that expressed
+        by ctx.  The hash is intended to be used to help invalidate
+        a future cache for the third party build products.
+        The hasher argument is a hash object returned from hashlib. """
+        for section in sorted(SCHEMA.keys()):
+            hasher.update(section.encode("utf-8"))
+
+            # Note: at the time of writing, nothing in the implementation
+            # relies on keys in any config section being ordered.
+            # In theory we could have conflicting flags in different
+            # config sections and later flags override earlier flags.
+            # For the purposes of computing a hash we're not super
+            # concerned about this: manifest changes should be rare
+            # enough and we'd rather that this trigger an invalidation
+            # than strive for a cache hit at this time.
+            pairs = self.get_section_as_ordered_pairs(section, ctx)
+            pairs.sort(key=lambda pair: pair[0])
+            for key, value in pairs:
+                hasher.update(key.encode("utf-8"))
+                if value is not None:
+                    hasher.update(value.encode("utf-8"))
+
+    def is_first_party_project(self):
+        """ returns true if this is an FB first-party project """
+        return self.shipit_project is not None
+
     def create_fetcher(self, build_options, ctx):
         use_real_shipit = (
             ShipitTransformerFetcher.available() and build_options.use_shipit