diff --git a/build/fbcode_builder/getdeps/builder.py b/build/fbcode_builder/getdeps/builder.py index f1c51ba45..e97c68e46 100644 --- a/build/fbcode_builder/getdeps/builder.py +++ b/build/fbcode_builder/getdeps/builder.py @@ -98,6 +98,14 @@ class BuilderBase(object): dep_dirs = self.get_dev_run_extra_path_dirs(install_dirs, dep_munger) dep_munger.emit_dev_run_script(script_path, dep_dirs) + @property + def num_jobs(self) -> int: + # 1.5 GiB is a lot to assume, but it's typical of Facebook-style C++. + # Some manifests are even heavier and should override. + return self.build_opts.get_num_jobs( + int(self.manifest.get("build", "job_weight_mib", 1536, ctx=self.ctx)) + ) + def run_tests( self, install_dirs, schedule_type, owner, test_filter, retry, no_testpilot ): @@ -161,11 +169,7 @@ class MakeBuilder(BuilderBase): # Need to ensure that PREFIX is set prior to install because # libbpf uses it when generating its pkg-config file. # The lowercase prefix is used by some projects. - cmd = ( - ["make", "-j%s" % self.build_opts.num_jobs] - + self.build_args - + self._get_prefix() - ) + cmd = ["make", "-j%s" % self.num_jobs] + self.build_args + self._get_prefix() self._run_cmd(cmd, env=env) install_cmd = ["make"] + self.install_args + self._get_prefix() @@ -189,7 +193,7 @@ class CMakeBootStrapBuilder(MakeBuilder): [ "./bootstrap", "--prefix=" + self.inst_dir, - f"--parallel={self.build_opts.num_jobs}", + f"--parallel={self.num_jobs}", ] ) super(CMakeBootStrapBuilder, self)._build(install_dirs, reconfigure) @@ -247,7 +251,7 @@ class AutoconfBuilder(BuilderBase): self._run_cmd(["autoreconf", "-ivf"], cwd=self.src_dir, env=env) configure_cmd = [configure_path, "--prefix=" + self.inst_dir] + self.args self._run_cmd(configure_cmd, env=env) - self._run_cmd(["make", "-j%s" % self.build_opts.num_jobs], env=env) + self._run_cmd(["make", "-j%s" % self.num_jobs], env=env) self._run_cmd(["make", "install"], env=env) @@ -282,7 +286,7 @@ class Iproute2Builder(BuilderBase): shutil.rmtree(self.build_dir) shutil.copytree(self.src_dir, self.build_dir) self._patch() - self._run_cmd(["make", "-j%s" % self.build_opts.num_jobs], env=env) + self._run_cmd(["make", "-j%s" % self.num_jobs], env=env) install_cmd = ["make", "install", "DESTDIR=" + self.inst_dir] for d in ["include", "lib"]: @@ -314,7 +318,7 @@ class BistroBuilder(BuilderBase): "make", "install", "-j", - str(self.build_opts.num_jobs), + str(self.num_jobs), ], cwd=os.path.join(p, "cmake", "Release"), env=env, @@ -660,7 +664,7 @@ if __name__ == "__main__": "--config", "Release", "-j", - str(self.build_opts.num_jobs), + str(self.num_jobs), ], env=env, ) @@ -781,7 +785,7 @@ if __name__ == "__main__": "--buck-test-info", buck_test_info_name, "--retry=%d" % retry, - "-j=%s" % str(self.build_opts.num_jobs), + "-j=%s" % str(self.num_jobs), "--test-config", "platform=%s" % machine_suffix, "buildsystem=getdeps", @@ -794,7 +798,7 @@ if __name__ == "__main__": "--buck-test-info", buck_test_info_name, "--retry=%d" % retry, - "-j=%s" % str(self.build_opts.num_jobs), + "-j=%s" % str(self.num_jobs), "--print-long-results", ] @@ -854,7 +858,7 @@ if __name__ == "__main__": use_cmd_prefix=use_cmd_prefix, ) else: - args = [ctest, "--output-on-failure", "-j", str(self.build_opts.num_jobs)] + args = [ctest, "--output-on-failure", "-j", str(self.num_jobs)] if test_filter: args += ["-R", test_filter] @@ -918,11 +922,11 @@ class OpenSSLBuilder(BuilderBase): args = ["VC-WIN64A-masm", "-utf-8"] elif self.build_opts.is_darwin(): make = "make" - make_j_args = ["-j%s" % self.build_opts.num_jobs] + make_j_args = ["-j%s" % self.num_jobs] args = ["darwin64-x86_64-cc"] elif self.build_opts.is_linux(): make = "make" - make_j_args = ["-j%s" % self.build_opts.num_jobs] + make_j_args = ["-j%s" % self.num_jobs] args = ( ["linux-x86_64"] if not self.build_opts.is_arm() else ["linux-aarch64"] ) @@ -996,7 +1000,7 @@ class Boost(BuilderBase): self._run_cmd( [ b2, - "-j%s" % self.build_opts.num_jobs, + "-j%s" % self.num_jobs, "--prefix=%s" % self.inst_dir, "--builddir=%s" % self.build_dir, ] @@ -1140,7 +1144,312 @@ install(FILES sqlite3.h sqlite3ext.h DESTINATION include) "--config", "Release", "-j", - str(self.build_opts.num_jobs), + str(self.num_jobs), ], env=env, ) + + +class CargoBuilder(BuilderBase): + def __init__( + self, + build_opts, + ctx, + manifest, + src_dir, + build_dir, + inst_dir, + build_doc, + workspace_dir, + manifests_to_build, + loader, + ): + super(CargoBuilder, self).__init__( + build_opts, ctx, manifest, src_dir, build_dir, inst_dir + ) + self.build_doc = build_doc + self.ws_dir = workspace_dir + self.manifests_to_build = manifests_to_build and manifests_to_build.split(",") + self.loader = loader + + def run_cargo(self, install_dirs, operation, args=None): + args = args or [] + env = self._compute_env(install_dirs) + # Enable using nightly features with stable compiler + env["RUSTC_BOOTSTRAP"] = "1" + env["LIBZ_SYS_STATIC"] = "1" + cmd = [ + "cargo", + operation, + "--workspace", + "-j%s" % self.num_jobs, + ] + args + self._run_cmd(cmd, cwd=self.workspace_dir(), env=env) + + def build_source_dir(self): + return os.path.join(self.build_dir, "source") + + def workspace_dir(self): + return os.path.join(self.build_source_dir(), self.ws_dir or "") + + def manifest_dir(self, manifest): + return os.path.join(self.build_source_dir(), manifest) + + def recreate_dir(self, src, dst): + if os.path.isdir(dst): + shutil.rmtree(dst) + shutil.copytree(src, dst) + + def _build(self, install_dirs, reconfigure): + build_source_dir = self.build_source_dir() + self.recreate_dir(self.src_dir, build_source_dir) + + dot_cargo_dir = os.path.join(build_source_dir, ".cargo") + if not os.path.isdir(dot_cargo_dir): + os.mkdir(dot_cargo_dir) + + with open(os.path.join(dot_cargo_dir, "config"), "w+") as f: + f.write( + """\ +[build] +target-dir = '''{}''' + +[net] +git-fetch-with-cli = true + +[profile.dev] +debug = false +incremental = false +""".format( + self.build_dir.replace("\\", "\\\\") + ) + ) + + if self.ws_dir is not None: + self._patchup_workspace() + + try: + from .facebook.rust import vendored_crates + + vendored_crates(self.build_opts, build_source_dir) + except ImportError: + # This FB internal module isn't shippped to github, + # so just rely on cargo downloading crates on it's own + pass + + if self.manifests_to_build is None: + self.run_cargo( + install_dirs, + "build", + ["--out-dir", os.path.join(self.inst_dir, "bin"), "-Zunstable-options"], + ) + else: + for manifest in self.manifests_to_build: + self.run_cargo( + install_dirs, + "build", + [ + "--out-dir", + os.path.join(self.inst_dir, "bin"), + "-Zunstable-options", + "--manifest-path", + self.manifest_dir(manifest), + ], + ) + + self.recreate_dir(build_source_dir, os.path.join(self.inst_dir, "source")) + + def run_tests( + self, install_dirs, schedule_type, owner, test_filter, retry, no_testpilot + ): + if test_filter: + args = ["--", test_filter] + else: + args = [] + + if self.manifests_to_build is None: + self.run_cargo(install_dirs, "test", args) + if self.build_doc: + self.run_cargo(install_dirs, "doc", ["--no-deps"]) + else: + for manifest in self.manifests_to_build: + margs = ["--manifest-path", self.manifest_dir(manifest)] + self.run_cargo(install_dirs, "test", args + margs) + if self.build_doc: + self.run_cargo(install_dirs, "doc", ["--no-deps"] + margs) + + def _patchup_workspace(self): + """ + This method makes some assumptions about the state of the project and + its cargo dependendies: + 1. Crates from cargo dependencies can be extracted from Cargo.toml files + using _extract_crates function. It is using a heuristic so check its + code to understand how it is done. + 2. The extracted cargo dependencies crates can be found in the + dependency's install dir using _resolve_crate_to_path function + which again is using a heuristic. + + Notice that many things might go wrong here. E.g. if someone depends + on another getdeps crate by writing in their Cargo.toml file: + + my-rename-of-crate = { package = "crate", git = "..." } + + they can count themselves lucky because the code will raise an + Exception. There migh be more cases where the code will silently pass + producing bad results. + """ + workspace_dir = self.workspace_dir() + config = self._resolve_config() + if config: + with open(os.path.join(workspace_dir, "Cargo.toml"), "r+") as f: + manifest_content = f.read() + if "[package]" not in manifest_content: + # A fake manifest has to be crated to change the virtual + # manifest into a non-virtual. The virtual manifests are limited + # in many ways and the inability to define patches on them is + # one. Check https://github.com/rust-lang/cargo/issues/4934 to + # see if it is resolved. + f.write( + """ + [package] + name = "fake_manifest_of_{}" + version = "0.0.0" + [lib] + path = "/dev/null" + """.format( + self.manifest.name + ) + ) + else: + f.write("\n") + f.write(config) + + def _resolve_config(self): + """ + Returns a configuration to be put inside root Cargo.toml file which + patches the dependencies git code with local getdeps versions. + See https://doc.rust-lang.org/cargo/reference/manifest.html#the-patch-section + """ + dep_to_git = self._resolve_dep_to_git() + dep_to_crates = CargoBuilder._resolve_dep_to_crates( + self.build_source_dir(), dep_to_git + ) + + config = [] + for name in sorted(dep_to_git.keys()): + git_conf = dep_to_git[name] + crates = sorted(dep_to_crates.get(name, [])) + if not crates: + continue # nothing to patch, move along + crates_patches = [ + '{} = {{ path = "{}" }}'.format( + crate, + CargoBuilder._resolve_crate_to_path(crate, git_conf).replace( + "\\", "\\\\" + ), + ) + for crate in crates + ] + + config.append( + '[patch."{0}"]\n'.format(git_conf["repo_url"]) + + "\n".join(crates_patches) + ) + return "\n".join(config) + + def _resolve_dep_to_git(self): + """ + For each direct dependency of the currently build manifest check if it + is also cargo-builded and if yes then extract it's git configs and + install dir + """ + dependencies = self.manifest.get_dependencies(self.ctx) + if not dependencies: + return [] + + dep_to_git = {} + for dep in dependencies: + dep_manifest = self.loader.load_manifest(dep) + dep_builder = dep_manifest.get("build", "builder", ctx=self.ctx) + if dep_builder not in ["cargo", "nop"] or dep == "rust": + # This is a direct dependency, but it is not build with cargo + # and it is not simply copying files with nop, so ignore it. + # The "rust" dependency is an exception since it contains the + # toolchain. + continue + + git_conf = dep_manifest.get_section_as_dict("git", self.ctx) + if "repo_url" not in git_conf: + raise Exception( + "A cargo dependency requires git.repo_url to be defined." + ) + source_dir = self.loader.get_project_install_dir(dep_manifest) + if dep_builder == "cargo": + source_dir = os.path.join(source_dir, "source") + git_conf["source_dir"] = source_dir + dep_to_git[dep] = git_conf + return dep_to_git + + @staticmethod + def _resolve_dep_to_crates(build_source_dir, dep_to_git): + """ + This function traverse the build_source_dir in search of Cargo.toml + files, extracts the crate names from them using _extract_crates + function and returns a merged result containing crate names per + dependency name from all Cargo.toml files in the project. + """ + if not dep_to_git: + return {} # no deps, so don't waste time traversing files + + dep_to_crates = {} + for root, _, files in os.walk(build_source_dir): + for f in files: + if f == "Cargo.toml": + more_dep_to_crates = CargoBuilder._extract_crates( + os.path.join(root, f), dep_to_git + ) + for name, crates in more_dep_to_crates.items(): + dep_to_crates.setdefault(name, set()).update(crates) + return dep_to_crates + + @staticmethod + def _extract_crates(cargo_toml_file, dep_to_git): + """ + This functions reads content of provided cargo toml file and extracts + crate names per each dependency. The extraction is done by a heuristic + so it might be incorrect. + """ + deps_to_crates = {} + with open(cargo_toml_file, "r") as f: + for line in f.readlines(): + if line.startswith("#") or "git = " not in line: + continue # filter out commented lines and ones without git deps + for name, conf in dep_to_git.items(): + if 'git = "{}"'.format(conf["repo_url"]) in line: + pkg_template = ' package = "' + if pkg_template in line: + crate_name, _, _ = line.partition(pkg_template)[ + 2 + ].partition('"') + else: + crate_name, _, _ = line.partition("=") + deps_to_crates.setdefault(name, set()).add(crate_name.strip()) + return deps_to_crates + + @staticmethod + def _resolve_crate_to_path(crate, git_conf): + """ + Tries to find in git_conf["inst_dir"] by searching a [package] + keyword followed by name = "". + """ + source_dir = git_conf["source_dir"] + search_pattern = '[package]\nname = "{}"'.format(crate) + + for root, _, files in os.walk(source_dir): + for fname in files: + if fname == "Cargo.toml": + with open(os.path.join(root, fname), "r") as f: + if search_pattern in f.read(): + return root + + raise Exception("Failed to found crate {} in path {}".format(crate, source_dir)) diff --git a/build/fbcode_builder/getdeps/buildopts.py b/build/fbcode_builder/getdeps/buildopts.py index 80eea9dd7..b0b129282 100644 --- a/build/fbcode_builder/getdeps/buildopts.py +++ b/build/fbcode_builder/getdeps/buildopts.py @@ -16,7 +16,7 @@ from .copytree import containing_repo_type from .envfuncs import Env, add_path_entry from .fetcher import get_fbsource_repo_data from .manifest import ContextGenerator -from .platform import HostType, is_windows +from .platform import HostType, is_windows, get_available_ram def detect_project(path): @@ -65,10 +65,6 @@ class BuildOptions(object): vcvars_path - Path to external VS toolchain's vsvarsall.bat shared_libs - whether to build shared libraries """ - if not num_jobs: - import multiprocessing - - num_jobs = max(1, multiprocessing.cpu_count() // 2) if not install_dir: install_dir = os.path.join(scratch_dir, "installed") @@ -90,7 +86,7 @@ class BuildOptions(object): else: self.fbsource_dir = None - self.num_jobs = num_jobs + self.specified_num_jobs = num_jobs self.scratch_dir = scratch_dir self.install_dir = install_dir self.fbcode_builder_dir = fbcode_builder_dir @@ -157,6 +153,17 @@ class BuildOptions(object): def is_linux(self): return self.host_type.is_linux() + def get_num_jobs(self, job_weight): + """Given an estimated job_weight in MiB, compute a reasonable concurrency limit.""" + if self.specified_num_jobs: + return self.specified_num_jobs + + available_ram = get_available_ram() + + import multiprocessing + + return max(1, min(multiprocessing.cpu_count(), available_ram // job_weight)) + def get_context_generator(self, host_tuple=None, facebook_internal=None): """Create a manifest ContextGenerator for the specified target platform.""" if host_tuple is None: diff --git a/build/fbcode_builder/getdeps/cargo.py b/build/fbcode_builder/getdeps/cargo.py index 74d0cbf7c..3e9255f1b 100644 --- a/build/fbcode_builder/getdeps/cargo.py +++ b/build/fbcode_builder/getdeps/cargo.py @@ -42,7 +42,7 @@ class CargoBuilder(BuilderBase): "cargo", operation, "--workspace", - "-j%s" % self.build_opts.num_jobs, + "-j%s" % self.num_jobs, ] + args self._run_cmd(cmd, cwd=self.workspace_dir(), env=env) diff --git a/build/fbcode_builder/getdeps/manifest.py b/build/fbcode_builder/getdeps/manifest.py index e8ad4686d..8bff37eb9 100644 --- a/build/fbcode_builder/getdeps/manifest.py +++ b/build/fbcode_builder/getdeps/manifest.py @@ -63,6 +63,7 @@ SCHEMA = { "builder": REQUIRED, "subdir": OPTIONAL, "build_in_src_dir": OPTIONAL, + "job_weight_mib": OPTIONAL, }, }, "msbuild": {"optional_section": True, "fields": {"project": REQUIRED}}, diff --git a/build/fbcode_builder/getdeps/platform.py b/build/fbcode_builder/getdeps/platform.py index c7c7547e8..eb03476dc 100644 --- a/build/fbcode_builder/getdeps/platform.py +++ b/build/fbcode_builder/getdeps/platform.py @@ -48,6 +48,104 @@ def get_linux_type(): return "linux", name, version_id +# Ideally we'd use a common library like `psutil` to read system information, +# but getdeps can't take third-party dependencies. + + +def _get_available_ram_linux(): + # TODO: Ideally, this function would inspect the current cgroup for any + # limits, rather than solely relying on system RAM. + with open("/proc/meminfo") as f: + for line in f: + try: + key, value = line.split(":", 1) + except ValueError: + continue + suffix = " kB\n" + if key == "MemAvailable" and value.endswith(suffix): + value = value[: -len(suffix)] + try: + return int(value) // 1024 + except ValueError: + continue + + raise NotImplementedError("/proc/meminfo had no valid MemAvailable") + + +def _get_available_ram_macos() -> int: + import ctypes.util + + libc = ctypes.CDLL(ctypes.util.find_library("libc"), use_errno=True) + sysctlbyname = libc.sysctlbyname + sysctlbyname.restype = ctypes.c_int + sysctlbyname.argtypes = [ + ctypes.c_char_p, + ctypes.c_void_p, + ctypes.POINTER(ctypes.c_size_t), + ctypes.c_void_p, + ctypes.c_size_t, + ] + # TODO: There may be some way to approximate an availability + # metric, but just use total RAM for now. + memsize = ctypes.c_int64() + memsizesize = ctypes.c_size_t(8) + res = sysctlbyname( + b"hw.memsize", ctypes.byref(memsize), ctypes.byref(memsizesize), None, 0 + ) + if res != 0: + raise NotImplementedError( + f"failed to retrieve hw.memsize sysctl: {ctypes.get_errno()}" + ) + return memsize.value // (1024 * 1024) + + +def _get_available_ram_windows() -> int: + import ctypes + + DWORD = ctypes.c_uint32 + QWORD = ctypes.c_uint64 + + class MEMORYSTATUSEX(ctypes.Structure): + _fields_ = [ + ("dwLength", DWORD), + ("dwMemoryLoad", DWORD), + ("ullTotalPhys", QWORD), + ("ullAvailPhys", QWORD), + ("ullTotalPageFile", QWORD), + ("ullAvailPageFile", QWORD), + ("ullTotalVirtual", QWORD), + ("ullAvailVirtual", QWORD), + ("ullExtendedVirtual", QWORD), + ] + + ms = MEMORYSTATUSEX() + ms.dwLength = ctypes.sizeof(ms) + # pyre-ignore[16] + res = ctypes.windll.kernel32.GlobalMemoryStatusEx(ctypes.byref(ms)) + if res == 0: + raise NotImplementedError("error calling GlobalMemoryStatusEx") + + # This is fuzzy, but AvailPhys is too conservative, and AvailTotal is too + # aggressive, so average the two. It's okay for builds to use some swap. + return (ms.ullAvailPhys + ms.ullTotalPhys) // (2 * 1024 * 1024) + + +def get_available_ram() -> int: + """ + Returns a platform-appropriate available RAM metric in MiB. + """ + if sys.platform == "linux": + return _get_available_ram_linux() + elif sys.platform == "darwin": + return _get_available_ram_macos() + elif sys.platform == "win32": + return _get_available_ram_windows() + else: + raise NotImplementedError( + f"platform {sys.platform} does not have an implementation of get_available_ram" + ) + + class HostType(object): def __init__(self, ostype=None, distro=None, distrovers=None): if ostype is None: diff --git a/build/fbcode_builder/manifests/boost b/build/fbcode_builder/manifests/boost index ad72fd5b5..490c0c385 100644 --- a/build/fbcode_builder/manifests/boost +++ b/build/fbcode_builder/manifests/boost @@ -53,6 +53,7 @@ boost-devel [build] builder = boost +job_weight_mib = 512 [b2.args] --with-atomic diff --git a/build/fbcode_builder/manifests/fboss b/build/fbcode_builder/manifests/fboss index f29873e72..d7e8a398f 100644 --- a/build/fbcode_builder/manifests/fboss +++ b/build/fbcode_builder/manifests/fboss @@ -9,6 +9,8 @@ repo_url = https://github.com/facebook/fboss.git [build.os=linux] builder = cmake +# fboss files take a lot of RAM to compile. +job_weight_mib = 3072 [build.not(os=linux)] builder = nop diff --git a/build/fbcode_builder/manifests/fbthrift b/build/fbcode_builder/manifests/fbthrift index a8a17ba72..01f2bbf2c 100644 --- a/build/fbcode_builder/manifests/fbthrift +++ b/build/fbcode_builder/manifests/fbthrift @@ -9,6 +9,7 @@ repo_url = https://github.com/facebook/fbthrift.git [build] builder = cmake +job_weight_mib = 2048 [dependencies] bison diff --git a/build/fbcode_builder/manifests/folly b/build/fbcode_builder/manifests/folly index 9647b17f8..7bb4e240f 100644 --- a/build/fbcode_builder/manifests/folly +++ b/build/fbcode_builder/manifests/folly @@ -9,6 +9,7 @@ repo_url = https://github.com/facebook/folly.git [build] builder = cmake +job_weight_mib = 1024 [dependencies] gflags