Mirror of https://github.com/facebook/proxygen.git, synced 2025-08-08 18:02:05 +03:00
fbcode_builder: getdeps: add ArchiveFetcher
Summary: this fetcher knows how to download a URL that references an archive and extract the sources from it. Compressed tarballs and zipfiles are supported.

Reviewed By: simpkins

Differential Revision: D14690994

fbshipit-source-id: ffbbd1d9479b0d5aa3e5cd13fe11a560e9d01c2a
Committed by Facebook Github Bot
parent c4da97262f
commit 2d3cfa077f
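The heart of the new fetcher, per the summary above, is sniffing the archive type and dispatching to the matching stdlib opener. A minimal standalone sketch of just that dispatch (the function name and arguments here are illustrative, not part of the commit):

import tarfile
import zipfile


def extract_archive(file_name, dest_dir):
    # Pick an opener by inspecting the file's actual header bytes rather
    # than trusting its extension; both stdlib sniffers read the file.
    if tarfile.is_tarfile(file_name):
        opener = tarfile.open
    elif zipfile.is_zipfile(file_name):
        opener = zipfile.ZipFile
    else:
        raise Exception("don't know how to extract %s" % file_name)
    # TarFile and ZipFile both expose extractall(path).
    with opener(file_name) as archive:
        archive.extractall(dest_dir)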
getdeps/fetcher.py

@@ -7,17 +7,26 @@
 from __future__ import absolute_import, division, print_function, unicode_literals
 
+import hashlib
 import os
 import re
+import shutil
 import subprocess
+import sys
+import tarfile
+import time
+import zipfile
 
+from .platform import is_windows
 from .runcmd import run_cmd
 
 
 try:
     from urlparse import urlparse
+    from urllib import urlretrieve
 except ImportError:
     from urllib.parse import urlparse
+    from urllib.request import urlretrieve
 
 
 class ChangeStatus(object):
@@ -214,3 +223,141 @@ class GitFetcher(Fetcher):
 
     def get_src_dir(self):
         return self.repo_dir
+
+
+def download_url_to_file_with_progress(url, file_name):
+    print("Download %s -> %s ..." % (url, file_name))
+
+    class Progress(object):
+        last_report = 0
+
+        def progress(self, count, block, total):
+            if total == -1:
+                total = "(Unknown)"
+            amount = count * block
+
+            if sys.stdout.isatty():
+                sys.stdout.write("\r downloading %s of %s " % (amount, total))
+            else:
+                # When logging to CI logs, avoid spamming the logs and print
+                # status every few seconds
+                now = time.time()
+                if now - self.last_report > 5:
+                    sys.stdout.write(".. %s of %s " % (amount, total))
+                    self.last_report = now
+            sys.stdout.flush()
+
+    progress = Progress()
+    start = time.time()
+    (_filename, headers) = urlretrieve(url, file_name, reporthook=progress.progress)
+    end = time.time()
+    sys.stdout.write(" [Complete in %f seconds]\n" % (end - start))
+    sys.stdout.flush()
+    print("%s" % (headers))
+
+
+class ArchiveFetcher(Fetcher):
+    def __init__(self, build_options, manifest, url, sha256):
+        self.manifest = manifest
+        self.url = url
+        self.sha256 = sha256
+        self.build_options = build_options
+
+        url = urlparse(self.url)
+        basename = "%s-%s" % (manifest.name, os.path.basename(url.path))
+        self.file_name = os.path.join(build_options.scratch_dir, "downloads", basename)
+        self.src_dir = os.path.join(build_options.scratch_dir, "extracted", basename)
+        self.hash_file = self.src_dir + ".hash"
+
+    def _verify_hash(self):
+        h = hashlib.sha256()
+        with open(self.file_name, "rb") as f:
+            while True:
+                block = f.read(8192)
+                if not block:
+                    break
+                h.update(block)
+        digest = h.hexdigest()
+        if digest != self.sha256:
+            os.unlink(self.file_name)
+            raise Exception(
+                "%s: expected sha256 %s but got %s" % (self.url, self.sha256, digest)
+            )
+
+    def _download(self):
+        download_dir = os.path.dirname(self.file_name)
+        if not os.path.exists(download_dir):
+            os.makedirs(download_dir)
+
+        download_url_to_file_with_progress(self.url, self.file_name)
+
+        self._verify_hash()
+
+    def clean(self):
+        if os.path.exists(self.src_dir):
+            shutil.rmtree(self.src_dir)
+
+    def update(self):
+        try:
+            with open(self.hash_file, "r") as f:
+                saved_hash = f.read().strip()
+                if saved_hash == self.sha256 and os.path.exists(self.src_dir):
+                    # Everything is up to date
+                    return ChangeStatus()
+                print(
+                    "saved hash %s doesn't match expected hash %s, re-validating"
+                    % (saved_hash, self.sha256)
+                )
+                os.unlink(self.hash_file)
+        except EnvironmentError:
+            pass
+
+        # If we got here we know the contents of src_dir are either missing
+        # or wrong, so blow away whatever happened to be there first.
+        if os.path.exists(self.src_dir):
+            shutil.rmtree(self.src_dir)
+
+        # If we already have a file here, make sure it looks legit before
+        # proceeding: any errors and we just remove it and re-download
+        if os.path.exists(self.file_name):
+            try:
+                self._verify_hash()
+            except Exception:
+                if os.path.exists(self.file_name):
+                    os.unlink(self.file_name)
+
+        if not os.path.exists(self.file_name):
+            self._download()
+
+        if tarfile.is_tarfile(self.file_name):
+            opener = tarfile.open
+        elif zipfile.is_zipfile(self.file_name):
+            opener = zipfile.ZipFile
+        else:
+            raise Exception("don't know how to extract %s" % self.file_name)
+        os.makedirs(self.src_dir)
+        print("Extract %s -> %s" % (self.file_name, self.src_dir))
+        t = opener(self.file_name)
+        if is_windows():
+            # Ensure that we don't fall over when dealing with long paths
+            # on windows
+            src = r"\\?\%s" % os.path.normpath(self.src_dir)
+        else:
+            src = self.src_dir
+        # The `str` here is necessary to ensure that we don't pass a unicode
+        # object down to tarfile.extractall on python2. When extracting
+        # the boost tarball it makes some assumptions and tries to convert
+        # a non-ascii path to ascii and throws.
+        src = str(src)
+        t.extractall(src)
+
+        with open(self.hash_file, "w") as f:
+            f.write(self.sha256)
+
+        return ChangeStatus(True)
+
+    def hash(self):
+        return self.sha256[0:6]
+
+    def get_src_dir(self):
+        return self.src_dir
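For context, here is a minimal sketch of how the new fetcher might be driven. The BuildOptions and Manifest stand-ins below are hypothetical stubs (the real getdeps objects carry far more state), the import path is an assumption, and the URL and digest are placeholders:

from collections import namedtuple

from getdeps.fetcher import ArchiveFetcher  # import path is an assumption

# Hypothetical stubs: the constructor only reads scratch_dir and name.
BuildOptions = namedtuple("BuildOptions", ["scratch_dir"])
Manifest = namedtuple("Manifest", ["name"])

fetcher = ArchiveFetcher(
    BuildOptions(scratch_dir="/tmp/getdeps"),
    Manifest(name="example"),
    "https://example.com/example-1.0.tar.gz",  # placeholder URL
    "0" * 64,  # placeholder sha256; the real digest is needed to pass _verify_hash
)

# update() downloads if needed, verifies the sha256, extracts the archive,
# writes the .hash marker, and reports whether anything changed.
status = fetcher.update()
print(fetcher.get_src_dir())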
getdeps/manifest.py

@@ -11,7 +11,7 @@ from __future__ import absolute_import, division, print_function, unicode_literals
 import io
 
 from .expr import parse_expr
-from .fetcher import GitFetcher
+from .fetcher import ArchiveFetcher, GitFetcher
 
 
 try:
@@ -263,6 +263,12 @@ class ManifestParser(object):
             rev = self.get("git", "rev")
             return GitFetcher(build_options, self, repo_url, rev)
 
+        url = self.get("download", "url", ctx=ctx)
+        if url:
+            return ArchiveFetcher(
+                build_options, self, url, self.get("download", "sha256", ctx=ctx)
+            )
+
         raise KeyError(
             "project %s has no fetcher configuration matching %r" % (self.name, ctx)
         )
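With this parser change in place, a project opts into ArchiveFetcher by carrying a download stanza instead of a git one. A hypothetical manifest, assuming the INI-style format that ManifestParser reads (the name, URL, and digest are illustrative):

[manifest]
name = example

[download]
url = https://example.com/example-1.0.tar.gz
sha256 = 0000000000000000000000000000000000000000000000000000000000000000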