1
0
mirror of https://github.com/facebook/proxygen.git synced 2025-08-10 05:22:59 +03:00
Files
proxygen/build/fbcode_builder/getdeps/fetcher.py
Wez Furlong c4da97262f fbcode_builder: getdeps: add GitFetcher
Summary: this fetcher knows how to check out sources via git.

Reviewed By: simpkins

Differential Revision: D14691000

fbshipit-source-id: 60f1ffbfed7b32a019aef6aa70ae0903f2782451
2019-05-03 16:00:39 -07:00

217 lines
8.3 KiB
Python

# Copyright (c) 2019-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import re
import subprocess
from .runcmd import run_cmd
try:
from urlparse import urlparse
except ImportError:
from urllib.parse import urlparse
class ChangeStatus(object):
""" Indicates the nature of changes that happened while updating
the source directory. There are two broad uses:
* When extracting archives for third party software we want to
know that we did something (eg: we either extracted code or
we didn't do anything)
* For 1st party code where we use shipit to transform the code,
we want to know if we changed anything so that we can perform
a build, but we generally want to be a little more nuanced
and be able to distinguish between just changing a source file
and whether we might need to reconfigure the build system.
"""
def __init__(self, all_changed=False):
""" Construct a ChangeStatus object. The default is to create
a status that indicates no changes, but passing all_changed=True
will create one that indicates that everything changed """
if all_changed:
self.source_files = 1
self.make_files = 1
else:
self.source_files = 0
self.make_files = 0
def record_change(self, file_name):
""" Used by the shipit fetcher to record changes as it updates
files in the destination. If the file name might be one used
in the cmake build system that we use for 1st party code, then
record that as a "make file" change. We could broaden this
to match any file used by various build systems, but it is
only really useful for our internal cmake stuff at this time.
If the file isn't a build file and is under the `fbcode_builder`
dir then we don't class that as an interesting change that we
might need to rebuild, so we ignore it.
Otherwise we record the file as a source file change. """
if "cmake" in file_name.lower():
self.make_files += 1
return
if "/fbcode_builder/" in file_name:
return
self.source_files += 1
def sources_changed(self):
""" Returns true if any source files were changed during
an update operation. This will typically be used to decide
that the build system to be run on the source dir in an
incremental mode """
return self.source_files > 0
def build_changed(self):
""" Returns true if any build files were changed during
an update operation. This will typically be used to decidfe
that the build system should be reconfigured and re-run
as a full build """
return self.make_files > 0
class Fetcher(object):
""" The Fetcher is responsible for fetching and extracting the
sources for project. The Fetcher instance defines where the
extracted data resides and reports this to the consumer via
its `get_src_dir` method. """
def update(self):
""" Brings the src dir up to date, ideally minimizing
changes so that a subsequent build doesn't over-build.
Returns a ChangeStatus object that helps the caller to
understand the nature of the changes required during
the update. """
return ChangeStatus()
def clean(self):
""" Reverts any changes that might have been made to
the src dir """
pass
def hash(self):
""" Returns a hash that identifies the version of the code in the
working copy. For a git repo this is commit hash for the working
copy. For other Fetchers this should relate to the version of
the code in the src dir. The intent is that if a manifest
changes the version/rev of a project that the hash be different. """
pass
def get_src_dir(self):
""" Returns the source directory that the project was
extracted into """
pass
class GitFetcher(Fetcher):
def __init__(self, build_options, manifest, repo_url, rev):
# Extract the host/path portions of the URL and generate a flattened
# directory name. eg:
# github.com/facebook/folly.git -> github.com-facebook-folly.git
url = urlparse(repo_url)
directory = "%s%s" % (url.netloc, url.path)
for s in ["/", "\\", ":"]:
directory = directory.replace(s, "-")
# Place it in a repos dir in the scratch space
repos_dir = os.path.join(build_options.scratch_dir, "repos")
if not os.path.exists(repos_dir):
os.makedirs(repos_dir)
self.repo_dir = os.path.join(repos_dir, directory)
if not rev:
hash_file = os.path.join(
build_options.project_hashes,
re.sub("\\.git$", "-rev.txt", url.path[1:]),
)
if os.path.exists(hash_file):
with open(hash_file, "r") as f:
data = f.read()
m = re.match("Subproject commit ([a-fA-F0-9]{40})", data)
if not m:
raise Exception("Failed to parse rev from %s" % hash_file)
rev = m.group(1)
print("Using pinned rev %s for %s" % (rev, repo_url))
self.rev = rev or "master"
self.origin_repo = repo_url
self.manifest = manifest
def _update(self):
current_hash = (
subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=self.repo_dir)
.strip()
.decode("utf-8")
)
target_hash = (
subprocess.check_output(["git", "rev-parse", self.rev], cwd=self.repo_dir)
.strip()
.decode("utf-8")
)
if target_hash == current_hash:
# It's up to date, so there are no changes. This doesn't detect eg:
# if origin/master moved and rev='master', but that's ok for our purposes;
# we should be using explicit hashes or eg: a stable branch for the cases
# that we care about, and it isn't unreasonable to require that the user
# explicitly perform a clean build if those have moved. For the most
# part we prefer that folks build using a release tarball from github
# rather than use the git protocol, as it is generally a bit quicker
# to fetch and easier to hash and verify tarball downloads.
return ChangeStatus()
print("Updating %s -> %s" % (self.repo_dir, self.rev))
run_cmd(["git", "fetch", "origin"], cwd=self.repo_dir)
run_cmd(["git", "checkout", self.rev], cwd=self.repo_dir)
run_cmd(["git", "submodule", "update", "--init"], cwd=self.repo_dir)
return ChangeStatus(True)
def update(self):
if os.path.exists(self.repo_dir):
return self._update()
self._clone()
return ChangeStatus(True)
def _clone(self):
print("Cloning %s..." % self.origin_repo)
# The basename/dirname stuff allows us to dance around issues where
# eg: this python process is native win32, but the git.exe is cygwin
# or msys and doesn't like the absolute windows path that we'd otherwise
# pass to it. Careful use of cwd helps avoid headaches with cygpath.
run_cmd(
[
"git",
"clone",
"--depth=100",
"--",
self.origin_repo,
os.path.basename(self.repo_dir),
],
cwd=os.path.dirname(self.repo_dir),
)
self._update()
def clean(self):
if os.path.exists(self.repo_dir):
run_cmd(["git", "clean", "-fxd"], cwd=self.repo_dir)
def hash(self):
""" Returns a hash that identifies the version of the code in the
working copy """
return (
subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=self.repo_dir)
.strip()
.decode("utf-8")[0:6]
)
def get_src_dir(self):
return self.repo_dir