1
0
mirror of https://github.com/moby/buildkit.git synced 2025-08-01 02:04:26 +03:00
Files
buildkit/source/http/source.go
2025-06-10 21:33:15 -07:00

625 lines
16 KiB
Go

package http
import (
"bytes"
"cmp"
"context"
"crypto/sha256"
"encoding/json"
"fmt"
"io"
"mime"
"net/http"
"net/url"
"os"
"path"
"path/filepath"
"slices"
"strconv"
"strings"
"time"
"github.com/moby/buildkit/cache"
"github.com/moby/buildkit/session"
"github.com/moby/buildkit/session/secrets"
"github.com/moby/buildkit/snapshot"
"github.com/moby/buildkit/solver"
"github.com/moby/buildkit/solver/pb"
"github.com/moby/buildkit/source"
srctypes "github.com/moby/buildkit/source/types"
"github.com/moby/buildkit/util/bklog"
"github.com/moby/buildkit/util/tracing"
"github.com/moby/buildkit/version"
digest "github.com/opencontainers/go-digest"
"github.com/pkg/errors"
)
// Prefixes used to derive per-host secret names when no explicit auth header
// secret is configured. The URL hostname is appended to the prefix.
const (
// HTTPAuthHeaderSecretPrefix prefixes the name of the secret whose value is
// sent verbatim as the Authorization header.
HTTPAuthHeaderSecretPrefix = "HTTP_AUTH_HEADER_"
// HTTPAuthTokenSecretPrefix prefixes the name of the secret whose value is
// sent as a "Bearer " token in the Authorization header.
HTTPAuthTokenSecretPrefix = "HTTP_AUTH_TOKEN_"
)
// supportedUserDefinedHeaders defines the supported user-defined header
// fields, keyed by canonical header name. Fields not included here are
// silently dropped.
var supportedUserDefinedHeaders = map[string]bool{
http.CanonicalHeaderKey("accept"): true,
http.CanonicalHeaderKey("user-agent"): true,
}
// Opt holds the options passed to NewSource.
type Opt struct {
// CacheAccessor is the cache the source stores downloads in and looks up
// previously stored metadata from.
CacheAccessor cache.Accessor
// Transport is the round tripper used for outgoing requests. When nil,
// NewSource falls back to tracing.DefaultTransport.
Transport http.RoundTripper
}
// httpSource is the source implementation for the http and https schemes.
type httpSource struct {
// cache is the accessor taken from Opt.CacheAccessor.
cache cache.Accessor
// transport is the base round tripper wrapped for every client.
transport http.RoundTripper
}
// NewSource creates the HTTP(S) source from opt. A nil opt.Transport is
// replaced by tracing.DefaultTransport. The returned error is always nil.
func NewSource(opt Opt) (source.Source, error) {
	src := &httpSource{
		cache:     opt.CacheAccessor,
		transport: opt.Transport,
	}
	if src.transport == nil {
		src.transport = tracing.DefaultTransport
	}
	return src, nil
}
// Schemes lists the URL schemes served by this source: http and https.
func (hs *httpSource) Schemes() []string {
	schemes := make([]string, 0, 2)
	schemes = append(schemes, srctypes.HTTPScheme, srctypes.HTTPSScheme)
	return schemes
}
// Identifier builds an HTTP identifier for ref and applies the recognized
// attributes to it: checksum, file name, permissions, owner, auth header
// secret and user-defined headers. Header attributes whose canonical name is
// not listed in supportedUserDefinedHeaders are silently dropped.
//
// It returns an error when the identifier cannot be created, when the
// checksum is not a valid digest, or when a numeric attribute fails to parse.
func (hs *httpSource) Identifier(scheme, ref string, attrs map[string]string, platform *pb.Platform) (source.Identifier, error) {
	id, err := NewHTTPIdentifier(ref, scheme == "https")
	if err != nil {
		return nil, err
	}

	for k, v := range attrs {
		switch k {
		case pb.AttrHTTPChecksum:
			dgst, err := digest.Parse(v)
			if err != nil {
				return nil, err
			}
			id.Checksum = dgst
		case pb.AttrHTTPFilename:
			id.Filename = v
		case pb.AttrHTTPPerm:
			// Base 0 lets the value carry its own prefix (e.g. "0644").
			i, err := strconv.ParseInt(v, 0, 64)
			if err != nil {
				return nil, err
			}
			id.Perm = int(i)
		case pb.AttrHTTPUID:
			i, err := strconv.ParseInt(v, 0, 64)
			if err != nil {
				return nil, err
			}
			id.UID = int(i)
		case pb.AttrHTTPGID:
			i, err := strconv.ParseInt(v, 0, 64)
			if err != nil {
				return nil, err
			}
			id.GID = int(i)
		case pb.AttrHTTPAuthHeaderSecret:
			id.AuthHeaderSecret = v
		default:
			if name, found := strings.CutPrefix(k, pb.AttrHTTPHeaderPrefix); found {
				name = http.CanonicalHeaderKey(name)
				if supportedUserDefinedHeaders[name] {
					id.Header = append(id.Header, HeaderField{Name: name, Value: v})
				}
			}
		}
	}

	// Sort header fields to ensure consistent hashing (see urlHash() and
	// formatCacheKey()). Attribute keys that differ only in case canonicalize
	// to the same header name, so ties are broken on the value: map iteration
	// order is random and the sort is not stable, which would otherwise leave
	// the relative order of such duplicates nondeterministic.
	slices.SortFunc(id.Header, func(a, b HeaderField) int {
		if c := cmp.Compare(a.Name, b.Name); c != 0 {
			return c
		}
		return cmp.Compare(a.Value, b.Value)
	})

	return id, nil
}
// httpSourceHandler is the per-identifier source instance returned by
// Resolve. It embeds the shared httpSource for cache and transport access.
type httpSourceHandler struct {
*httpSource
// src is a copy of the identifier being resolved.
src HTTPIdentifier
// refID is the ID of the cache ref holding the download; it is set by
// CacheKey (on an ETag match) and by save, and reused by Snapshot.
refID string
// cacheKey is the content digest determined by CacheKey; Snapshot compares
// a fresh download against it.
cacheKey digest.Digest
// sm is the session manager used for the transport and secret lookups.
sm *session.Manager
}
// Resolve returns a source instance for id, which must be a *HTTPIdentifier;
// any other identifier type is rejected with an error. The identifier is
// copied into the returned handler.
func (hs *httpSource) Resolve(ctx context.Context, id source.Identifier, sm *session.Manager, _ solver.Vertex) (source.SourceInstance, error) {
	switch httpID := id.(type) {
	case *HTTPIdentifier:
		handler := &httpSourceHandler{
			httpSource: hs,
			src:        *httpID,
			sm:         sm,
		}
		return handler, nil
	default:
		return nil, errors.Errorf("invalid http identifier %v", id)
	}
}
// client returns a new HTTP client whose transport wraps the source's base
// transport together with the session manager and the session group g.
func (hs *httpSourceHandler) client(g session.Group) *http.Client {
	c := new(http.Client)
	c.Transport = newTransport(hs.transport, hs.sm, g)
	return c
}
// urlHash computes the internal digest under which the ETag metadata for this
// source is stored. It does not leak outside this package.
//
// The digest covers the URL and file name (joined with a NUL byte), the
// permission and ownership settings, the auth header secret name and the
// user-defined header fields.
func (hs *httpSourceHandler) urlHash() (digest.Digest, error) {
	// Field names, order and tags determine the hashed JSON; keep them stable.
	type hashInput struct {
		Filename         []byte
		Perm, UID, GID   int
		AuthHeaderSecret string `json:",omitempty"`
		Header           []HeaderField
	}

	src := &hs.src
	urlAndName := bytes.Join([][]byte{[]byte(src.URL), []byte(src.Filename)}, []byte{0})

	encoded, err := json.Marshal(hashInput{
		Filename:         urlAndName,
		Perm:             src.Perm,
		UID:              src.UID,
		GID:              src.GID,
		AuthHeaderSecret: src.AuthHeaderSecret,
		Header:           src.Header,
	})
	if err != nil {
		return "", err
	}
	return digest.FromBytes(encoded), nil
}
// formatCacheKey derives the cache key digest from the file name, the content
// digest dgst, the optional Last-Modified value and the identifier's
// permission, ownership, auth secret and header settings. If the JSON
// encoding fails, dgst is returned unchanged.
func (hs *httpSourceHandler) formatCacheKey(filename string, dgst digest.Digest, lastModTime string) digest.Digest {
	// Field names, order and tags determine the hashed JSON; keep them stable.
	type keyInput struct {
		Filename         string
		Perm, UID, GID   int
		Checksum         digest.Digest
		LastModTime      string        `json:",omitempty"`
		AuthHeaderSecret string        `json:",omitempty"`
		Header           []HeaderField `json:",omitempty"`
	}

	src := &hs.src
	encoded, err := json.Marshal(keyInput{
		Filename:         filename,
		Perm:             src.Perm,
		UID:              src.UID,
		GID:              src.GID,
		Checksum:         dgst,
		LastModTime:      lastModTime,
		AuthHeaderSecret: src.AuthHeaderSecret,
		Header:           src.Header,
	})
	if err != nil {
		return dgst
	}
	return digest.FromBytes(encoded)
}
// CacheKey determines the cache key for the source.
//
// When the identifier carries a checksum, the key is derived from it without
// any network access. Otherwise previously stored metadata for the URL hash
// is looked up and its ETags are used to revalidate, first with a HEAD
// request and manual ETag comparison, then with a conditional GET. If nothing
// matches, the GET response is downloaded via save and the key is derived
// from the digest of the downloaded content.
//
// It returns the formatted cache key, the content digest, nil cache options
// and true, or an error. As a side effect it records the content digest in
// hs.cacheKey and, when a stored or freshly saved ref is used, its ID in
// hs.refID.
func (hs *httpSourceHandler) CacheKey(ctx context.Context, g session.Group, index int) (string, string, solver.CacheOpts, bool, error) {
	if hs.src.Checksum != "" {
		hs.cacheKey = hs.src.Checksum
		return hs.formatCacheKey(getFileName(hs.src.URL, hs.src.Filename, nil), hs.src.Checksum, "").String(), hs.src.Checksum.String(), nil, true, nil
	}

	uh, err := hs.urlHash()
	if err != nil {
		return "", "", nil, false, err
	}

	// look up metadata(previously stored headers) for that URL
	mds, err := searchHTTPURLDigest(ctx, hs.cache, uh)
	if err != nil {
		return "", "", nil, false, errors.Wrapf(err, "failed to search metadata for %s", uh)
	}

	req, err := hs.newHTTPRequest(ctx, g)
	if err != nil {
		return "", "", nil, false, err
	}

	// m maps each known ETag to the metadata of a ref that still exists.
	m := map[string]cacheRefMetadata{}

	// If we request a single ETag in 'If-None-Match', some servers omit the
	// unambiguous ETag in their response.
	// See: https://github.com/moby/buildkit/issues/905
	var onlyETag string

	if len(mds) > 0 {
		for _, md := range mds {
			if etag := md.getETag(); etag != "" {
				if dgst := md.getHTTPChecksum(); dgst != "" {
					// check that ref still exists
					ref, err := hs.cache.Get(ctx, md.ID(), nil)
					if err == nil {
						m[etag] = md
						// Deliberately deferred inside the loop: the refs are
						// held until CacheKey returns.
						defer ref.Release(context.WithoutCancel(ctx))
					}
				}
			}
		}
		if len(m) > 0 {
			etags := make([]string, 0, len(m))
			for t := range m {
				etags = append(etags, t)
			}
			req.Header.Set("If-None-Match", strings.Join(etags, ", "))

			if len(etags) == 1 {
				onlyETag = etags[0]
			}
		}
	}

	client := hs.client(g)

	// Some servers seem to have trouble supporting If-None-Match properly even
	// though they return ETag-s. So first, optionally try a HEAD request with
	// manual ETag value comparison.
	if len(m) > 0 {
		req.Method = "HEAD"
		// we need to add accept-encoding header manually because stdlib only adds it to GET requests
		// some servers will return different etags if Accept-Encoding header is different
		req.Header.Set("Accept-Encoding", "gzip")
		resp, err := client.Do(req)
		if err == nil {
			if resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusNotModified {
				respETag := etagValue(resp.Header.Get("ETag"))

				// If a 304 is returned without an ETag and we had only sent one ETag,
				// the response refers to the ETag we asked about.
				if respETag == "" && onlyETag != "" && resp.StatusCode == http.StatusNotModified {
					respETag = onlyETag
				}
				md, ok := m[respETag]
				if ok {
					hs.refID = md.ID()
					dgst := md.getHTTPChecksum()
					if dgst != "" {
						hs.cacheKey = dgst
						modTime := md.getHTTPModTime()
						resp.Body.Close()
						return hs.formatCacheKey(getFileName(hs.src.URL, hs.src.Filename, resp), dgst, modTime).String(), dgst.String(), nil, true, nil
					}
				}
			}
			resp.Body.Close()
		}
		req.Method = "GET"
		// Unset explicit Accept-Encoding for GET, otherwise the go http library will not
		// transparently decompress the response body when it is gzipped. It will still add
		// this header implicitly when the request is made though.
		req.Header.Del("Accept-Encoding")
	}

	resp, err := client.Do(req)
	if err != nil {
		return "", "", nil, false, err
	}
	// Close the body on every return path below, including the status and
	// metadata error returns and after save has consumed it.
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 400 {
		return "", "", nil, false, errors.Errorf("invalid response status %d", resp.StatusCode)
	}
	if resp.StatusCode == http.StatusNotModified {
		respETag := etagValue(resp.Header.Get("ETag"))
		if respETag == "" && onlyETag != "" {
			respETag = onlyETag

			// Set the missing ETag header on the response so that it is
			// present for any later reader of resp.
			resp.Header.Set("ETag", onlyETag)
		}
		md, ok := m[respETag]
		if !ok {
			return "", "", nil, false, errors.Errorf("invalid not-modified ETag: %v", respETag)
		}
		hs.refID = md.ID()
		dgst := md.getHTTPChecksum()
		if dgst == "" {
			return "", "", nil, false, errors.Errorf("invalid metadata change")
		}
		hs.cacheKey = dgst
		modTime := md.getHTTPModTime()
		return hs.formatCacheKey(getFileName(hs.src.URL, hs.src.Filename, resp), dgst, modTime).String(), dgst.String(), nil, true, nil
	}

	ref, dgst, err := hs.save(ctx, resp, g)
	if err != nil {
		return "", "", nil, false, err
	}
	// Only the digest is needed here; save has recorded the ref ID in
	// hs.refID for Snapshot to pick up.
	ref.Release(context.TODO())

	hs.cacheKey = dgst

	return hs.formatCacheKey(getFileName(hs.src.URL, hs.src.Filename, resp), dgst, resp.Header.Get("Last-Modified")).String(), dgst.String(), nil, true, nil
}
// save writes the body of resp into a new cache ref and commits it.
//
// The file is created inside the mounted ref under the name chosen by
// getFileName, with the configured permissions (0600 when unset) and
// ownership, and its mtime is taken from the Last-Modified header (Unix epoch
// when absent or unparsable). The SHA-256 digest of the content is computed
// while copying.
//
// On success it returns the committed immutable ref and the content digest,
// and records the ref ID in hs.refID. When the response carries an ETag, the
// ETag and the content digest (indexed by the URL hash) are stored in the
// ref's metadata; a Last-Modified value is stored as well.
//
// save does not close resp.Body; that is left to the caller.
func (hs *httpSourceHandler) save(ctx context.Context, resp *http.Response, s session.Group) (ref cache.ImmutableRef, dgst digest.Digest, retErr error) {
newRef, err := hs.cache.New(ctx, nil, s, cache.CachePolicyRetain, cache.WithDescription(fmt.Sprintf("http url %s", hs.src.URL)))
if err != nil {
return nil, "", err
}
releaseRef := func() {
newRef.Release(context.TODO())
}
// Release the mutable ref on failure, unless it has already been committed
// (newRef is set to nil after a successful Commit).
defer func() {
if retErr != nil && newRef != nil {
releaseRef()
}
}()
mount, err := newRef.Mount(ctx, false, s)
if err != nil {
return nil, "", err
}
lm := snapshot.LocalMounter(mount)
dir, err := lm.Mount()
if err != nil {
return nil, "", err
}
// Unmount on failure; on the success path the unmount happens explicitly
// before Commit and lm is set to nil.
defer func() {
if retErr != nil && lm != nil {
lm.Unmount()
}
}()
perm := 0600
if hs.src.Perm != 0 {
perm = hs.src.Perm
}
fp := filepath.Join(dir, getFileName(hs.src.URL, hs.src.Filename, resp))
f, err := os.OpenFile(fp, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.FileMode(perm))
if err != nil {
return nil, "", err
}
// Close the file on early returns; f is set to nil once it has been closed
// explicitly below.
defer func() {
if f != nil {
f.Close()
}
}()
// Hash the content while it is written to disk.
h := sha256.New()
if _, err := io.Copy(io.MultiWriter(f, h), resp.Body); err != nil {
return nil, "", err
}
if err := f.Close(); err != nil {
return nil, "", err
}
f = nil
uid := hs.src.UID
gid := hs.src.GID
// Translate the requested owner through the mount's identity mapping, if any.
if idmap := mount.IdentityMapping(); idmap != nil {
uid, gid, err = idmap.ToHost(uid, gid)
if err != nil {
return nil, "", err
}
}
if gid != 0 || uid != 0 {
if err := os.Chown(fp, uid, gid); err != nil {
return nil, "", err
}
}
// Default the mtime to the Unix epoch; a parsable Last-Modified overrides it.
mTime := time.Unix(0, 0)
lastMod := resp.Header.Get("Last-Modified")
if lastMod != "" {
if parsedMTime, err := http.ParseTime(lastMod); err == nil {
mTime = parsedMTime
}
}
if err := os.Chtimes(fp, mTime, mTime); err != nil {
return nil, "", err
}
lm.Unmount()
lm = nil
ref, err = newRef.Commit(ctx)
if err != nil {
return nil, "", err
}
newRef = nil
// NOTE(review): from here on an error return hands back a nil ref and the
// deferred cleanup above skips release because newRef is nil, so the
// committed ref does not appear to be released by this function — confirm
// whether that is intended.
md := cacheRefMetadata{ref}
hs.refID = ref.ID()
dgst = digest.NewDigest(digest.SHA256, h)
if respETag := resp.Header.Get("ETag"); respETag != "" {
respETag = etagValue(respETag)
if err := md.setETag(respETag); err != nil {
return nil, "", err
}
uh, err := hs.urlHash()
if err != nil {
return nil, "", err
}
if err := md.setHTTPChecksum(uh, dgst); err != nil {
return nil, "", err
}
}
if modTime := resp.Header.Get("Last-Modified"); modTime != "" {
if err := md.setHTTPModTime(modTime); err != nil {
return nil, "", err
}
}
return ref, dgst, nil
}
// Snapshot returns the immutable ref holding the downloaded file.
//
// If a ref ID is already known it is fetched from the cache; a failure to do
// so is logged as a warning and the file is downloaded again. The digest of a
// fresh download must equal hs.cacheKey, otherwise the new ref is released
// and a digest mismatch error is returned.
func (hs *httpSourceHandler) Snapshot(ctx context.Context, g session.Group) (cache.ImmutableRef, error) {
	if hs.refID != "" {
		cached, err := hs.cache.Get(ctx, hs.refID, nil)
		if err == nil {
			return cached, nil
		}
		bklog.G(ctx).WithError(err).Warnf("failed to get HTTP snapshot for ref %s (%s)", hs.refID, hs.src.URL)
	}

	req, err := hs.newHTTPRequest(ctx, g)
	if err != nil {
		return nil, err
	}

	resp, err := hs.client(g).Do(req)
	if err != nil {
		return nil, err
	}
	defer func() {
		_ = resp.Body.Close()
	}()

	ref, dgst, err := hs.save(ctx, resp, g)
	if err != nil {
		return nil, err
	}
	if dgst == hs.cacheKey {
		return ref, nil
	}

	ref.Release(context.TODO())
	return nil, errors.Errorf("digest mismatch %s: %s", dgst, hs.cacheKey)
}
// newHTTPRequest builds the GET request for the source URL.
//
// The User-Agent defaults to the buildkit user agent and is followed by the
// user-defined header fields, which may override it. The Authorization header
// is filled from session secrets: either the explicitly configured secret,
// whose lookup failure is returned as an error, or the per-host header and
// token secrets, whose lookup failures are ignored. The token secret is tried
// last and, when found, is sent with a "Bearer " prefix.
func (hs *httpSourceHandler) newHTTPRequest(ctx context.Context, g session.Group) (*http.Request, error) {
	req, err := http.NewRequest(http.MethodGet, hs.src.URL, nil)
	if err != nil {
		return nil, err
	}

	req.Header.Set("User-Agent", version.UserAgent())
	for _, h := range hs.src.Header {
		req.Header.Set(h.Name, h.Value)
	}

	type candidate struct {
		name   string
		bearer bool
	}

	explicit := hs.src.AuthHeaderSecret != ""
	var candidates []candidate
	if explicit {
		candidates = append(candidates, candidate{name: hs.src.AuthHeaderSecret})
	} else if u, err := url.Parse(hs.src.URL); err == nil {
		candidates = append(candidates,
			candidate{name: HTTPAuthHeaderSecretPrefix + u.Hostname()},
			candidate{name: HTTPAuthTokenSecretPrefix + u.Hostname(), bearer: true},
		)
	}

	for _, c := range candidates {
		lookupErr := hs.sm.Any(ctx, g, func(ctx context.Context, _ string, caller session.Caller) error {
			dt, err := secrets.GetSecret(ctx, caller, c.name)
			if err != nil {
				return err
			}
			value := string(dt)
			if c.bearer {
				value = "Bearer " + value
			}
			req.Header.Set("Authorization", value)
			return nil
		})
		// Only an explicitly requested secret is mandatory.
		if lookupErr != nil && explicit {
			return nil, errors.Wrapf(lookupErr, "failed to retrieve HTTP auth secret %s", hs.src.AuthHeaderSecret)
		}
	}

	return req.WithContext(ctx), nil
}
// getFileName picks the name of the file the download is stored as.
//
// The candidates are tried in order:
//  1. manualFilename, when non-empty;
//  2. the base name of the "filename" parameter of the response's
//     Content-Disposition header, when resp is non-nil;
//  3. the base name of the URL path;
//  4. the literal "download".
//
// Candidates 2 and 3 are skipped when they do not name a file ("." or "..",
// a bare separator, or a Content-Disposition filename ending in "/"), because
// joining such a name onto the target directory would address a directory
// rather than a file inside it.
func getFileName(urlStr, manualFilename string, resp *http.Response) string {
	if manualFilename != "" {
		return manualFilename
	}

	if resp != nil {
		if contentDisposition := resp.Header.Get("Content-Disposition"); contentDisposition != "" {
			if _, params, err := mime.ParseMediaType(contentDisposition); err == nil {
				if name := params["filename"]; name != "" && !strings.HasSuffix(name, "/") {
					switch base := filepath.Base(filepath.FromSlash(name)); base {
					case ".", "..", string(filepath.Separator):
						// Not a usable file name; fall back to the URL.
					default:
						return base
					}
				}
			}
		}
	}

	if u, err := url.Parse(urlStr); err == nil {
		switch base := path.Base(u.Path); base {
		case ".", "..", "/":
			// Empty, root or parent-directory path; use the default name.
		default:
			return base
		}
	}

	return "download"
}
// searchHTTPURLDigest searches store for entries indexed under dgst and
// returns them wrapped as cacheRefMetadata. The result is nil when nothing
// matches.
func searchHTTPURLDigest(ctx context.Context, store cache.MetadataStore, dgst digest.Digest) ([]cacheRefMetadata, error) {
	found, err := store.Search(ctx, string(dgst), false)
	if err != nil {
		return nil, err
	}

	var wrapped []cacheRefMetadata
	for i := range found {
		wrapped = append(wrapped, cacheRefMetadata{found[i]})
	}
	return wrapped, nil
}
// cacheRefMetadata wraps cache.RefMetadata with typed accessors for the
// HTTP-specific metadata keys (ETag, content checksum, modification time).
type cacheRefMetadata struct {
cache.RefMetadata
}
// Metadata keys under which HTTP-specific values are stored on a cache ref.
const (
// keyHTTPChecksum holds the digest of the downloaded content.
keyHTTPChecksum = "http.checksum"
// keyETag holds the ETag of the response the content came from.
keyETag = "etag"
// keyModTime holds the raw Last-Modified header value of that response.
keyModTime = "http.modtime"
)
// getHTTPChecksum returns the stored content digest, or "" when none is set.
func (md cacheRefMetadata) getHTTPChecksum() digest.Digest {
	stored := md.GetString(keyHTTPChecksum)
	return digest.Digest(stored)
}
// setHTTPChecksum stores the content digest d and passes the URL digest
// urlDgst as the index under which the entry is recorded.
func (md cacheRefMetadata) setHTTPChecksum(urlDgst digest.Digest, d digest.Digest) error {
	value, index := d.String(), urlDgst.String()
	return md.SetString(keyHTTPChecksum, value, index)
}
// getETag returns the stored ETag value.
func (md cacheRefMetadata) getETag() string {
	etag := md.GetString(keyETag)
	return etag
}
// setETag stores s as the ETag value, without an index.
func (md cacheRefMetadata) setETag(s string) error {
	const noIndex = ""
	return md.SetString(keyETag, s, noIndex)
}
// getHTTPModTime returns the stored modification time string.
func (md cacheRefMetadata) getHTTPModTime() string {
	modTime := md.GetString(keyModTime)
	return modTime
}
// setHTTPModTime stores s as the modification time string, without an index.
func (md cacheRefMetadata) setHTTPModTime(s string) error {
	const noIndex = ""
	return md.SetString(keyModTime, s, noIndex)
}
// etagValue strips a single leading weak-validator marker ("W/") from v so
// that weak and strong ETags can be compared directly. Any other value is
// returned unchanged.
func etagValue(v string) string {
	if strong, weak := strings.CutPrefix(v, "W/"); weak {
		return strong
	}
	return v
}