add cache key debuginfo lookup

This allows opting in to a cache key debug database on
daemon startup.

If enabled, all cache keys generated by builds are
saved into this database together with the plaintext
of the original data, so a reverse lookup can be
performed later to compare two checksums and find
their original difference. If a checksum contains
other checksums internally, these are saved as well.
Due to storage constraints, the plaintext of file
content is not saved, but the metadata portion can
still be looked up.

Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>
Commit 4c9d94f93c by Tonis Tiigi, 2025-06-29 21:29:57 -07:00 (parent 7bf2360705)
16 changed files with 819 additions and 26 deletions
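For context, here is a minimal sketch of exercising the new reverse lookup once the daemon runs with the flag added below. The debug address and the digest value are placeholders, not part of this commit; the handler itself is added in the debug server changes that follow.

package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Assumes buildkitd was started with --save-cache-debug and
	// --debugaddr 127.0.0.1:6060 (placeholder address).
	req, err := http.NewRequest(http.MethodGet,
		"http://127.0.0.1:6060/debug/cache/lookup?digest=sha256:<cache-key>", nil)
	if err != nil {
		panic(err)
	}
	// The handler returns JSON when asked for it, plain text otherwise.
	req.Header.Set("Accept", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status)
	fmt.Println(string(body))
}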

View File

@@ -3,7 +3,6 @@ package contenthash
import (
"bytes"
"context"
"crypto/sha256"
"io"
"os"
"path"
@@ -18,6 +17,7 @@ import (
"github.com/moby/buildkit/cache"
"github.com/moby/buildkit/session"
"github.com/moby/buildkit/snapshot"
"github.com/moby/buildkit/util/cachedigest"
"github.com/moby/locker"
"github.com/moby/patternmatcher"
digest "github.com/opencontainers/go-digest"
@@ -450,15 +450,15 @@ func (cc *cacheContext) Checksum(ctx context.Context, mountable cache.Mountable,
return digest.Digest(includedPaths[0].record.Digest), nil
}
digester := digest.Canonical.Digester()
h := cachedigest.NewHash(cachedigest.TypeFileList)
for i, w := range includedPaths {
if i != 0 {
digester.Hash().Write([]byte{0})
h.Write([]byte{0})
}
digester.Hash().Write([]byte(path.Base(w.path)))
digester.Hash().Write([]byte(w.record.Digest))
h.Write([]byte(path.Base(w.path)))
h.Write([]byte(w.record.Digest))
}
return digester.Digest(), nil
return h.Sum(), nil
}
func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, opts ChecksumOpts) ([]*includedPath, error) {
@@ -881,7 +881,7 @@ func (cc *cacheContext) checksum(ctx context.Context, root *iradix.Node[*CacheRe
switch cr.Type {
case CacheRecordTypeDir:
h := sha256.New()
h := cachedigest.NewHash(cachedigest.TypeFileList)
next := append(k, 0)
iter := root.Iterator()
iter.SeekLowerBound(append(slices.Clone(next), 0))
@@ -906,7 +906,7 @@ func (cc *cacheContext) checksum(ctx context.Context, root *iradix.Node[*CacheRe
}
subk, _, ok = iter.Next()
}
dgst = digest.NewDigest(digest.SHA256, h)
dgst = h.Sum()
default:
p := convertKeyToPath(bytes.TrimSuffix(k, []byte{0}))

View File

@@ -2,12 +2,13 @@ package contenthash
import (
"archive/tar"
"crypto/sha256"
"encoding/hex"
"hash"
"os"
"path/filepath"
"time"
"github.com/moby/buildkit/util/cachedigest"
"github.com/pkg/errors"
fstypes "github.com/tonistiigi/fsutil/types"
)
@@ -62,13 +63,14 @@ func NewFromStat(stat *fstypes.Stat) (hash.Hash, error) {
}
}
// fmt.Printf("hdr: %#v\n", hdr)
tsh := &tarsumHash{hdr: hdr, Hash: sha256.New()}
h := cachedigest.NewHash(cachedigest.TypeFile)
tsh := &tarsumHash{hdr: hdr, Hash: h}
tsh.Reset() // initialize header
return tsh, nil
}
type tarsumHash struct {
hash.Hash
*cachedigest.Hash
hdr *tar.Header
}
@@ -79,6 +81,19 @@ func (tsh *tarsumHash) Reset() {
WriteV1TarsumHeaders(tsh.hdr, tsh.Hash)
}
func (tsh *tarsumHash) Write(p []byte) (n int, err error) {
n, err = tsh.WriteNoDebug(p)
if n > 0 {
tsh.hdr.Size += int64(n)
}
return n, err
}
func (tsh *tarsumHash) Sum(_ []byte) []byte {
b, _ := hex.DecodeString(tsh.Hash.Sum().Hex())
return b
}
type statInfo struct {
*fstypes.Stat
}

View File

@@ -1,6 +1,8 @@
package main
import (
"context"
"encoding/json"
"expvar"
"net/http"
"net/http/pprof"
@@ -10,6 +12,9 @@ import (
"time"
"github.com/moby/buildkit/util/bklog"
"github.com/moby/buildkit/util/cachedigest"
digest "github.com/opencontainers/go-digest"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus/promhttp"
"golang.org/x/net/trace"
)
@@ -24,6 +29,8 @@ func setupDebugHandlers(addr string) error {
m.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace))
m.Handle("/debug/requests", http.HandlerFunc(trace.Traces))
m.Handle("/debug/events", http.HandlerFunc(trace.Events))
m.Handle("/debug/cache/all", http.HandlerFunc(handleCacheAll))
m.Handle("/debug/cache/lookup", http.HandlerFunc(handleCacheLookup))
m.Handle("/debug/gc", http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
runtime.GC()
@@ -59,3 +66,137 @@ func setupDebugHandlers(addr string) error {
}()
return nil
}
func handleCacheAll(w http.ResponseWriter, r *http.Request) {
records, err := loadCacheAll(r.Context())
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
switch r.Header.Get("Accept") {
case "application/json":
w.Header().Set("Content-Type", "application/json")
enc := json.NewEncoder(w)
enc.SetIndent("", " ")
enc.Encode(records)
default:
w.Header().Set("Content-Type", "text/plain")
for _, rec := range records {
w.Write([]byte(rec.Digest.String() + " (" + rec.Type.String() + "):\n"))
for _, subRec := range rec.SubRecords {
w.Write([]byte(" " + subRec.Digest.String() + " (" + subRec.Type.String() + "):\n"))
}
for _, frame := range rec.Data {
switch frame.ID {
case cachedigest.FrameIDData:
w.Write([]byte(" " + frame.ID.String() + ": " + string(frame.Data) + "\n"))
case cachedigest.FrameIDSkip:
w.Write([]byte(" skipping " + string(frame.Data) + " bytes\n"))
}
}
w.Write([]byte("\n"))
}
}
}
func handleCacheLookup(w http.ResponseWriter, r *http.Request) {
dgstStr := r.URL.Query().Get("digest")
if dgstStr == "" {
http.Error(w, "digest query parameter is required", http.StatusBadRequest)
return
}
dgst, err := digest.Parse(dgstStr)
if err != nil {
http.Error(w, "invalid digest: "+err.Error(), http.StatusBadRequest)
return
}
record, err := cacheRecordLookup(r.Context(), dgst)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
switch r.Header.Get("Accept") {
case "application/json":
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(record); err != nil {
http.Error(w, "failed to encode record: "+err.Error(), http.StatusInternalServerError)
return
}
default:
w.Header().Set("Content-Type", "text/plain")
w.Write([]byte(record.Digest.String() + " (" + record.Type.String() + "):\n"))
for _, subRec := range record.SubRecords {
w.Write([]byte(" " + subRec.Digest.String() + " (" + subRec.Type.String() + "):\n"))
}
for _, frame := range record.Data {
switch frame.ID {
case cachedigest.FrameIDData:
w.Write([]byte(" " + frame.ID.String() + ": " + string(frame.Data) + "\n"))
case cachedigest.FrameIDSkip:
w.Write([]byte(" skipping " + string(frame.Data) + " bytes\n"))
}
}
}
}
func cacheRecordLookup(ctx context.Context, dgst digest.Digest) (*cachedigest.Record, error) {
db := cachedigest.GetDefaultDB()
typ, frames, err := db.Get(ctx, dgst.String())
if err != nil {
return nil, errors.Wrapf(err, "failed to get digest %s from cache", dgst.String())
}
record := &cachedigest.Record{
Digest: dgst,
Type: typ,
Data: frames,
}
if err := record.LoadSubRecords(func(d digest.Digest) (cachedigest.Type, []cachedigest.Frame, error) {
typ, frames, err := db.Get(ctx, d.String())
if err != nil {
return "", nil, errors.Wrapf(err, "failed to load sub-record for %s", d.String())
}
return typ, frames, nil
}); err != nil {
return nil, errors.Wrapf(err, "failed to load sub-records for %s", dgst.String())
}
return record, nil
}
func loadCacheAll(ctx context.Context) ([]*cachedigest.Record, error) {
var records []*cachedigest.Record
m := map[digest.Digest]*cachedigest.Record{}
db := cachedigest.GetDefaultDB()
err := db.All(ctx, func(key string, typ cachedigest.Type, frames []cachedigest.Frame) error {
dgst, err := digest.Parse(key)
if err != nil {
return errors.Wrapf(err, "failed to parse digest %q", key)
}
r := &cachedigest.Record{
Digest: dgst,
Type: typ,
Data: frames,
}
records = append(records, r)
m[dgst] = r
return nil
})
if err != nil {
return nil, err
}
for _, rec := range records {
if err := rec.LoadSubRecords(func(d digest.Digest) (cachedigest.Type, []cachedigest.Frame, error) {
rec, ok := m[d]
if !ok {
return "", nil, errors.Errorf("digest %s not found in cache", d)
}
return rec.Type, rec.Data, nil
}); err != nil {
return nil, errors.Wrapf(err, "failed to load sub-records for %s", rec.Digest.String())
}
}
return records, nil
}

View File

@@ -45,6 +45,7 @@ import (
"github.com/moby/buildkit/util/appdefaults"
"github.com/moby/buildkit/util/archutil"
"github.com/moby/buildkit/util/bklog"
"github.com/moby/buildkit/util/cachedigest"
"github.com/moby/buildkit/util/db/boltutil"
"github.com/moby/buildkit/util/disk"
"github.com/moby/buildkit/util/grpcerrors"
@@ -225,6 +226,10 @@ func main() {
Name: "cdi-spec-dir",
Usage: "list of directories to scan for CDI spec files",
},
cli.BoolFlag{
Name: "save-cache-debug",
Usage: "enable saving cache debug info",
},
)
app.Flags = append(app.Flags, appFlags...)
app.Flags = append(app.Flags, serviceFlags()...)
@@ -345,6 +350,15 @@ func main() {
return err
}
if c.GlobalBool("save-cache-debug") {
db, err := cachedigest.NewDB(filepath.Join(cfg.Root, "cache-debug.db"))
if err != nil {
return errors.Wrap(err, "failed to create cache debug db")
}
cachedigest.SetDefaultDB(db)
defer db.Close()
}
controller, err := newController(ctx, c, &cfg)
if err != nil {
return err

View File

@@ -9,6 +9,7 @@ import (
"github.com/moby/buildkit/identity"
"github.com/moby/buildkit/util/bklog"
"github.com/moby/buildkit/util/cachedigest"
digest "github.com/opencontainers/go-digest"
"github.com/sirupsen/logrus"
)
@@ -448,8 +449,9 @@ func (c *cacheManager) getIDFromDeps(k *CacheKey) string {
}
func rootKey(dgst digest.Digest, output Index) digest.Digest {
dgst, _ = cachedigest.FromBytes(fmt.Appendf(nil, "%s@%d", dgst, output), cachedigest.TypeString)
if strings.HasPrefix(dgst.String(), "random:") {
return digest.Digest("random:" + digest.FromBytes(fmt.Appendf(nil, "%s@%d", dgst, output)).Encoded())
return digest.Digest("random:" + dgst.Encoded())
}
return digest.FromBytes(fmt.Appendf(nil, "%s@%d", dgst, output))
return dgst
}

View File

@@ -13,6 +13,7 @@ import (
"github.com/moby/buildkit/solver"
"github.com/moby/buildkit/solver/llbsolver/ops/opsutils"
"github.com/moby/buildkit/solver/pb"
"github.com/moby/buildkit/util/cachedigest"
"github.com/moby/buildkit/worker"
digest "github.com/opencontainers/go-digest"
"github.com/pkg/errors"
@@ -51,8 +52,12 @@ func (b *BuildOp) CacheMap(ctx context.Context, g session.Group, index int) (*so
return nil, false, err
}
dgst, err := cachedigest.FromBytes(dt, cachedigest.TypeJSON)
if err != nil {
return nil, false, err
}
return &solver.CacheMap{
Digest: digest.FromBytes(dt),
Digest: dgst,
Deps: make([]struct {
Selector digest.Digest
ComputeDigestFunc solver.ResultBasedCacheFunc

View File

@@ -23,6 +23,7 @@ import (
"github.com/moby/buildkit/solver/llbsolver/mounts"
"github.com/moby/buildkit/solver/llbsolver/ops/opsutils"
"github.com/moby/buildkit/solver/pb"
"github.com/moby/buildkit/util/cachedigest"
"github.com/moby/buildkit/util/progress/logs"
utilsystem "github.com/moby/buildkit/util/system"
"github.com/moby/buildkit/worker"
@@ -173,8 +174,12 @@ func (e *ExecOp) CacheMap(ctx context.Context, g session.Group, index int) (*sol
return nil, false, err
}
dgst, err := cachedigest.FromBytes(dt, cachedigest.TypeJSON)
if err != nil {
return nil, false, err
}
cm := &solver.CacheMap{
Digest: digest.FromBytes(dt),
Digest: dgst,
Deps: make([]struct {
Selector digest.Digest
ComputeDigestFunc solver.ResultBasedCacheFunc

View File

@@ -19,6 +19,7 @@ import (
"github.com/moby/buildkit/solver/llbsolver/ops/fileoptypes"
"github.com/moby/buildkit/solver/llbsolver/ops/opsutils"
"github.com/moby/buildkit/solver/pb"
"github.com/moby/buildkit/util/cachedigest"
"github.com/moby/buildkit/util/flightcontrol"
"github.com/moby/buildkit/worker"
digest "github.com/opencontainers/go-digest"
@@ -134,8 +135,12 @@ func (f *fileOp) CacheMap(ctx context.Context, g session.Group, index int) (*sol
return nil, false, err
}
dgst, err := cachedigest.FromBytes(dt, cachedigest.TypeJSON)
if err != nil {
return nil, false, err
}
cm := &solver.CacheMap{
Digest: digest.FromBytes(dt),
Digest: dgst,
Deps: make([]struct {
Selector digest.Digest
ComputeDigestFunc solver.ResultBasedCacheFunc
@@ -147,13 +152,17 @@ func (f *fileOp) CacheMap(ctx context.Context, g session.Group, index int) (*sol
if _, ok := invalidSelectors[idx]; ok {
continue
}
dgsts := make([][]byte, 0, len(m))
paths := make([][]byte, 0, len(m))
for _, k := range m {
dgsts = append(dgsts, []byte(k.Path))
paths = append(paths, []byte(k.Path))
}
slices.SortFunc(dgsts, bytes.Compare)
slices.Reverse(dgsts) // historical reasons
cm.Deps[idx].Selector = digest.FromBytes(bytes.Join(dgsts, []byte{0}))
slices.SortFunc(paths, bytes.Compare)
slices.Reverse(paths) // historical reasons
dgst, err := cachedigest.FromBytes(bytes.Join(paths, []byte{0}), cachedigest.TypeStringArray)
if err != nil {
return nil, false, err
}
cm.Deps[idx].Selector = dgst
cm.Deps[idx].ComputeDigestFunc = opsutils.NewContentHashFunc(dedupeSelectors(m))
}

View File

@@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"github.com/moby/buildkit/util/cachedigest"
"github.com/moby/buildkit/worker"
"github.com/pkg/errors"
@@ -46,8 +47,12 @@ func (m *mergeOp) CacheMap(ctx context.Context, group session.Group, index int)
return nil, false, err
}
dgst, err := cachedigest.FromBytes(dt, cachedigest.TypeJSON)
if err != nil {
return nil, false, err
}
cm := &solver.CacheMap{
Digest: digest.FromBytes(dt),
Digest: dgst,
Deps: make([]struct {
Selector digest.Digest
ComputeDigestFunc solver.ResultBasedCacheFunc

View File

@@ -8,6 +8,7 @@ import (
"github.com/moby/buildkit/cache/contenthash"
"github.com/moby/buildkit/session"
"github.com/moby/buildkit/solver"
"github.com/moby/buildkit/util/cachedigest"
"github.com/moby/buildkit/worker"
digest "github.com/opencontainers/go-digest"
"github.com/pkg/errors"
@@ -66,6 +67,6 @@ func NewContentHashFunc(selectors []Selector) solver.ResultBasedCacheFunc {
return "", err
}
return digest.FromBytes(bytes.Join(dgsts, []byte{0})), nil
return cachedigest.FromBytes(bytes.Join(dgsts, []byte{0}), cachedigest.TypeDigestArray)
}
}

View File

@@ -10,6 +10,7 @@ import (
"github.com/moby/buildkit/solver/llbsolver/ops/opsutils"
"github.com/moby/buildkit/solver/pb"
"github.com/moby/buildkit/source"
"github.com/moby/buildkit/util/cachedigest"
"github.com/moby/buildkit/worker"
digest "github.com/opencontainers/go-digest"
"golang.org/x/sync/semaphore"
@@ -88,7 +89,10 @@ func (s *SourceOp) CacheMap(ctx context.Context, g session.Group, index int) (*s
s.pin = pin
}
dgst := digest.FromBytes([]byte(sourceCacheType + ":" + k))
dgst, err := cachedigest.FromBytes([]byte(sourceCacheType+":"+k), cachedigest.TypeString)
if err != nil {
return nil, false, err
}
if strings.HasPrefix(k, "session:") {
dgst = digest.Digest("random:" + dgst.Encoded())
}

View File

@@ -19,10 +19,10 @@ import (
"github.com/moby/buildkit/source"
srctypes "github.com/moby/buildkit/source/types"
"github.com/moby/buildkit/util/bklog"
"github.com/moby/buildkit/util/cachedigest"
"github.com/moby/buildkit/util/progress"
"github.com/moby/patternmatcher"
"github.com/moby/sys/user"
digest "github.com/opencontainers/go-digest"
"github.com/pkg/errors"
"github.com/tonistiigi/fsutil"
fstypes "github.com/tonistiigi/fsutil/types"
@@ -154,7 +154,11 @@ func (ls *localSourceHandler) CacheKey(ctx context.Context, g session.Group, ind
if err != nil {
return "", "", nil, false, err
}
return "session:" + ls.src.Name + ":" + digest.FromBytes(dt).String(), digest.FromBytes(dt).String(), nil, true, nil
dgst, err := cachedigest.FromBytes(dt, cachedigest.TypeJSON)
if err != nil {
return "", "", nil, false, err
}
return "session:" + ls.src.Name + ":" + dgst.String(), dgst.String(), nil, true, nil
}
func (ls *localSourceHandler) Snapshot(ctx context.Context, g session.Group) (cache.ImmutableRef, error) {

util/cachedigest/db.go (new file, 168 lines)
View File

@@ -0,0 +1,168 @@
package cachedigest
import (
"context"
"crypto/sha256"
"sync"
digest "github.com/opencontainers/go-digest"
"github.com/pkg/errors"
"go.etcd.io/bbolt"
)
var ErrInvalidEncoding = errors.Errorf("invalid encoding")
var ErrNotFound = errors.Errorf("not found")
const bucketName = "byhash"
type DB struct {
db *bbolt.DB
wg sync.WaitGroup
}
var defaultDB = &DB{}
func SetDefaultDB(db *DB) {
defaultDB = db
}
func GetDefaultDB() *DB {
return defaultDB
}
func NewDB(path string) (*DB, error) {
db, err := bbolt.Open(path, 0600, nil)
if err != nil {
return nil, err
}
return &DB{db: db}, nil
}
func (d *DB) Close() error {
if d.db != nil {
d.wg.Wait()
return d.db.Close()
}
return nil
}
func (d *DB) NewHash(typ Type) *Hash {
return &Hash{
h: sha256.New(),
typ: typ,
db: d,
}
}
func (d *DB) FromBytes(dt []byte, typ Type) (digest.Digest, error) {
dgst := digest.FromBytes(dt)
d.saveFrames(dgst.String(), []Frame{
{ID: FrameIDType, Data: []byte(string(typ))},
{ID: FrameIDData, Data: dt},
})
return dgst, nil
}
func (d *DB) saveFrames(key string, frames []Frame) {
if d.db == nil {
return
}
d.wg.Add(1)
go func() {
defer d.wg.Done()
val, err := encodeFrames(frames)
if err != nil {
// Optionally log error
return
}
_ = d.db.Update(func(tx *bbolt.Tx) error {
b, err := tx.CreateBucketIfNotExists([]byte(bucketName))
if err != nil {
return err
}
return b.Put([]byte(key), val)
})
}()
}
func (d *DB) Get(ctx context.Context, dgst string) (Type, []Frame, error) {
if d.db == nil {
return "", nil, errors.WithStack(ErrNotFound)
}
parsed, err := digest.Parse(dgst)
if err != nil {
return "", nil, errors.Wrap(err, "invalid digest key")
}
var typ Type
var resultFrames []Frame
err = d.db.View(func(tx *bbolt.Tx) error {
b := tx.Bucket([]byte(bucketName))
if b == nil {
return errors.WithStack(ErrNotFound)
}
val := b.Get([]byte(parsed.String()))
if val == nil {
return errors.WithStack(ErrNotFound)
}
frames, err := decodeFrames(val)
if err != nil {
return err
}
for _, f := range frames {
switch f.ID {
case FrameIDType:
typ = Type(f.Data)
case FrameIDData, FrameIDSkip:
resultFrames = append(resultFrames, f)
}
}
return nil
})
if err != nil {
return "", nil, err
}
return typ, resultFrames, nil
}
func (d *DB) All(ctx context.Context, cb func(key string, typ Type, frames []Frame) error) error {
if d.db == nil {
return nil
}
return d.db.View(func(tx *bbolt.Tx) error {
select {
case <-ctx.Done():
return context.Cause(ctx)
default:
}
b := tx.Bucket([]byte(bucketName))
if b == nil {
return nil
}
return b.ForEach(func(k, v []byte) error {
keyStr := string(k)
_, err := digest.Parse(keyStr)
if err != nil {
return errors.Wrapf(err, "invalid digest key: %s", keyStr)
}
frames, err := decodeFrames(v)
if err != nil {
return err
}
var typ Type
var dataFrames []Frame
for _, f := range frames {
switch f.ID {
case FrameIDType:
typ = Type(f.Data)
case FrameIDData, FrameIDSkip:
dataFrames = append(dataFrames, f)
}
}
return cb(keyStr, typ, dataFrames)
})
})
}
func (d *DB) Wait() {
d.wg.Wait()
}
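For orientation, a minimal sketch of the write/read round trip through this DB, using only the API above (the path and input string are placeholders; the tests in the next file exercise the same calls more thoroughly):

package main

import (
	"context"
	"fmt"

	"github.com/moby/buildkit/util/cachedigest"
)

func main() {
	db, err := cachedigest.NewDB("/tmp/cache-debug.db") // placeholder path
	if err != nil {
		panic(err)
	}
	defer db.Close()

	// FromBytes records the digest together with its plaintext frames.
	dgst, err := db.FromBytes([]byte("example cache key input"), cachedigest.TypeString)
	if err != nil {
		panic(err)
	}
	// Frames are persisted asynchronously; wait before reading back.
	db.Wait()

	typ, frames, err := db.Get(context.TODO(), dgst.String())
	if err != nil {
		panic(err)
	}
	fmt.Println(dgst, typ, len(frames))
}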

util/cachedigest/db_test.go (new file, 193 lines)
View File

@@ -0,0 +1,193 @@
package cachedigest
import (
"context"
"os"
"path/filepath"
"slices"
"testing"
digest "github.com/opencontainers/go-digest"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func tempDB(t *testing.T) (*DB, func()) {
dir := t.TempDir()
dbPath := filepath.Join(dir, "test.db")
db, err := NewDB(dbPath)
require.NoError(t, err)
SetDefaultDB(db) // Ensure defaultDB is set for correct test behavior
return db, func() {
db.Close()
os.RemoveAll(dir)
SetDefaultDB(&DB{}) // Reset defaultDB after test
}
}
func TestFromBytesAndGet(t *testing.T) {
db, cleanup := tempDB(t)
defer cleanup()
data := []byte("hello world")
typ := TypeString
dgst, err := db.FromBytes(data, typ)
require.NoError(t, err)
require.NotEqual(t, digest.Digest(""), dgst)
db.Wait()
gotType, frames, err := db.Get(context.Background(), dgst.String())
require.NoError(t, err)
require.Equal(t, typ, gotType)
var foundData bool
for _, f := range frames {
if f.ID == FrameIDData {
require.Equal(t, data, f.Data)
foundData = true
}
}
require.True(t, foundData, "should find data frame")
_, _, err = db.Get(context.Background(), digest.FromBytes([]byte("notfound")).String())
require.ErrorIs(t, err, ErrNotFound)
}
func TestNewHashAndGet(t *testing.T) {
db, cleanup := tempDB(t)
defer cleanup()
h := db.NewHash(TypeStringArray)
inputs := [][]byte{
[]byte("foo"),
[]byte("bar"),
}
for _, in := range inputs {
_, err := h.Write(in)
require.NoError(t, err)
}
skip1 := []byte("xxxxx")
skip2 := []byte("yyy")
_, err := h.WriteNoDebug(skip1)
require.NoError(t, err)
_, err = h.WriteNoDebug(skip2)
require.NoError(t, err)
sum := h.Sum()
db.Wait()
expectedConcat := slices.Concat(inputs[0], inputs[1], skip1, skip2)
expectedHash := digest.FromBytes(expectedConcat)
require.Equal(t, expectedHash, sum, "digest sum should match expected value")
gotType, frames, err := db.Get(context.Background(), sum.String())
require.NoError(t, err)
require.Equal(t, TypeStringArray, gotType)
var dataFrames [][]byte
var skipLens []uint32
for _, f := range frames {
switch f.ID {
case FrameIDData:
dataFrames = append(dataFrames, f.Data)
case FrameIDSkip:
require.Len(t, f.Data, 4)
skipLens = append(skipLens, uint32(f.Data[0])<<24|uint32(f.Data[1])<<16|uint32(f.Data[2])<<8|uint32(f.Data[3]))
}
}
require.Len(t, dataFrames, len(inputs))
for i, in := range inputs {
require.Equal(t, in, dataFrames[i])
}
require.Equal(t, []uint32{uint32(len(skip1) + len(skip2))}, skipLens)
}
func TestEncodeDecodeFrames(t *testing.T) {
framesIn := []Frame{
{FrameIDType, []byte(TypeJSON)},
{FrameIDData, []byte("hello world")},
}
encoded, err := encodeFrames(framesIn)
require.NoError(t, err, "encodeFrames should not error")
decoded, err := decodeFrames(encoded)
require.NoError(t, err, "decodeFrames should not error")
assert.Equal(t, len(framesIn), len(decoded), "number of frames should match")
for i, f := range framesIn {
assert.Equal(t, f.ID, decoded[i].ID, "frame id should match")
assert.Equal(t, f.Data, decoded[i].Data, "frame data should match")
}
}
func TestDecodeFramesInvalid(t *testing.T) {
// Too short
_, err := decodeFrames([]byte{0, 1, 2})
require.Error(t, err, "should error for short input")
// Length mismatch
bad := make([]byte, 12)
// frameID=1, len=10, but only 4 bytes of data
bad[0] = 0
bad[1] = 0
bad[2] = 0
bad[3] = 1
bad[4] = 0
bad[5] = 0
bad[6] = 0
bad[7] = 10
copy(bad[8:], []byte{1, 2, 3, 4})
_, err = decodeFrames(bad)
require.Error(t, err, "should error for length mismatch")
require.ErrorIs(t, err, ErrInvalidEncoding, "should return ErrInvalidEncoding")
}
func TestAll(t *testing.T) {
db, cleanup := tempDB(t)
defer cleanup()
records := []struct {
data []byte
typ Type
}{
{[]byte("foo"), TypeString},
{[]byte("bar"), TypeStringArray},
{[]byte("baz"), TypeDigestArray},
}
var digests []string
for _, rec := range records {
dgst, err := db.FromBytes(rec.data, rec.typ)
require.NoError(t, err)
digests = append(digests, dgst.String())
}
db.Wait()
found := make(map[string]struct {
typ Type
frames []Frame
})
err := db.All(context.TODO(), func(key string, typ Type, frames []Frame) error {
found[key] = struct {
typ Type
frames []Frame
}{typ, frames}
return nil
})
require.NoError(t, err)
require.Len(t, found, len(records))
for i, rec := range records {
dgst := digests[i]
val, ok := found[dgst]
require.True(t, ok, "digest %s not found", dgst)
require.Equal(t, rec.typ, val.typ)
require.Len(t, val.frames, 1)
require.Equal(t, FrameIDData, val.frames[0].ID)
require.Equal(t, rec.data, val.frames[0].Data)
}
}

util/cachedigest/digest.go (new file, 159 lines)
View File

@@ -0,0 +1,159 @@
package cachedigest
import (
"bytes"
"encoding/binary"
"hash"
"regexp"
"sync"
"github.com/moby/buildkit/util/bklog"
digest "github.com/opencontainers/go-digest"
)
type Type string
const (
TypeJSON Type = "json"
TypeString Type = "string"
TypeStringArray Type = "string-array"
TypeDigestArray Type = "digest-array"
TypeFileList Type = "file-list"
TypeFile Type = "file"
)
func (t Type) String() string {
return string(t)
}
func NewHash(typ Type) *Hash {
return defaultDB.NewHash(typ)
}
func FromBytes(dt []byte, t Type) (digest.Digest, error) {
return defaultDB.FromBytes(dt, t)
}
type Hash struct {
h hash.Hash
typ Type
db *DB
frames []Frame
}
func (h *Hash) Reset() {
h.h.Reset()
h.frames = h.frames[:0]
}
func (h *Hash) BlockSize() int {
return h.h.BlockSize()
}
func (h *Hash) Size() int {
return h.h.Size()
}
func (h *Hash) Write(p []byte) (n int, err error) {
n, err = h.h.Write(p)
if n > 0 && h.db != nil {
h.frames = append(h.frames, Frame{ID: FrameIDData, Data: bytes.Clone(p[:n])})
}
return n, err
}
func (h *Hash) WriteNoDebug(p []byte) (n int, err error) {
n, err = h.h.Write(p)
if n > 0 && h.db != nil {
if len(h.frames) > 0 && h.frames[len(h.frames)-1].ID == FrameIDSkip {
last := &h.frames[len(h.frames)-1]
prevLen := binary.BigEndian.Uint32(last.Data)
binary.BigEndian.PutUint32(last.Data, prevLen+uint32(n))
} else {
lenBytes := make([]byte, 4)
binary.BigEndian.PutUint32(lenBytes, uint32(n))
h.frames = append(h.frames, Frame{ID: FrameIDSkip, Data: lenBytes})
}
}
return n, err
}
func (h *Hash) Sum() digest.Digest {
sum := digest.NewDigest(digest.SHA256, h.h)
if h.db != nil && len(h.frames) > 0 {
frames := []Frame{
{ID: FrameIDType, Data: []byte(string(h.typ))},
}
frames = append(frames, h.frames...)
h.db.saveFrames(sum.String(), frames)
}
return sum
}
type Record struct {
Digest digest.Digest
Type Type
Data []Frame
SubRecords []Record
}
var shaRegexpOnce = sync.OnceValue(func() *regexp.Regexp {
return regexp.MustCompile(`\bsha256:[a-f0-9]{64}\b`)
})
func (r *Record) LoadSubRecords(loader func(d digest.Digest) (Type, []Frame, error)) error {
var checksums []string
var dt []byte
for _, f := range r.Data {
if f.ID != FrameIDData {
continue
}
dt = append(dt, f.Data...)
}
switch r.Type {
case TypeString:
// find regex matches in the data
matches := shaRegexpOnce().FindAllSubmatch(dt, -1)
for _, match := range matches {
if len(match) > 0 {
checksums = append(checksums, string(match[0]))
}
}
case TypeDigestArray:
for _, dgst := range bytes.Split(dt, []byte{0}) {
checksums = append(checksums, string(dgst))
}
case TypeFileList:
for _, nameChecksumPair := range bytes.Split(dt, []byte{0}) {
idx := bytes.LastIndex(nameChecksumPair, []byte("sha256:"))
if idx < 0 {
bklog.L.Warnf("invalid file list entry %q, missing sha256 prefix", nameChecksumPair)
continue
}
checksums = append(checksums, string(nameChecksumPair[idx:]))
}
}
dgsts := make([]digest.Digest, 0, len(checksums))
for _, dgst := range checksums {
if d, err := digest.Parse(dgst); err == nil {
dgsts = append(dgsts, d)
} else {
bklog.L.Warnf("failed to parse debug info digest %q: %v", dgst, err)
}
}
for _, dgst := range dgsts {
typ, frames, err := loader(digest.Digest(dgst))
if err != nil {
bklog.L.Warnf("failed to load sub-record for %s: %v", dgst, err)
continue
}
r.SubRecords = append(r.SubRecords, Record{
Digest: digest.Digest(dgst),
Type: typ,
Data: frames,
})
}
return nil
}

util/cachedigest/frame.go (new file, 68 lines)
View File

@@ -0,0 +1,68 @@
package cachedigest
import (
"encoding/binary"
"github.com/pkg/errors"
)
type FrameID uint32
const (
FrameIDType FrameID = 1
FrameIDData FrameID = 2
FrameIDSkip FrameID = 3
)
func (f FrameID) String() string {
switch f {
case FrameIDType:
return "type"
case FrameIDData:
return "data"
case FrameIDSkip:
return "skip"
default:
return "unknown"
}
}
type Frame struct {
ID FrameID
Data []byte
}
// encodeFrames encodes a series of frames: [frameID:uint32][len:uint32][data:len]
func encodeFrames(frames []Frame) ([]byte, error) {
var out []byte
for _, f := range frames {
buf := make([]byte, 8+len(f.Data))
binary.BigEndian.PutUint32(buf[0:4], uint32(f.ID))
binary.BigEndian.PutUint32(buf[4:8], uint32(len(f.Data)))
copy(buf[8:], f.Data)
out = append(out, buf...)
}
return out, nil
}
// decodeFrames decodes a series of frames from data.
func decodeFrames(data []byte) ([]Frame, error) {
var frames []Frame
i := 0
for i+8 <= len(data) {
frameID := binary.BigEndian.Uint32(data[i : i+4])
length := binary.BigEndian.Uint32(data[i+4 : i+8])
if i+8+int(length) > len(data) {
return nil, errors.WithStack(ErrInvalidEncoding)
}
frames = append(frames, Frame{
ID: FrameID(frameID),
Data: data[i+8 : i+8+int(length)],
})
i += 8 + int(length)
}
if i != len(data) {
return nil, errors.WithStack(ErrInvalidEncoding)
}
return frames, nil
}
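As a worked example of the wire format described above (not part of the diff): a single data frame carrying the three bytes "foo" encodes to the big-endian frame ID 0x00000002, the big-endian length 0x00000003, and then the payload.

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	payload := []byte("foo")
	buf := make([]byte, 8+len(payload))
	binary.BigEndian.PutUint32(buf[0:4], 2) // FrameIDData
	binary.BigEndian.PutUint32(buf[4:8], uint32(len(payload)))
	copy(buf[8:], payload)
	fmt.Printf("% x\n", buf) // prints: 00 00 00 02 00 00 00 03 66 6f 6f
}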