diff --git a/cmd/regctl/blob.go b/cmd/regctl/blob.go index 9151927..8b430b1 100644 --- a/cmd/regctl/blob.go +++ b/cmd/regctl/blob.go @@ -1,14 +1,21 @@ package main import ( + "archive/tar" + "encoding/json" + "fmt" "io" + "io/fs" "os" + "strings" + "time" // crypto libraries included for go-digest _ "crypto/sha256" _ "crypto/sha512" "github.com/opencontainers/go-digest" + "github.com/regclient/regclient/internal/diff" "github.com/regclient/regclient/pkg/template" "github.com/regclient/regclient/types" "github.com/regclient/regclient/types/ref" @@ -21,6 +28,22 @@ var blobCmd = &cobra.Command{ Aliases: []string{"layer"}, Short: "manage image blobs/layers", } +var blobDiffConfigCmd = &cobra.Command{ + Use: "diff-config ", + Short: "diff two image configs", + Long: `This returns the difference between two configs, comparing the contents of each config json.`, + Args: cobra.ExactArgs(4), + ValidArgs: []string{}, // do not auto complete repository or digest + RunE: runBlobDiffConfig, +} +var blobDiffLayerCmd = &cobra.Command{ + Use: "diff-layer ", + Short: "diff two tar layers", + Long: `This returns the difference between two layers, comparing the contents of each tar.`, + Args: cobra.ExactArgs(4), + ValidArgs: []string{}, // do not auto complete repository or digest + RunE: runBlobDiffLayer, +} var blobGetCmd = &cobra.Command{ Use: "get ", Aliases: []string{"pull"}, @@ -44,13 +67,23 @@ is the digest of the blob.`, } var blobOpts struct { - format string - formatPut string - mt string - digest string + diffCtx int + diffFullCtx bool + diffIgnoreTime bool + format string + formatPut string + mt string + digest string } func init() { + blobDiffConfigCmd.Flags().IntVarP(&blobOpts.diffCtx, "context", "", 3, "Lines of context") + blobDiffConfigCmd.Flags().BoolVarP(&blobOpts.diffFullCtx, "context-full", "", false, "Show all lines of context") + + blobDiffLayerCmd.Flags().IntVarP(&blobOpts.diffCtx, "context", "", 3, "Lines of context") + 
blobDiffLayerCmd.Flags().BoolVarP(&blobOpts.diffFullCtx, "context-full", "", false, "Show all lines of context") + blobDiffLayerCmd.Flags().BoolVarP(&blobOpts.diffIgnoreTime, "ignore-timestamp", "", false, "Ignore timestamps on files") + blobGetCmd.Flags().StringVarP(&blobOpts.format, "format", "", "{{printPretty .}}", "Format output with go template syntax") blobGetCmd.Flags().StringVarP(&blobOpts.mt, "media-type", "", "", "Set the requested mediaType (deprecated)") blobGetCmd.RegisterFlagCompletionFunc("format", completeArgNone) @@ -72,11 +105,145 @@ func init() { blobPutCmd.RegisterFlagCompletionFunc("digest", completeArgNone) blobPutCmd.Flags().MarkHidden("content-type") + blobCmd.AddCommand(blobDiffConfigCmd) + blobCmd.AddCommand(blobDiffLayerCmd) blobCmd.AddCommand(blobGetCmd) blobCmd.AddCommand(blobPutCmd) rootCmd.AddCommand(blobCmd) } +func runBlobDiffConfig(cmd *cobra.Command, args []string) error { + diffOpts := []diff.Opt{} + if blobOpts.diffCtx > 0 { + diffOpts = append(diffOpts, diff.WithContext(blobOpts.diffCtx, blobOpts.diffCtx)) + } + if blobOpts.diffFullCtx { + diffOpts = append(diffOpts, diff.WithFullContext()) + } + ctx := cmd.Context() + r1, err := ref.New(args[0]) + if err != nil { + return err + } + r2, err := ref.New(args[2]) + if err != nil { + return err + } + rc := newRegClient() + + // open both configs, and output each as formatted json + d1, err := digest.Parse(args[1]) + if err != nil { + return err + } + c1, err := rc.BlobGetOCIConfig(ctx, r1, types.Descriptor{Digest: d1}) + if err != nil { + return err + } + c1Json, err := json.MarshalIndent(c1, "", " ") + if err != nil { + return err + } + + d2, err := digest.Parse(args[3]) + if err != nil { + return err + } + c2, err := rc.BlobGetOCIConfig(ctx, r2, types.Descriptor{Digest: d2}) + if err != nil { + return err + } + c2Json, err := json.MarshalIndent(c2, "", " ") + if err != nil { + return err + } + + cDiff := diff.Diff(strings.Split(string(c1Json), "\n"), 
strings.Split(string(c2Json), "\n"), diffOpts...) + + _, err = fmt.Fprintln(os.Stdout, strings.Join(cDiff, "\n")) + return err + // TODO: support templating + // return template.Writer(os.Stdout, blobOpts.format, cDiff) +} + +func runBlobDiffLayer(cmd *cobra.Command, args []string) error { + diffOpts := []diff.Opt{} + if blobOpts.diffCtx > 0 { + diffOpts = append(diffOpts, diff.WithContext(blobOpts.diffCtx, blobOpts.diffCtx)) + } + if blobOpts.diffFullCtx { + diffOpts = append(diffOpts, diff.WithFullContext()) + } + ctx := cmd.Context() + r1, err := ref.New(args[0]) + if err != nil { + return err + } + r2, err := ref.New(args[2]) + if err != nil { + return err + } + rc := newRegClient() + + // open both blobs, and generate reports of each content + d1, err := digest.Parse(args[1]) + if err != nil { + return err + } + b1, err := rc.BlobGet(ctx, r1, types.Descriptor{Digest: d1}) + if err != nil { + return err + } + defer b1.Close() + btr1, err := b1.ToTarReader() + if err != nil { + return err + } + tr1, err := btr1.GetTarReader() + if err != nil { + return err + } + rep1, err := blobReportLayer(tr1) + if err != nil { + return err + } + err = btr1.Close() + if err != nil { + return err + } + + d2, err := digest.Parse(args[3]) + if err != nil { + return err + } + b2, err := rc.BlobGet(ctx, r2, types.Descriptor{Digest: d2}) + if err != nil { + return err + } + defer b2.Close() + btr2, err := b2.ToTarReader() + if err != nil { + return err + } + tr2, err := btr2.GetTarReader() + if err != nil { + return err + } + rep2, err := blobReportLayer(tr2) + if err != nil { + return err + } + err = btr2.Close() + if err != nil { + return err + } + + // run diff and output result + lDiff := diff.Diff(rep1, rep2, diffOpts...) 
+ _, err = fmt.Fprintln(os.Stdout, strings.Join(lDiff, "\n")) + return err +} + func runBlobGet(cmd *cobra.Command, args []string) error { ctx := cmd.Context() r, err := ref.New(args[0]) @@ -155,3 +322,37 @@ func runBlobPut(cmd *cobra.Command, args []string) error { return template.Writer(os.Stdout, blobOpts.formatPut, result) } + +func blobReportLayer(tr *tar.Reader) ([]string, error) { + report := []string{} + if tr == nil { + return report, nil + } + for { + th, err := tr.Next() + if err != nil { + if err == io.EOF { + break + } + return report, err + } + line := fmt.Sprintf("%s %d/%d %8d", fs.FileMode(th.Mode).String(), th.Uid, th.Gid, th.Size) + if !blobOpts.diffIgnoreTime { + line += " " + th.ModTime.Format(time.RFC3339) + } + line += fmt.Sprintf(" %-40s", th.Name) + if th.Size > 0 { + d := digest.Canonical.Digester() + size, err := io.Copy(d.Hash(), tr) + if err != nil { + return report, fmt.Errorf("failed to read %s: %w", th.Name, err) + } + if size != th.Size { + return report, fmt.Errorf("size mismatch for %s, expected %d, read %d", th.Name, th.Size, size) + } + line += " " + d.Digest().String() + } + report = append(report, line) + } + return report, nil +} diff --git a/cmd/regctl/manifest.go b/cmd/regctl/manifest.go index 53e5ebe..3a7ed11 100644 --- a/cmd/regctl/manifest.go +++ b/cmd/regctl/manifest.go @@ -2,12 +2,14 @@ package main import ( "context" + "encoding/json" "fmt" "io/ioutil" "os" "strings" "github.com/regclient/regclient" + "github.com/regclient/regclient/internal/diff" "github.com/regclient/regclient/pkg/template" "github.com/regclient/regclient/types" "github.com/regclient/regclient/types/manifest" @@ -36,6 +38,14 @@ layers (blobs) separately or not at all. 
See also the "tag delete" command.`, RunE: runManifestDelete, } +var manifestDiffCmd = &cobra.Command{ + Use: "diff ", + Short: "compare manifests", + Args: cobra.ExactArgs(2), + ValidArgsFunction: completeArgTag, + RunE: runManifestDiff, +} + var manifestDigestCmd = &cobra.Command{ Use: "digest ", Short: "retrieve digest of manifest", @@ -67,6 +77,8 @@ var manifestPutCmd = &cobra.Command{ var manifestOpts struct { byDigest bool contentType string + diffCtx int + diffFullCtx bool forceTagDeref bool format string formatPut string @@ -78,6 +90,9 @@ var manifestOpts struct { func init() { manifestDeleteCmd.Flags().BoolVarP(&manifestOpts.forceTagDeref, "force-tag-dereference", "", false, "Dereference the a tag to a digest, this is unsafe") + manifestDiffCmd.Flags().IntVarP(&manifestOpts.diffCtx, "context", "", 3, "Lines of context") + manifestDiffCmd.Flags().BoolVarP(&manifestOpts.diffFullCtx, "context-full", "", false, "Show all lines of context") + manifestDigestCmd.Flags().BoolVarP(&manifestOpts.list, "list", "", true, "Do not resolve platform from manifest list (enabled by default)") manifestDigestCmd.Flags().StringVarP(&manifestOpts.platform, "platform", "p", "", "Specify platform (e.g. 
linux/amd64 or local)") manifestDigestCmd.Flags().BoolVarP(&manifestOpts.requireList, "require-list", "", false, "Fail if manifest list is not received") @@ -98,6 +113,7 @@ func init() { manifestPutCmd.Flags().StringVarP(&manifestOpts.formatPut, "format", "", "", "Format output with go template syntax") manifestCmd.AddCommand(manifestDeleteCmd) + manifestCmd.AddCommand(manifestDiffCmd) manifestCmd.AddCommand(manifestDigestCmd) manifestCmd.AddCommand(manifestGetCmd) manifestCmd.AddCommand(manifestPutCmd) @@ -214,6 +230,57 @@ func runManifestDelete(cmd *cobra.Command, args []string) error { return nil } +func runManifestDiff(cmd *cobra.Command, args []string) error { + diffOpts := []diff.Opt{} + if manifestOpts.diffCtx > 0 { + diffOpts = append(diffOpts, diff.WithContext(manifestOpts.diffCtx, manifestOpts.diffCtx)) + } + if manifestOpts.diffFullCtx { + diffOpts = append(diffOpts, diff.WithFullContext()) + } + ctx := cmd.Context() + r1, err := ref.New(args[0]) + if err != nil { + return err + } + r2, err := ref.New(args[1]) + if err != nil { + return err + } + + rc := newRegClient() + + log.WithFields(logrus.Fields{ + "ref1": r1.CommonName(), + "ref2": r2.CommonName(), + }).Debug("Manifest diff") + + m1, err := rc.ManifestGet(ctx, r1) + if err != nil { + return err + } + m2, err := rc.ManifestGet(ctx, r2) + if err != nil { + return err + } + + m1Json, err := json.MarshalIndent(m1, "", " ") + if err != nil { + return err + } + m2Json, err := json.MarshalIndent(m2, "", " ") + if err != nil { + return err + } + + mDiff := diff.Diff(strings.Split(string(m1Json), "\n"), strings.Split(string(m2Json), "\n"), diffOpts...) 
+ + _, err = fmt.Fprintln(os.Stdout, strings.Join(mDiff, "\n")) + return err + // TODO: support templating + // return template.Writer(os.Stdout, manifestOpts.format, mDiff) +} + func runManifestDigest(cmd *cobra.Command, args []string) error { ctx := cmd.Context() if manifestOpts.platform != "" && !flagChanged(cmd, "list") { diff --git a/docs/README.md b/docs/README.md index 3270fe8..14fb183 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,6 +1,6 @@ # regclient Documentation -- [Project Specific Documentation](#project-dpecific-documentation) +- [Project Specific Documentation](#project-specific-documentation) - [Schemes](#schemes) - [Template Functions](#template-functions) - [FAQ](#faq) diff --git a/docs/regctl.md b/docs/regctl.md index c0ab5bb..5ae5006 100644 --- a/docs/regctl.md +++ b/docs/regctl.md @@ -162,6 +162,7 @@ Usage: Available Commands: delete delete a manifest + diff compare manifests digest retrieve digest of manifest get retrieve manifest or manifest list put push manifest or manifest list @@ -172,6 +173,9 @@ This will impact all tags pointing to the same manifest and requires a digest to Using `--force-tag-dereference` will automatically lookup the digest for a specific tag, and will delete the underlying image which will delete any other tags pointing to the same image. Use `tag delete` to remove a single tag. +The `diff` command compares two manifests and shows what has changed between these manifests. +See also the `blob diff-config` and `blob diff-layer` commands. + The `digest` command is useful to pin the image used within your deployment to an immutable sha256 checksum. The `get` command retrieves the manifest from the registry, showing individual components of an image. 
@@ -191,10 +195,38 @@ Usage: regctl blob [command] Available Commands: + diff-config diff two image configs + diff-layer diff two tar layers get download a blob/layer put upload a blob/layer ``` +The `diff-config` command compares two config blobs, showing the differences between the configs. + +The `diff-layer` command compares two layer blobs, showing exactly what changed in the filesystem between the two layers. + +Example usage: + +```shell +$ regctl blob diff-layer --context 0 --ignore-timestamp \ + alpine sha256:627fad6f28f79c3907ad18a4399be4d810c0e1bb503fe3712217145c555b9d2f \ + alpine sha256:decfdc335d9bae9ca06166e1a4fc2cdf8c2344a42d85c8a1d3f964aab59ecff5 +@@ -6,1 +6,1 @@ +- -rwxr-xr-x 0/0 824904 bin/busybox sha256:4a1876b4899ce26853ec5f5eb75248e5a2d9e07369c4435c8d41e83393e04a9b ++ -rwxr-xr-x 0/0 829000 bin/busybox sha256:d15929a78a86065c41dd274f2f3f058986b6f5eee4a4c881c83d4fa4179e58ee +@@ -85,1 +85,1 @@ +- -rw-r--r-- 0/0 8 etc/alpine-release sha256:9fa33d932bbf6e5784f15b467a9a10e4ce43993c2341ee742f23ce0196fd73e9 ++ -rw-r--r-- 0/0 7 etc/alpine-release sha256:922fe0c3de073b01988e23348ea184456161678c5e329e6f34be89be24383f93 +@@ -95,1 +95,1 @@ +- -rw-r--r-- 0/0 103 etc/apk/repositories sha256:e44b25ef011171afece2ff51a206b732f84c7f3ddc8291c6dc50cb1572c0ae1c ++ -rw-r--r-- 0/0 103 etc/apk/repositories sha256:7b5dba82c50baee0b4aee54038ca2265df42d1f873d1601934bb45daf17311b4 +@@ -101,1 +101,1 @@ +- -rw-r--r-- 0/0 682 etc/group sha256:412af628e00706d3c90a5d465d59cc422ff68d79eeb8870c4f33ed6df04b2871 ++ -rw-r--r-- 0/0 697 etc/group sha256:0632d55a68081065097472fe7bc7c66f0785f3b78f39fb23f622d24a7e09be9f +@@ -106,1 +106,1 @@ +... +``` + The `get` command will pull a specific sha256 blob from the registry and returns it to stdout. If you are requesting a tar layer, be sure to direct this to a file or command that parses the content. For json blobs, it's useful to redirect this to a command like `jq`. 
// Line-oriented diff: computes an efficient set of changes (insert/delete)
// between two slices of strings using the Myers algorithm and renders them
// as unified-style hunks.

// opKind is used to denote the type of operation a line represents.
type opKind int

const (
	// OpDelete is the operation kind for a line that is present in the input
	// but not in the output.
	OpDelete opKind = iota
	// OpInsert is the operation kind for a line that is new in the output.
	OpInsert
)

// operation is one consolidated edit. The half-open range [X1, X2) indexes
// lines of a and [Y1, Y2) indexes lines of b.
type operation struct {
	Kind   opKind
	X1, X2 int // indices of the line in a
	Y1, Y2 int // indices of the line in b
}

// Opt adjusts how Diff renders its output.
type Opt func(*conf)

// conf holds the rendering settings collected from the options.
type conf struct {
	contextA    int
	contextB    int
	contextFull bool
}

// WithContext sets the number of unchanged lines shown around each hunk.
// a is the number of lines before the change and b the number after it.
func WithContext(a, b int) func(*conf) {
	return func(c *conf) {
		c.contextA = a
		c.contextB = b
	}
}

// WithFullContext shows every unchanged line, producing a single hunk.
func WithFullContext() func(*conf) {
	return func(c *conf) {
		c.contextFull = true
	}
}

// Diff returns the difference between two slices of lines.
//
// The result is a list of output lines: each hunk starts with a
// "@@ -start,count +start,count @@" header followed by context lines,
// removed lines prefixed with "- " and added lines prefixed with "+ ".
// An empty (non-nil) slice is returned when a and b are equal.
func Diff(a, b []string, opts ...Opt) []string {
	c := conf{}
	for _, fn := range opts {
		fn(&c)
	}

	diffLines := []string{}
	setLines := []string{}
	ops := myersOperations(a, b)
	// sX1/sY1 are the start of the current hunk and sX2/sY2 the exclusive end
	// of the last operation added to it. The value -1 is a sentinel meaning
	// "nothing processed yet"; a hunk that is merged with the start of the
	// input keeps sX1 == -1 and the header math below relies on that.
	sX1, sX2, sY1, sY2 := -1, -1, -1, -1
	addSet := func() {
		if len(setLines) == 0 {
			return
		}
		// calculate how many lines of context to add
		cA, cB := c.contextA, c.contextB
		if sX1-cA < 0 || c.contextFull {
			cA = sX1
		}
		if sX2+cB > len(a) || c.contextFull {
			cB = len(a) - sX2
		}
		// add header
		diffLines = append(diffLines, fmt.Sprintf("@@ -%d,%d +%d,%d @@", sX1-cA+1, sX2+cA+cB-sX1, sY1-cA+1, sY2+cA+cB-sY1))
		// add context before, the change set, and context after
		if cA > 0 {
			for _, line := range a[sX1-cA : sX1] {
				diffLines = append(diffLines, " "+line)
			}
		}
		diffLines = append(diffLines, setLines...)
		setLines = []string{} // reset the setLines to a new array
		if cB > 0 {
			for _, line := range a[sX2 : sX2+cB] {
				diffLines = append(diffLines, " "+line)
			}
		}
	}
	for _, op := range ops {
		// compare from last set
		dX, dY := op.X1-sX2, op.Y1-sY2
		if dX != dY || (dX > c.contextA && dX > c.contextB && !c.contextFull) {
			// unexpected diff lines or gap exceeds context limits, create a new set
			addSet()
			sX1, sY1 = op.X1, op.Y1
		} else if dX > 0 {
			// Add the unchanged lines between the previous operation and this
			// one. sX2 is already the exclusive end of the previous operation,
			// so the shared lines start at sX2 itself; only the initial -1
			// sentinel has to be clamped to the first line.
			start := sX2
			if start < 0 {
				start = 0
			}
			for _, line := range a[start:op.X1] {
				setLines = append(setLines, " "+line)
			}
		}
		// add entries to this set, either delete or add
		switch op.Kind {
		case OpDelete:
			for _, line := range a[op.X1:op.X2] {
				setLines = append(setLines, "- "+line)
			}
		case OpInsert:
			for _, line := range b[op.Y1:op.Y2] {
				setLines = append(setLines, "+ "+line)
			}
		}
		// update end of set
		sX2, sY2 = op.X2, op.Y2
	}
	addSet()
	return diffLines
}

// Recommended reading:
// https://blog.jcoglan.com/2017/02/17/the-myers-diff-algorithm-part-3/
// https://www.codeproject.com/Articles/42279/%2FArticles%2F42279%2FInvestigating-Myers-diff-algorithm-Part-1-of-2
// https://cs.opensource.google/go/x/tools/+/refs/tags/v0.1.11:internal/lsp/diff/myers/diff.go;l=19

// myersOperations returns the list of operations to convert a into b.
// This consolidates operations for multiple lines and skips equal lines.
func myersOperations(a, b []string) []*operation {
	if len(a) == 0 && len(b) == 0 {
		return nil
	}
	trace, offset := myersShortestSeq(a, b)
	snakes := myersBacktrack(trace, len(a), len(b), offset)
	M, N := len(a), len(b)
	var i int
	// there can never be more operations than lines in both inputs
	solution := make([]*operation, len(a)+len(b))
	add := func(op *operation, x2, y2 int) {
		if op == nil {
			return
		}
		if i > 0 && solution[i-1].Kind == op.Kind && solution[i-1].X2 == op.X1 && solution[i-1].Y2 == op.Y1 {
			// extend add/delete from previous entry
			solution[i-1].X2 = x2
			solution[i-1].Y2 = y2
		} else {
			// add a new operation
			op.X2 = x2
			op.Y2 = y2
			solution[i] = op
			i++
		}
	}
	x, y := 0, 0
	for _, snake := range snakes {
		if len(snake) < 2 {
			continue
		}
		if snake[0]-snake[1] > x-y {
			// delete (horizontal)
			op := &operation{
				Kind: OpDelete,
				X1:   x,
				Y1:   y,
			}
			x++
			if x <= M {
				add(op, x, y)
			}
		} else if snake[0]-snake[1] < x-y {
			// insert (vertical)
			op := &operation{
				Kind: OpInsert,
				X1:   x,
				Y1:   y,
			}
			y++
			if y <= N {
				add(op, x, y)
			}
		}
		// equal (diagonal)
		for x < snake[0] {
			x++
			y++
		}
		if x >= M && y >= N {
			break
		}
	}
	return solution[:i]
}

// myersBacktrack returns a list of "snakes" for a given trace.
// A "snake" is a single deletion or insertion followed by zero or more diagonals.
// snakes[d] is the x,y coordinate of the best position on the best path at distance d.
// Entries for distances that were never reached are left nil.
func myersBacktrack(trace [][]int, x, y, offset int) [][]int {
	if len(trace) == 0 {
		// nothing to walk back through, avoid indexing snakes[-1] below
		return nil
	}
	snakes := make([][]int, len(trace))
	d := len(trace) - 1
	for ; x >= 0 && y >= 0 && d > 0; d-- {
		V := trace[d]
		if len(V) == 0 {
			// distance beyond the one where the end was reached
			continue
		}
		snakes[d] = []int{x, y}

		k := x - y

		// same choice as the forward pass: did we arrive from k+1 (down) or k-1 (right)
		var kPrev int
		if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
			kPrev = k + 1
		} else {
			kPrev = k - 1
		}

		x = V[kPrev+offset]
		y = x - kPrev
	}
	if x < 0 || y < 0 {
		return snakes
	}
	snakes[d] = []int{x, y}
	return snakes
}

// myersShortestSeq returns the shortest edit sequence that converts a into b.
// M and N, length of a and b respectively.
// x: index of a, x+1 moves right, indicating deletion from a.
// y: index of b, y+1 moves down, indicating insertion from b.
// k: diagonals represented by the equation y = x - k. If inserts==deletes, k=0.
// V[k]=x: best values of x for each k diagonal.
// d: distance, sum of inserts/deletes.
// trace[d]=V, best values for x for each k diagonal and distance d.
// return is the trace and offset
func myersShortestSeq(a, b []string) ([][]int, int) {
	M, N := len(a), len(b)
	if M+N == 0 {
		// Two empty inputs are already equal at distance 0. V would only have
		// a single entry, so the k+1 lookup below would be out of range.
		return [][]int{{0}}, 0
	}
	V := make([]int, 2*(N+M)+1)
	offset := N + M
	trace := make([][]int, N+M+1)
	// iterate up to the maximum possible length
	for d := 0; d <= N+M; d++ {
		newV := make([]int, len(V))
		// move in increments of 2 because end points for even d are on even k lines
		for k := -d; k <= d; k += 2 {
			// At each point, we either go down or to the right.
			// We go down if k == -d, and we go to the right if k == d.
			// We also prioritize the maximum x value, because we prefer deletions to insertions.
			var x int
			if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
				x = V[k+1+offset] // down
			} else {
				x = V[k-1+offset] + 1 // right
			}
			y := x - k
			// Diagonal moves while we have equal contents.
			for x < M && y < N && a[x] == b[y] {
				x++
				y++
			}
			V[k+offset] = x
			// Return if we've exceeded the maximum values.
			if x == M && y == N {
				// Makes sure to save the state of the array before returning.
				copy(newV, V)
				trace[d] = newV
				return trace, offset
			}
		}
		// Save the state of the array.
		copy(newV, V)
		trace[d] = newV
	}
	return nil, 0
}
+ tr, err := btr.GetTarReader() + if err != nil { + t.Errorf("failed to get tar reader: %v", err) + return + } + for { + th, err := tr.Next() + if err != nil { + if err != io.EOF { + t.Errorf("failed to read tar: %v", err) + return + } + break + } + if th.Size != 0 { + b, err := io.ReadAll(tr) + if err != nil { + t.Errorf("failed to read content: %v", err) + break + } + if int64(len(b)) != th.Size { + t.Errorf("content size mismatch, expected %d, received %d", th.Size, len(b)) + } + } + } + err = btr.Close() + if !tt.errClose && err != nil { + t.Errorf("failed to close tar reader: %v", err) + } else if tt.errClose && err == nil { + t.Errorf("close did not fail") + } + }) + } +} + func cmpSliceString(a, b []string) bool { if len(a) != len(b) { return false diff --git a/types/blob/reader.go b/types/blob/reader.go index 6d67a42..0af260f 100644 --- a/types/blob/reader.go +++ b/types/blob/reader.go @@ -18,6 +18,7 @@ type Reader interface { Blob io.ReadCloser ToOCIConfig() (OCIConfig, error) + ToTarReader() (TarReader, error) } // reader is the internal struct implementing BlobReader @@ -163,3 +164,19 @@ func (b *reader) ToOCIConfig() (OCIConfig, error) { WithResp(b.resp), ), nil } + +func (b *reader) ToTarReader() (TarReader, error) { + if !b.blobSet { + return nil, fmt.Errorf("blob is not defined") + } + if b.readBytes != 0 { + return nil, fmt.Errorf("unable to convert after read has been performed") + } + return NewTarReader( + WithDesc(b.desc), + WithHeader(b.rawHeader), + WithRef(b.r), + WithResp(b.resp), + WithReader(b.reader), + ), nil +} diff --git a/types/blob/tar.go b/types/blob/tar.go new file mode 100644 index 0000000..c479028 --- /dev/null +++ b/types/blob/tar.go @@ -0,0 +1,102 @@ +package blob + +import ( + "archive/tar" + "fmt" + "io" + "io/ioutil" + + "github.com/opencontainers/go-digest" + "github.com/regclient/regclient/pkg/archive" +) + +// TarReader reads or writes to a blob with tar contents and optional compression +type TarReader interface { + Blob 
+ io.Closer + GetTarReader() (*tar.Reader, error) +} + +type tarReader struct { + common + origRdr io.Reader + reader io.Reader + digester digest.Digester + tr *tar.Reader +} + +// NewTarReader creates a TarReader +func NewTarReader(opts ...Opts) TarReader { + bc := blobConfig{} + for _, opt := range opts { + opt(&bc) + } + c := common{ + desc: bc.desc, + r: bc.r, + rawHeader: bc.header, + resp: bc.resp, + } + tr := tarReader{ + common: c, + origRdr: bc.rdr, + } + if bc.rdr != nil { + tr.blobSet = true + tr.digester = digest.Canonical.Digester() + tr.reader = io.TeeReader(bc.rdr, tr.digester.Hash()) + } + return &tr +} + +// Close attempts to close the reader and populates/validates the digest +func (tr *tarReader) Close() error { + var err error + if tr.digester != nil { + dig := tr.digester.Digest() + tr.digester = nil + if tr.desc.Digest.String() != "" && dig != tr.desc.Digest { + err = fmt.Errorf("digest mismatch, expected %s, received %s", tr.desc.Digest.String(), dig.String()) + } + tr.desc.Digest = dig + } + if tr.origRdr == nil { + return err + } + // attempt to close if available in original reader + if trc, ok := tr.origRdr.(io.Closer); ok { + return trc.Close() + } + return err +} + +// GetTarReader returns the tar.Reader for the blob +func (tr *tarReader) GetTarReader() (*tar.Reader, error) { + if tr.reader == nil { + return nil, fmt.Errorf("blob has no reader defined") + } + if tr.tr == nil { + dr, err := archive.Decompress(tr.reader) + if err != nil { + return nil, err + } + tr.tr = tar.NewReader(dr) + } + return tr.tr, nil +} + +// RawBody returns the original body from the request +func (tr *tarReader) RawBody() ([]byte, error) { + if !tr.blobSet { + return []byte{}, fmt.Errorf("Blob is not defined") + } + if tr.tr != nil { + return []byte{}, fmt.Errorf("RawBody cannot be returned after TarReader returned") + } + b, err := ioutil.ReadAll(tr.reader) + if err != nil { + return b, err + } + err = tr.Close() + return b, err +}