Kaynağa Gözat

Merge pull request #7889 from vbatts/vbatts-tarsum_name_collision

tarsum: name collision fix
unclejack 10 yıl önce
ebeveyn
işleme
458b019e62

+ 6 - 5
builder/internals.go

@@ -214,11 +214,11 @@ func (b *Builder) runContextCommand(args []string, allowRemote bool, allowDecomp
 			return err
 		} else if fi.IsDir() {
 			var subfiles []string
-			for file, sum := range sums {
-				absFile := path.Join(b.contextPath, file)
+			for _, fileInfo := range sums {
+				absFile := path.Join(b.contextPath, fileInfo.Name())
 				absOrigPath := path.Join(b.contextPath, origPath)
 				if strings.HasPrefix(absFile, absOrigPath) {
-					subfiles = append(subfiles, sum)
+					subfiles = append(subfiles, fileInfo.Sum())
 				}
 			}
 			sort.Strings(subfiles)
@@ -230,8 +230,9 @@ func (b *Builder) runContextCommand(args []string, allowRemote bool, allowDecomp
 				origPath = origPath[1:]
 			}
 			origPath = strings.TrimPrefix(origPath, "./")
-			if h, ok := sums[origPath]; ok {
-				hash = "file:" + h
+			// This will match only the first file with this name in the archive's sums
+			if fis := sums.GetFile(origPath); fis != nil {
+				hash = "file:" + fis.Sum()
 			}
 		}
 		b.Config.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("#(nop) %s %s in %s", cmdName, hash, dest)}

+ 125 - 0
pkg/tarsum/fileinfosums.go

@@ -0,0 +1,125 @@
+package tarsum
+
+import "sort"
+
+// This info will be accessed through an interface so the actual name and sum cannot be meddled with
+type FileInfoSumInterface interface {
+	// File name
+	Name() string
+	// Checksum of this particular file and its headers
+	Sum() string
+	// Position of file in the tar
+	Pos() int64
+}
+
+type fileInfoSum struct {
+	name string
+	sum  string
+	pos  int64
+}
+
+func (fis fileInfoSum) Name() string {
+	return fis.name
+}
+func (fis fileInfoSum) Sum() string {
+	return fis.sum
+}
+func (fis fileInfoSum) Pos() int64 {
+	return fis.pos
+}
+
+type FileInfoSums []FileInfoSumInterface
+
+// GetFile returns the first FileInfoSumInterface with a matching name
+func (fis FileInfoSums) GetFile(name string) FileInfoSumInterface {
+	for i := range fis {
+		if fis[i].Name() == name {
+			return fis[i]
+		}
+	}
+	return nil
+}
+
+// GetAllFile returns a FileInfoSums with all matching names
+func (fis FileInfoSums) GetAllFile(name string) FileInfoSums {
+	f := FileInfoSums{}
+	for i := range fis {
+		if fis[i].Name() == name {
+			f = append(f, fis[i])
+		}
+	}
+	return f
+}
+
+func contains(s []string, e string) bool {
+	for _, a := range s {
+		if a == e {
+			return true
+		}
+	}
+	return false
+}
+
+func (fis FileInfoSums) GetDuplicatePaths() (dups FileInfoSums) {
+	seen := make(map[string]int, len(fis)) // allocate early; no need to grow this map.
+	for i := range fis {
+		f := fis[i]
+		if _, ok := seen[f.Name()]; ok {
+			dups = append(dups, f)
+		} else {
+			seen[f.Name()] = 0
+		}
+	}
+	return dups
+}
+
+func (fis FileInfoSums) Len() int      { return len(fis) }
+func (fis FileInfoSums) Swap(i, j int) { fis[i], fis[j] = fis[j], fis[i] }
+
+func (fis FileInfoSums) SortByPos() {
+	sort.Sort(byPos{fis})
+}
+
+func (fis FileInfoSums) SortByNames() {
+	sort.Sort(byName{fis})
+}
+
+func (fis FileInfoSums) SortBySums() {
+	dups := fis.GetDuplicatePaths()
+	if len(dups) > 0 {
+		sort.Sort(bySum{fis, dups})
+	} else {
+		sort.Sort(bySum{fis, nil})
+	}
+}
+
+// byName is a sort.Sort helper for sorting by file names.
+// If names are the same, order them by their appearance in the tar archive
+type byName struct{ FileInfoSums }
+
+func (bn byName) Less(i, j int) bool {
+	if bn.FileInfoSums[i].Name() == bn.FileInfoSums[j].Name() {
+		return bn.FileInfoSums[i].Pos() < bn.FileInfoSums[j].Pos()
+	}
+	return bn.FileInfoSums[i].Name() < bn.FileInfoSums[j].Name()
+}
+
+// bySum is a sort.Sort helper for sorting by the sums of all the fileinfos in the tar archive
+type bySum struct {
+	FileInfoSums
+	dups FileInfoSums
+}
+
+func (bs bySum) Less(i, j int) bool {
+	if bs.dups != nil && bs.FileInfoSums[i].Name() == bs.FileInfoSums[j].Name() {
+		return bs.FileInfoSums[i].Pos() < bs.FileInfoSums[j].Pos()
+	}
+	return bs.FileInfoSums[i].Sum() < bs.FileInfoSums[j].Sum()
+}
+
+// byPos is a sort.Sort helper for sorting all the fileinfos by their original order (position in the tar archive)
+type byPos struct{ FileInfoSums }
+
+func (bp byPos) Less(i, j int) bool {
+	return bp.FileInfoSums[i].Pos() < bp.FileInfoSums[j].Pos()
+}

+ 45 - 0
pkg/tarsum/fileinfosums_test.go

@@ -0,0 +1,45 @@
+package tarsum
+
+import "testing"
+
+func newFileInfoSums() FileInfoSums {
+	return FileInfoSums{
+		fileInfoSum{name: "file3", sum: "2abcdef1234567890", pos: 2},
+		fileInfoSum{name: "dup1", sum: "deadbeef1", pos: 5},
+		fileInfoSum{name: "file1", sum: "0abcdef1234567890", pos: 0},
+		fileInfoSum{name: "file4", sum: "3abcdef1234567890", pos: 3},
+		fileInfoSum{name: "dup1", sum: "deadbeef0", pos: 4},
+		fileInfoSum{name: "file2", sum: "1abcdef1234567890", pos: 1},
+	}
+}
+
+func TestSortFileInfoSums(t *testing.T) {
+	dups := newFileInfoSums().GetAllFile("dup1")
+	if len(dups) != 2 {
+		t.Errorf("expected length 2, got %d", len(dups))
+	}
+	dups.SortByNames()
+	if dups[0].Pos() != 4 {
+		t.Errorf("sorted dups should be ordered by position. Expected 4, got %d", dups[0].Pos())
+	}
+
+	fis := newFileInfoSums()
+	expected := "0abcdef1234567890"
+	fis.SortBySums()
+	got := fis[0].Sum()
+	if got != expected {
+		t.Errorf("Expected %q, got %q", expected, got)
+	}
+
+	fis = newFileInfoSums()
+	expected = "dup1"
+	fis.SortByNames()
+	gotFis := fis[0]
+	if gotFis.Name() != expected {
+		t.Errorf("Expected %q, got %q", expected, gotFis.Name())
+	}
+	// since a duplicate is first, ensure it is ordered first by position too
+	if gotFis.Pos() != 4 {
+		t.Errorf("Expected %d, got %d", 4, gotFis.Pos())
+	}
+}

+ 11 - 14
pkg/tarsum/tarsum.go

@@ -39,7 +39,7 @@ func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) {
 // checksums of a tar archive
 type TarSum interface {
 	io.Reader
-	GetSums() map[string]string
+	GetSums() FileInfoSums
 	Sum([]byte) string
 	Version() Version
 }
@@ -54,7 +54,8 @@ type tarSum struct {
 	bufGz              *bytes.Buffer
 	bufData            []byte
 	h                  hash.Hash
-	sums               map[string]string
+	sums               FileInfoSums
+	fileCounter        int64
 	currentFile        string
 	finished           bool
 	first              bool
@@ -126,7 +127,7 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
 		ts.h = sha256.New()
 		ts.h.Reset()
 		ts.first = true
-		ts.sums = make(map[string]string)
+		ts.sums = FileInfoSums{}
 	}
 
 	if ts.finished {
@@ -153,7 +154,8 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
 				return 0, err
 			}
 			if !ts.first {
-				ts.sums[ts.currentFile] = hex.EncodeToString(ts.h.Sum(nil))
+				ts.sums = append(ts.sums, fileInfoSum{name: ts.currentFile, sum: hex.EncodeToString(ts.h.Sum(nil)), pos: ts.fileCounter})
+				ts.fileCounter++
 				ts.h.Reset()
 			} else {
 				ts.first = false
@@ -218,25 +220,20 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
 }
 
 func (ts *tarSum) Sum(extra []byte) string {
-	var sums []string
-
-	for _, sum := range ts.sums {
-		sums = append(sums, sum)
-	}
-	sort.Strings(sums)
+	ts.sums.SortBySums()
 	h := sha256.New()
 	if extra != nil {
 		h.Write(extra)
 	}
-	for _, sum := range sums {
-		log.Debugf("-->%s<--", sum)
-		h.Write([]byte(sum))
+	for _, fis := range ts.sums {
+		log.Debugf("-->%s<--", fis.Sum())
+		h.Write([]byte(fis.Sum()))
 	}
 	checksum := ts.Version().String() + "+sha256:" + hex.EncodeToString(h.Sum(nil))
 	log.Debugf("checksum processed: %s", checksum)
 	return checksum
 }
 
-func (ts *tarSum) GetSums() map[string]string {
+func (ts *tarSum) GetSums() FileInfoSums {
 	return ts.sums
 }

+ 16 - 0
pkg/tarsum/tarsum_test.go

@@ -59,6 +59,22 @@ var testLayers = []testLayer{
 	{
 		options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory)
 		tarsum:  "tarsum+sha256:8bf12d7e67c51ee2e8306cba569398b1b9f419969521a12ffb9d8875e8836738"},
+	{
+		// this tar has two files with the same path
+		filename: "testdata/collision/collision-0.tar",
+		tarsum:   "tarsum+sha256:08653904a68d3ab5c59e65ef58c49c1581caa3c34744f8d354b3f575ea04424a"},
+	{
+		// this tar has the same two files (with the same path), but in reversed order, ensuring it has a different hash than above
+		filename: "testdata/collision/collision-1.tar",
+		tarsum:   "tarsum+sha256:b51c13fbefe158b5ce420d2b930eef54c5cd55c50a2ee4abdddea8fa9f081e0d"},
+	{
+		// this tar is a newer variant of collision-0.tar, ensuring it has a different hash
+		filename: "testdata/collision/collision-2.tar",
+		tarsum:   "tarsum+sha256:381547080919bb82691e995508ae20ed33ce0f6948d41cafbeb70ce20c73ee8e"},
+	{
+		// this tar is a newer variant of collision-1.tar, ensuring it has a different hash
+		filename: "testdata/collision/collision-3.tar",
+		tarsum:   "tarsum+sha256:f886e431c08143164a676805205979cd8fa535dfcef714db5515650eea5a7c0f"},
 }
 
 type sizedOptions struct {

BIN
pkg/tarsum/testdata/collision/collision-0.tar


BIN
pkg/tarsum/testdata/collision/collision-1.tar


BIN
pkg/tarsum/testdata/collision/collision-2.tar


BIN
pkg/tarsum/testdata/collision/collision-3.tar