Jelajahi Sumber

Merge pull request #47078 from vvoland/docker-save-fix-layers-2

image/save: Derive Descriptor from actual tar archive
Sebastiaan van Stijn 1 tahun lalu
induk
melakukan
a42726f866
2 mengubah file dengan 109 tambahan dan 62 penghapusan
  1. 101 62
      image/tarexport/save.go
  2. 8 0
      integration/image/save_test.go

+ 101 - 62
image/tarexport/save.go

@@ -1,6 +1,7 @@
 package tarexport // import "github.com/docker/docker/image/tarexport"
 
 import (
+	"context"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -10,6 +11,7 @@ import (
 	"time"
 
 	"github.com/containerd/containerd/images"
+	"github.com/containerd/log"
 	"github.com/distribution/reference"
 	"github.com/docker/distribution"
 	"github.com/docker/docker/api/types/events"
@@ -34,10 +36,10 @@ type imageDescriptor struct {
 
 type saveSession struct {
 	*tarexporter
-	outDir      string
-	images      map[image.ID]*imageDescriptor
-	savedLayers map[string]struct{}
-	diffIDPaths map[layer.DiffID]string // cache every diffID blob to avoid duplicates
+	outDir       string
+	images       map[image.ID]*imageDescriptor
+	savedLayers  map[layer.DiffID]distribution.Descriptor
+	savedConfigs map[string]struct{}
 }
 
 func (l *tarexporter) Save(names []string, outStream io.Writer) error {
@@ -178,8 +180,8 @@ func (l *tarexporter) releaseLayerReferences(imgDescr map[image.ID]*imageDescrip
 }
 
 func (s *saveSession) save(outStream io.Writer) error {
-	s.savedLayers = make(map[string]struct{})
-	s.diffIDPaths = make(map[layer.DiffID]string)
+	s.savedConfigs = make(map[string]struct{})
+	s.savedLayers = make(map[layer.DiffID]distribution.Descriptor)
 
 	// get image json
 	tempDir, err := os.MkdirTemp("", "docker-export-")
@@ -403,7 +405,7 @@ func (s *saveSession) saveImage(id image.ID) (map[layer.DiffID]distribution.Desc
 		}
 
 		v1Img.OS = img.OS
-		src, err := s.saveLayer(rootFS.ChainID(), v1Img, img.Created)
+		src, err := s.saveConfigAndLayer(rootFS.ChainID(), v1Img, img.Created)
 		if err != nil {
 			return nil, err
 		}
@@ -448,88 +450,125 @@ func (s *saveSession) saveImage(id image.ID) (map[layer.DiffID]distribution.Desc
 	return foreignSrcs, nil
 }
 
-func (s *saveSession) saveLayer(id layer.ChainID, legacyImg image.V1Image, createdTime *time.Time) (distribution.Descriptor, error) {
-	if _, exists := s.savedLayers[legacyImg.ID]; exists {
-		return distribution.Descriptor{}, nil
-	}
-
+func (s *saveSession) saveConfigAndLayer(id layer.ChainID, legacyImg image.V1Image, createdTime *time.Time) (distribution.Descriptor, error) {
 	outDir := filepath.Join(s.outDir, ocispec.ImageBlobsDir)
 
-	imageConfig, err := json.Marshal(legacyImg)
+	if _, ok := s.savedConfigs[legacyImg.ID]; !ok {
+		if err := s.saveConfig(legacyImg, outDir, createdTime); err != nil {
+			return distribution.Descriptor{}, err
+		}
+	}
+
+	// serialize filesystem
+	l, err := s.lss.Get(id)
 	if err != nil {
 		return distribution.Descriptor{}, err
 	}
 
-	cfgDgst := digest.FromBytes(imageConfig)
-	configPath := filepath.Join(outDir, cfgDgst.Algorithm().String(), cfgDgst.Encoded())
-	if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
+	lDiffID := l.DiffID()
+	lDgst := digest.Digest(lDiffID)
+	if _, ok := s.savedLayers[lDiffID]; ok {
+		return s.savedLayers[lDiffID], nil
+	}
+	layerPath := filepath.Join(outDir, lDgst.Algorithm().String(), lDgst.Encoded())
+	defer layer.ReleaseAndLog(s.lss, l)
+
+	if _, err = os.Stat(layerPath); err == nil {
+		// This is should not happen. If the layer path was already created, we should have returned early.
+		// Log a warning an proceed to recreate the archive.
+		log.G(context.TODO()).WithFields(log.Fields{
+			"layerPath": layerPath,
+			"id":        id,
+			"lDgst":     lDgst,
+		}).Warn("LayerPath already exists but the descriptor is not cached")
+	} else if !os.IsNotExist(err) {
+		return distribution.Descriptor{}, err
+	}
+
+	// We use sequential file access to avoid depleting the standby list on
+	// Windows. On Linux, this equates to a regular os.Create.
+	if err := os.MkdirAll(filepath.Dir(layerPath), 0o755); err != nil {
 		return distribution.Descriptor{}, errors.Wrap(err, "could not create layer dir parent")
 	}
+	tarFile, err := sequential.Create(layerPath)
+	if err != nil {
+		return distribution.Descriptor{}, errors.Wrap(err, "error creating layer file")
+	}
+	defer tarFile.Close()
 
-	if err := os.WriteFile(configPath, imageConfig, 0o644); err != nil {
+	arch, err := l.TarStream()
+	if err != nil {
 		return distribution.Descriptor{}, err
 	}
+	defer arch.Close()
 
-	// serialize filesystem
-	l, err := s.lss.Get(id)
+	digester := digest.Canonical.Digester()
+	digestedArch := io.TeeReader(arch, digester.Hash())
+
+	tarSize, err := io.Copy(tarFile, digestedArch)
 	if err != nil {
 		return distribution.Descriptor{}, err
 	}
 
-	lDgst := digest.Digest(l.DiffID())
-	layerPath := filepath.Join(outDir, lDgst.Algorithm().String(), lDgst.Encoded())
-	defer layer.ReleaseAndLog(s.lss, l)
+	tarDigest := digester.Digest()
+	if lDgst != tarDigest {
+		log.G(context.TODO()).WithFields(log.Fields{
+			"layerDigest":  lDgst,
+			"actualDigest": tarDigest,
+		}).Warn("layer digest doesn't match its tar archive digest")
 
-	if _, err = os.Stat(layerPath); err != nil {
-		if !os.IsNotExist(err) {
-			return distribution.Descriptor{}, err
-		}
+		lDgst = digester.Digest()
+		layerPath = filepath.Join(outDir, lDgst.Algorithm().String(), lDgst.Encoded())
+	}
 
-		// We use sequential file access to avoid depleting the standby list on
-		// Windows. On Linux, this equates to a regular os.Create.
-		if err := os.MkdirAll(filepath.Dir(layerPath), 0o755); err != nil {
-			return distribution.Descriptor{}, errors.Wrap(err, "could not create layer dir parent")
-		}
-		tarFile, err := sequential.Create(layerPath)
-		if err != nil {
-			return distribution.Descriptor{}, errors.Wrap(err, "error creating layer file")
+	if createdTime != nil {
+		for _, fname := range []string{outDir, layerPath} {
+			// todo: maybe save layer created timestamp?
+			if err := system.Chtimes(fname, *createdTime, *createdTime); err != nil {
+				return distribution.Descriptor{}, errors.Wrap(err, "could not set layer timestamp")
+			}
 		}
-		defer tarFile.Close()
+	}
 
-		arch, err := l.TarStream()
-		if err != nil {
-			return distribution.Descriptor{}, err
-		}
-		defer arch.Close()
+	var desc distribution.Descriptor
+	if fs, ok := l.(distribution.Describable); ok {
+		desc = fs.Descriptor()
+	}
 
-		if _, err := io.Copy(tarFile, arch); err != nil {
-			return distribution.Descriptor{}, err
-		}
+	if desc.Digest == "" {
+		desc.Digest = tarDigest
+		desc.Size = tarSize
+	}
+	if desc.MediaType == "" {
+		desc.MediaType = ocispec.MediaTypeImageLayer
+	}
+	s.savedLayers[lDiffID] = desc
 
-		if createdTime != nil {
-			for _, fname := range []string{outDir, configPath, layerPath} {
-				// todo: maybe save layer created timestamp?
-				if err := system.Chtimes(fname, *createdTime, *createdTime); err != nil {
-					return distribution.Descriptor{}, errors.Wrap(err, "could not set layer timestamp")
-				}
-			}
-		}
+	return desc, nil
+}
 
-		s.diffIDPaths[l.DiffID()] = layerPath
-		s.savedLayers[legacyImg.ID] = struct{}{}
+func (s *saveSession) saveConfig(legacyImg image.V1Image, outDir string, createdTime *time.Time) error {
+	imageConfig, err := json.Marshal(legacyImg)
+	if err != nil {
+		return err
 	}
 
-	var src distribution.Descriptor
-	if fs, ok := l.(distribution.Describable); ok {
-		src = fs.Descriptor()
+	cfgDgst := digest.FromBytes(imageConfig)
+	configPath := filepath.Join(outDir, cfgDgst.Algorithm().String(), cfgDgst.Encoded())
+	if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
+		return errors.Wrap(err, "could not create layer dir parent")
 	}
 
-	if src.Digest == "" {
-		src = distribution.Descriptor{
-			MediaType: ocispec.MediaTypeImageLayer,
-			Digest:    lDgst,
-			Size:      l.Size(),
+	if err := os.WriteFile(configPath, imageConfig, 0o644); err != nil {
+		return err
+	}
+
+	if createdTime != nil {
+		if err := system.Chtimes(configPath, *createdTime, *createdTime); err != nil {
+			return errors.Wrap(err, "could not set config timestamp")
 		}
 	}
-	return src, nil
+
+	s.savedConfigs[legacyImg.ID] = struct{}{}
+	return nil
 }

+ 8 - 0
integration/image/save_test.go

@@ -119,6 +119,14 @@ func TestSaveCheckManifestLayers(t *testing.T) {
 	assert.NilError(t, json.Unmarshal(manifestData, &manifest))
 
 	assert.Check(t, is.Len(manifest.Layers, len(img.RootFS.Layers)))
+	for _, l := range manifest.Layers {
+		stat, err := fs.Stat(tarfs, "blobs/sha256/"+l.Digest.Encoded())
+		if !assert.Check(t, err) {
+			continue
+		}
+
+		assert.Check(t, is.Equal(l.Size, stat.Size()))
+	}
 }
 
 func TestSaveRepoWithMultipleImages(t *testing.T) {