diff --git a/image/tarexport/save.go b/image/tarexport/save.go index 67ad4c1a54..bd676d432a 100644 --- a/image/tarexport/save.go +++ b/image/tarexport/save.go @@ -1,6 +1,7 @@ package tarexport // import "github.com/docker/docker/image/tarexport" import ( + "context" "encoding/json" "fmt" "io" @@ -10,6 +11,7 @@ import ( "time" "github.com/containerd/containerd/images" + "github.com/containerd/log" "github.com/distribution/reference" "github.com/docker/distribution" "github.com/docker/docker/api/types/events" @@ -34,10 +36,10 @@ type imageDescriptor struct { type saveSession struct { *tarexporter - outDir string - images map[image.ID]*imageDescriptor - savedLayers map[string]struct{} - diffIDPaths map[layer.DiffID]string // cache every diffID blob to avoid duplicates + outDir string + images map[image.ID]*imageDescriptor + savedLayers map[layer.DiffID]distribution.Descriptor + savedConfigs map[string]struct{} } func (l *tarexporter) Save(names []string, outStream io.Writer) error { @@ -178,8 +180,8 @@ func (l *tarexporter) releaseLayerReferences(imgDescr map[image.ID]*imageDescrip } func (s *saveSession) save(outStream io.Writer) error { - s.savedLayers = make(map[string]struct{}) - s.diffIDPaths = make(map[layer.DiffID]string) + s.savedConfigs = make(map[string]struct{}) + s.savedLayers = make(map[layer.DiffID]distribution.Descriptor) // get image json tempDir, err := os.MkdirTemp("", "docker-export-") @@ -403,7 +405,7 @@ func (s *saveSession) saveImage(id image.ID) (map[layer.DiffID]distribution.Desc } v1Img.OS = img.OS - src, err := s.saveLayer(rootFS.ChainID(), v1Img, img.Created) + src, err := s.saveConfigAndLayer(rootFS.ChainID(), v1Img, img.Created) if err != nil { return nil, err } @@ -448,26 +450,13 @@ func (s *saveSession) saveImage(id image.ID) (map[layer.DiffID]distribution.Desc return foreignSrcs, nil } -func (s *saveSession) saveLayer(id layer.ChainID, legacyImg image.V1Image, createdTime *time.Time) (distribution.Descriptor, error) { - if _, exists := s.savedLayers[legacyImg.ID]; exists { - return distribution.Descriptor{}, nil - } - +func (s *saveSession) saveConfigAndLayer(id layer.ChainID, legacyImg image.V1Image, createdTime *time.Time) (distribution.Descriptor, error) { outDir := filepath.Join(s.outDir, ocispec.ImageBlobsDir) - imageConfig, err := json.Marshal(legacyImg) - if err != nil { - return distribution.Descriptor{}, err - } - - cfgDgst := digest.FromBytes(imageConfig) - configPath := filepath.Join(outDir, cfgDgst.Algorithm().String(), cfgDgst.Encoded()) - if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil { - return distribution.Descriptor{}, errors.Wrap(err, "could not create layer dir parent") - } - - if err := os.WriteFile(configPath, imageConfig, 0o644); err != nil { - return distribution.Descriptor{}, err + if _, ok := s.savedConfigs[legacyImg.ID]; !ok { + if err := s.saveConfig(legacyImg, outDir, createdTime); err != nil { + return distribution.Descriptor{}, err + } } // serialize filesystem @@ -476,60 +465,110 @@ func (s *saveSession) saveLayer(id layer.ChainID, legacyImg image.V1Image, creat return distribution.Descriptor{}, err } - lDgst := digest.Digest(l.DiffID()) + lDiffID := l.DiffID() + lDgst := digest.Digest(lDiffID) + if _, ok := s.savedLayers[lDiffID]; ok { + return s.savedLayers[lDiffID], nil + } layerPath := filepath.Join(outDir, lDgst.Algorithm().String(), lDgst.Encoded()) defer layer.ReleaseAndLog(s.lss, l) - if _, err = os.Stat(layerPath); err != nil { - if !os.IsNotExist(err) { - return distribution.Descriptor{}, err - } + if _, err = os.Stat(layerPath); err == nil { + // This is should not happen. If the layer path was already created, we should have returned early. + // Log a warning an proceed to recreate the archive. + log.G(context.TODO()).WithFields(log.Fields{ + "layerPath": layerPath, + "id": id, + "lDgst": lDgst, + }).Warn("LayerPath already exists but the descriptor is not cached") + } else if !os.IsNotExist(err) { + return distribution.Descriptor{}, err + } - // We use sequential file access to avoid depleting the standby list on - // Windows. On Linux, this equates to a regular os.Create. - if err := os.MkdirAll(filepath.Dir(layerPath), 0o755); err != nil { - return distribution.Descriptor{}, errors.Wrap(err, "could not create layer dir parent") - } - tarFile, err := sequential.Create(layerPath) - if err != nil { - return distribution.Descriptor{}, errors.Wrap(err, "error creating layer file") - } - defer tarFile.Close() + // We use sequential file access to avoid depleting the standby list on + // Windows. On Linux, this equates to a regular os.Create. + if err := os.MkdirAll(filepath.Dir(layerPath), 0o755); err != nil { + return distribution.Descriptor{}, errors.Wrap(err, "could not create layer dir parent") + } + tarFile, err := sequential.Create(layerPath) + if err != nil { + return distribution.Descriptor{}, errors.Wrap(err, "error creating layer file") + } + defer tarFile.Close() - arch, err := l.TarStream() - if err != nil { - return distribution.Descriptor{}, err - } - defer arch.Close() + arch, err := l.TarStream() + if err != nil { + return distribution.Descriptor{}, err + } + defer arch.Close() - if _, err := io.Copy(tarFile, arch); err != nil { - return distribution.Descriptor{}, err - } + digester := digest.Canonical.Digester() + digestedArch := io.TeeReader(arch, digester.Hash()) - if createdTime != nil { - for _, fname := range []string{outDir, configPath, layerPath} { - // todo: maybe save layer created timestamp? - if err := system.Chtimes(fname, *createdTime, *createdTime); err != nil { - return distribution.Descriptor{}, errors.Wrap(err, "could not set layer timestamp") - } + tarSize, err := io.Copy(tarFile, digestedArch) + if err != nil { + return distribution.Descriptor{}, err + } + + tarDigest := digester.Digest() + if lDgst != tarDigest { + log.G(context.TODO()).WithFields(log.Fields{ + "layerDigest": lDgst, + "actualDigest": tarDigest, + }).Warn("layer digest doesn't match its tar archive digest") + + lDgst = digester.Digest() + layerPath = filepath.Join(outDir, lDgst.Algorithm().String(), lDgst.Encoded()) + } + + if createdTime != nil { + for _, fname := range []string{outDir, layerPath} { + // todo: maybe save layer created timestamp? + if err := system.Chtimes(fname, *createdTime, *createdTime); err != nil { + return distribution.Descriptor{}, errors.Wrap(err, "could not set layer timestamp") } } - - s.diffIDPaths[l.DiffID()] = layerPath - s.savedLayers[legacyImg.ID] = struct{}{} } - var src distribution.Descriptor + var desc distribution.Descriptor if fs, ok := l.(distribution.Describable); ok { - src = fs.Descriptor() + desc = fs.Descriptor() } - if src.Digest == "" { - src = distribution.Descriptor{ - MediaType: ocispec.MediaTypeImageLayer, - Digest: lDgst, - Size: l.Size(), + if desc.Digest == "" { + desc.Digest = tarDigest + desc.Size = tarSize + } + if desc.MediaType == "" { + desc.MediaType = ocispec.MediaTypeImageLayer + } + s.savedLayers[lDiffID] = desc + + return desc, nil +} + +func (s *saveSession) saveConfig(legacyImg image.V1Image, outDir string, createdTime *time.Time) error { + imageConfig, err := json.Marshal(legacyImg) + if err != nil { + return err + } + + cfgDgst := digest.FromBytes(imageConfig) + configPath := filepath.Join(outDir, cfgDgst.Algorithm().String(), cfgDgst.Encoded()) + if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil { + return errors.Wrap(err, "could not create layer dir parent") + } + + if err := os.WriteFile(configPath, imageConfig, 0o644); err != nil { + return err + } + + if createdTime != nil { + if err := system.Chtimes(configPath, *createdTime, *createdTime); err != nil { + return errors.Wrap(err, "could not set config timestamp") } } - return src, nil + + s.savedConfigs[legacyImg.ID] = struct{}{} + return nil } diff --git a/integration/image/save_test.go b/integration/image/save_test.go index 95c82e3b94..6fec2c779f 100644 --- a/integration/image/save_test.go +++ b/integration/image/save_test.go @@ -119,6 +119,14 @@ func TestSaveCheckManifestLayers(t *testing.T) { assert.NilError(t, json.Unmarshal(manifestData, &manifest)) assert.Check(t, is.Len(manifest.Layers, len(img.RootFS.Layers))) + for _, l := range manifest.Layers { + stat, err := fs.Stat(tarfs, "blobs/sha256/"+l.Digest.Encoded()) + if !assert.Check(t, err) { + continue + } + + assert.Check(t, is.Equal(l.Size, stat.Size())) + } } func TestSaveRepoWithMultipleImages(t *testing.T) {