image/save: Refactor saveLayer

`diffIDPaths` is not used and can be removed.

`savedConfig` stores if the config was already saved (ID of the image is
the ID of the config).
`savedLayers` stores if the layer (diff ID) was already saved.

Signed-off-by: Paweł Gronowski <pawel.gronowski@docker.com>
This commit is contained in:
Paweł Gronowski 2024-01-15 16:31:11 +01:00
parent c58acf06c2
commit 9f50e06993
No known key found for this signature in database
GPG key ID: B85EFCFE26DEF92A

View file

@ -1,6 +1,7 @@
package tarexport // import "github.com/docker/docker/image/tarexport"
import (
"context"
"encoding/json"
"fmt"
"io"
@ -10,6 +11,7 @@ import (
"time"
"github.com/containerd/containerd/images"
"github.com/containerd/log"
"github.com/distribution/reference"
"github.com/docker/distribution"
"github.com/docker/docker/api/types/events"
@ -34,11 +36,10 @@ type imageDescriptor struct {
type saveSession struct {
*tarexporter
outDir string
images map[image.ID]*imageDescriptor
savedLayers map[string]struct{}
diffIDPaths map[layer.DiffID]string // cache every diffID blob to avoid duplicates
diffIDDescs map[layer.DiffID]distribution.Descriptor
outDir string
images map[image.ID]*imageDescriptor
savedLayers map[layer.DiffID]distribution.Descriptor
savedConfigs map[string]struct{}
}
func (l *tarexporter) Save(names []string, outStream io.Writer) error {
@ -179,9 +180,8 @@ func (l *tarexporter) releaseLayerReferences(imgDescr map[image.ID]*imageDescrip
}
func (s *saveSession) save(outStream io.Writer) error {
s.savedLayers = make(map[string]struct{})
s.diffIDPaths = make(map[layer.DiffID]string)
s.diffIDDescs = make(map[layer.DiffID]distribution.Descriptor)
s.savedConfigs = make(map[string]struct{})
s.savedLayers = make(map[layer.DiffID]distribution.Descriptor)
// get image json
tempDir, err := os.MkdirTemp("", "docker-export-")
@ -405,7 +405,7 @@ func (s *saveSession) saveImage(id image.ID) (map[layer.DiffID]distribution.Desc
}
v1Img.OS = img.OS
src, err := s.saveLayer(rootFS.ChainID(), v1Img, img.Created)
src, err := s.saveConfigAndLayer(rootFS.ChainID(), v1Img, img.Created)
if err != nil {
return nil, err
}
@ -450,26 +450,13 @@ func (s *saveSession) saveImage(id image.ID) (map[layer.DiffID]distribution.Desc
return foreignSrcs, nil
}
func (s *saveSession) saveLayer(id layer.ChainID, legacyImg image.V1Image, createdTime *time.Time) (distribution.Descriptor, error) {
if _, exists := s.savedLayers[legacyImg.ID]; exists {
return distribution.Descriptor{}, nil
}
func (s *saveSession) saveConfigAndLayer(id layer.ChainID, legacyImg image.V1Image, createdTime *time.Time) (distribution.Descriptor, error) {
outDir := filepath.Join(s.outDir, ocispec.ImageBlobsDir)
imageConfig, err := json.Marshal(legacyImg)
if err != nil {
return distribution.Descriptor{}, err
}
cfgDgst := digest.FromBytes(imageConfig)
configPath := filepath.Join(outDir, cfgDgst.Algorithm().String(), cfgDgst.Encoded())
if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
return distribution.Descriptor{}, errors.Wrap(err, "could not create layer dir parent")
}
if err := os.WriteFile(configPath, imageConfig, 0o644); err != nil {
return distribution.Descriptor{}, err
if _, ok := s.savedConfigs[legacyImg.ID]; !ok {
if err := s.saveConfig(legacyImg, outDir, createdTime); err != nil {
return distribution.Descriptor{}, err
}
}
// serialize filesystem
@ -478,66 +465,110 @@ func (s *saveSession) saveLayer(id layer.ChainID, legacyImg image.V1Image, creat
return distribution.Descriptor{}, err
}
lDgst := digest.Digest(l.DiffID())
lDiffID := l.DiffID()
lDgst := digest.Digest(lDiffID)
if _, ok := s.savedLayers[lDiffID]; ok {
return s.savedLayers[lDiffID], nil
}
layerPath := filepath.Join(outDir, lDgst.Algorithm().String(), lDgst.Encoded())
defer layer.ReleaseAndLog(s.lss, l)
if _, err = os.Stat(layerPath); err != nil {
if !os.IsNotExist(err) {
return distribution.Descriptor{}, err
}
// We use sequential file access to avoid depleting the standby list on
// Windows. On Linux, this equates to a regular os.Create.
if err := os.MkdirAll(filepath.Dir(layerPath), 0o755); err != nil {
return distribution.Descriptor{}, errors.Wrap(err, "could not create layer dir parent")
}
tarFile, err := sequential.Create(layerPath)
if err != nil {
return distribution.Descriptor{}, errors.Wrap(err, "error creating layer file")
}
defer tarFile.Close()
arch, err := l.TarStream()
if err != nil {
return distribution.Descriptor{}, err
}
defer arch.Close()
digester := digest.Canonical.Digester()
digestedArch := io.TeeReader(arch, digester.Hash())
tarSize, err := io.Copy(tarFile, digestedArch)
if err != nil {
return distribution.Descriptor{}, err
}
if createdTime != nil {
for _, fname := range []string{outDir, configPath, layerPath} {
// todo: maybe save layer created timestamp?
if err := system.Chtimes(fname, *createdTime, *createdTime); err != nil {
return distribution.Descriptor{}, errors.Wrap(err, "could not set layer timestamp")
}
}
}
s.diffIDPaths[l.DiffID()] = layerPath
s.savedLayers[legacyImg.ID] = struct{}{}
var desc distribution.Descriptor
if fs, ok := l.(distribution.Describable); ok {
desc = fs.Descriptor()
}
if desc.Digest == "" {
desc.Digest = digester.Digest()
desc.Size = tarSize
}
if desc.MediaType == "" {
desc.MediaType = ocispec.MediaTypeImageLayer
}
s.diffIDDescs[l.DiffID()] = desc
if _, err = os.Stat(layerPath); err == nil {
// This is should not happen. If the layer path was already created, we should have returned early.
// Log a warning an proceed to recreate the archive.
log.G(context.TODO()).WithFields(log.Fields{
"layerPath": layerPath,
"id": id,
"lDgst": lDgst,
}).Warn("LayerPath already exists but the descriptor is not cached")
} else if !os.IsNotExist(err) {
return distribution.Descriptor{}, err
}
return s.diffIDDescs[l.DiffID()], nil
// We use sequential file access to avoid depleting the standby list on
// Windows. On Linux, this equates to a regular os.Create.
if err := os.MkdirAll(filepath.Dir(layerPath), 0o755); err != nil {
return distribution.Descriptor{}, errors.Wrap(err, "could not create layer dir parent")
}
tarFile, err := sequential.Create(layerPath)
if err != nil {
return distribution.Descriptor{}, errors.Wrap(err, "error creating layer file")
}
defer tarFile.Close()
arch, err := l.TarStream()
if err != nil {
return distribution.Descriptor{}, err
}
defer arch.Close()
digester := digest.Canonical.Digester()
digestedArch := io.TeeReader(arch, digester.Hash())
tarSize, err := io.Copy(tarFile, digestedArch)
if err != nil {
return distribution.Descriptor{}, err
}
tarDigest := digester.Digest()
if lDgst != tarDigest {
log.G(context.TODO()).WithFields(log.Fields{
"layerDigest": lDgst,
"actualDigest": tarDigest,
}).Warn("layer digest doesn't match its tar archive digest")
lDgst = digester.Digest()
layerPath = filepath.Join(outDir, lDgst.Algorithm().String(), lDgst.Encoded())
}
if createdTime != nil {
for _, fname := range []string{outDir, layerPath} {
// todo: maybe save layer created timestamp?
if err := system.Chtimes(fname, *createdTime, *createdTime); err != nil {
return distribution.Descriptor{}, errors.Wrap(err, "could not set layer timestamp")
}
}
}
var desc distribution.Descriptor
if fs, ok := l.(distribution.Describable); ok {
desc = fs.Descriptor()
}
if desc.Digest == "" {
desc.Digest = tarDigest
desc.Size = tarSize
}
if desc.MediaType == "" {
desc.MediaType = ocispec.MediaTypeImageLayer
}
s.savedLayers[lDiffID] = desc
return desc, nil
}
func (s *saveSession) saveConfig(legacyImg image.V1Image, outDir string, createdTime *time.Time) error {
imageConfig, err := json.Marshal(legacyImg)
if err != nil {
return err
}
cfgDgst := digest.FromBytes(imageConfig)
configPath := filepath.Join(outDir, cfgDgst.Algorithm().String(), cfgDgst.Encoded())
if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
return errors.Wrap(err, "could not create layer dir parent")
}
if err := os.WriteFile(configPath, imageConfig, 0o644); err != nil {
return err
}
if createdTime != nil {
if err := system.Chtimes(configPath, *createdTime, *createdTime); err != nil {
return errors.Wrap(err, "could not set config timestamp")
}
}
s.savedConfigs[legacyImg.ID] = struct{}{}
return nil
}