c8d: make the cache in classic builder work

In order for the cache in the classic builder to work we need to:
- use the came comparison function as the graph drivers implementation
- save the container config when commiting the image
- use all images to search a 'FROM "scratch"' image
- load all images if `cacheFrom` is empty

Signed-off-by: Djordje Lukic <djordje.lukic@docker.com>
Signed-off-by: Paweł Gronowski <pawel.gronowski@docker.com>
This commit is contained in:
Djordje Lukic 2023-10-12 18:29:09 +02:00 committed by Paweł Gronowski
parent 2c47a6df0d
commit 71ebfc7c63
No known key found for this signature in database
GPG key ID: B85EFCFE26DEF92A
8 changed files with 306 additions and 226 deletions

View file

@ -5,6 +5,7 @@ import (
"reflect"
"strings"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
imagetype "github.com/docker/docker/api/types/image"
"github.com/docker/docker/builder"
@ -14,80 +15,232 @@ import (
// MakeImageCache creates a stateful image cache.
func (i *ImageService) MakeImageCache(ctx context.Context, cacheFrom []string) (builder.ImageCache, error) {
images := []*image.Image{}
for _, c := range cacheFrom {
im, err := i.GetImage(ctx, c, imagetype.GetImageOpts{})
if err != nil {
return nil, err
}
images = append(images, im)
if len(cacheFrom) == 0 {
return &localCache{
imageService: i,
}, nil
}
return &imageCache{images: images, c: i}, nil
for _, c := range cacheFrom {
h, err := i.ImageHistory(ctx, c)
if err != nil {
continue
}
for _, hi := range h {
if hi.ID != "<missing>" {
im, err := i.GetImage(ctx, hi.ID, imagetype.GetImageOpts{})
if err != nil {
return nil, err
}
images = append(images, im)
}
}
}
return &imageCache{
lc: &localCache{
imageService: i,
},
images: images,
imageService: i,
}, nil
}
type localCache struct {
imageService *ImageService
}
func (ic *localCache) GetCache(parentID string, cfg *container.Config) (imageID string, err error) {
ctx := context.TODO()
var children []image.ID
// FROM scratch
if parentID == "" {
imgs, err := ic.imageService.Images(ctx, types.ImageListOptions{
All: true,
})
if err != nil {
return "", err
}
for _, img := range imgs {
if img.ParentID == parentID {
children = append(children, image.ID(img.ID))
}
}
} else {
c, err := ic.imageService.Children(ctx, image.ID(parentID))
if err != nil {
return "", err
}
children = c
}
var match *image.Image
for _, child := range children {
childImage, err := ic.imageService.GetImage(ctx, child.String(), imagetype.GetImageOpts{})
if err != nil {
return "", err
}
if isMatch(&childImage.ContainerConfig, cfg) {
if childImage.Created != nil && (match == nil || match.Created.Before(*childImage.Created)) {
match = childImage
}
}
}
if match == nil {
return "", nil
}
return match.ID().String(), nil
}
type imageCache struct {
images []*image.Image
c *ImageService
images []*image.Image
imageService *ImageService
lc *localCache
}
func (ic *imageCache) GetCache(parentID string, cfg *container.Config) (imageID string, err error) {
ctx := context.TODO()
if parentID == "" {
// TODO handle "parentless" image cache lookups ("FROM scratch")
return "", nil
}
parent, err := ic.c.GetImage(ctx, parentID, imagetype.GetImageOpts{})
imgID, err := ic.lc.GetCache(parentID, cfg)
if err != nil {
return "", err
}
for _, localCachedImage := range ic.images {
if isMatch(localCachedImage, parent, cfg) {
return localCachedImage.ID().String(), nil
if imgID != "" {
for _, s := range ic.images {
if ic.isParent(ctx, s, image.ID(imgID)) {
return imgID, nil
}
}
}
children, err := ic.c.Children(ctx, parent.ID())
if err != nil {
return "", err
}
var parent *image.Image
lenHistory := 0
for _, children := range children {
childImage, err := ic.c.GetImage(ctx, children.String(), imagetype.GetImageOpts{})
if parentID != "" {
parent, err = ic.imageService.GetImage(ctx, parentID, imagetype.GetImageOpts{})
if err != nil {
return "", err
}
if isMatch(childImage, parent, cfg) {
return children.String(), nil
lenHistory = len(parent.History)
}
for _, target := range ic.images {
if !isValidParent(target, parent) || !isValidConfig(cfg, target.History[lenHistory]) {
continue
}
return target.ID().String(), nil
}
return "", nil
}
// isMatch checks whether a given target can be used as cache for the given
// parent image/config combination.
// A target can only be an immediate child of the given parent image. For
// a parent image with `n` history entries, a valid target must have `n+1`
// entries and the extra entry must match the provided config
func isMatch(target, parent *image.Image, cfg *container.Config) bool {
if target == nil || parent == nil || cfg == nil {
func isValidConfig(cfg *container.Config, h image.History) bool {
// todo: make this format better than join that loses data
return strings.Join(cfg.Cmd, " ") == h.CreatedBy
}
func isValidParent(img, parent *image.Image) bool {
if len(img.History) == 0 {
return false
}
if parent == nil || len(parent.History) == 0 && len(parent.RootFS.DiffIDs) == 0 {
return true
}
if len(parent.History) >= len(img.History) {
return false
}
if len(parent.RootFS.DiffIDs) > len(img.RootFS.DiffIDs) {
return false
}
if len(target.History) != len(parent.History)+1 ||
len(target.RootFS.DiffIDs) != len(parent.RootFS.DiffIDs)+1 {
for i, h := range parent.History {
if !reflect.DeepEqual(h, img.History[i]) {
return false
}
}
for i, d := range parent.RootFS.DiffIDs {
if d != img.RootFS.DiffIDs[i] {
return false
}
}
return true
}
func (ic *imageCache) isParent(ctx context.Context, img *image.Image, parentID image.ID) bool {
ii, err := ic.imageService.resolveImage(ctx, img.ImageID())
if err != nil {
return false
}
parent, ok := ii.Labels[imageLabelClassicBuilderParent]
if ok {
return parent == parentID.String()
}
p, err := ic.imageService.GetImage(ctx, parentID.String(), imagetype.GetImageOpts{})
if err != nil {
return false
}
return ic.isParent(ctx, p, parentID)
}
// compare two Config struct. Do not compare the "Image" nor "Hostname" fields
// If OpenStdin is set, then it differs
func isMatch(a, b *container.Config) bool {
if a == nil || b == nil ||
a.OpenStdin || b.OpenStdin {
return false
}
if a.AttachStdout != b.AttachStdout ||
a.AttachStderr != b.AttachStderr ||
a.User != b.User ||
a.OpenStdin != b.OpenStdin ||
a.Tty != b.Tty {
return false
}
for i := range parent.History {
if !reflect.DeepEqual(parent.History[i], target.History[i]) {
if len(a.Cmd) != len(b.Cmd) ||
len(a.Env) != len(b.Env) ||
len(a.Labels) != len(b.Labels) ||
len(a.ExposedPorts) != len(b.ExposedPorts) ||
len(a.Entrypoint) != len(b.Entrypoint) ||
len(a.Volumes) != len(b.Volumes) {
return false
}
for i := 0; i < len(a.Cmd); i++ {
if a.Cmd[i] != b.Cmd[i] {
return false
}
}
for i := 0; i < len(a.Env); i++ {
if a.Env[i] != b.Env[i] {
return false
}
}
for k, v := range a.Labels {
if v != b.Labels[k] {
return false
}
}
for k := range a.ExposedPorts {
if _, exists := b.ExposedPorts[k]; !exists {
return false
}
}
childCreatedBy := target.History[len(target.History)-1].CreatedBy
return childCreatedBy == strings.Join(cfg.Cmd, " ")
for i := 0; i < len(a.Entrypoint); i++ {
if a.Entrypoint[i] != b.Entrypoint[i] {
return false
}
}
for key := range a.Volumes {
if _, exists := b.Volumes[key]; !exists {
return false
}
}
return true
}

View file

@ -3,15 +3,10 @@ package containerd
import (
"context"
"github.com/containerd/containerd/content"
cerrdefs "github.com/containerd/containerd/errdefs"
containerdimages "github.com/containerd/containerd/images"
"github.com/containerd/containerd/platforms"
"github.com/containerd/log"
"github.com/docker/docker/errdefs"
"github.com/docker/docker/image"
"github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
)
@ -30,136 +25,33 @@ func (i *ImageService) Children(ctx context.Context, id image.ID) ([]image.ID, e
return children, nil
}
// platformRootfs returns a rootfs for a specified platform.
func platformRootfs(ctx context.Context, store content.Store, desc ocispec.Descriptor, platform ocispec.Platform) (ocispec.RootFS, error) {
empty := ocispec.RootFS{}
configDesc, err := containerdimages.Config(ctx, store, desc, platforms.OnlyStrict(platform))
if err != nil {
return empty, errors.Wrapf(err, "failed to get config for platform %s", platforms.Format(platform))
}
diffs, err := containerdimages.RootFS(ctx, store, configDesc)
if err != nil {
return empty, errors.Wrapf(err, "failed to obtain rootfs")
}
return ocispec.RootFS{
Type: "layers",
DiffIDs: diffs,
}, nil
}
// isRootfsChildOf checks if all layers from parent rootfs are child's first layers
// and child has at least one more layer (to make it not commutative).
// Example:
// A with layers [X, Y],
// B with layers [X, Y, Z]
// C with layers [Y, Z]
//
// Only isRootfsChildOf(B, A) is true.
// Which means that B is considered a children of A. B and C has no children.
// See more examples in TestIsRootfsChildOf.
func isRootfsChildOf(child ocispec.RootFS, parent ocispec.RootFS) bool {
childLen := len(child.DiffIDs)
parentLen := len(parent.DiffIDs)
if childLen <= parentLen {
return false
}
for i := 0; i < parentLen; i++ {
if child.DiffIDs[i] != parent.DiffIDs[i] {
return false
}
}
return true
}
// parents returns a slice of image IDs whose entire rootfs contents match,
// in order, the childs first layers, excluding images with the exact same
// rootfs.
// parents returns a slice of image IDs that are parents of the `id` image
//
// Called from image_delete.go to prune dangling parents.
func (i *ImageService) parents(ctx context.Context, id image.ID) ([]imageWithRootfs, error) {
target, err := i.resolveDescriptor(ctx, id.String())
func (i *ImageService) parents(ctx context.Context, id image.ID) ([]containerdimages.Image, error) {
targetImage, err := i.resolveImage(ctx, id.String())
if err != nil {
return nil, errors.Wrap(err, "failed to get child image")
}
allPlatforms, err := containerdimages.Platforms(ctx, i.content, target)
if err != nil {
return nil, errdefs.System(errors.Wrap(err, "failed to list platforms supported by image"))
}
var imgs []containerdimages.Image
for {
parent, ok := targetImage.Labels[imageLabelClassicBuilderParent]
if !ok || parent == "" {
break
}
var childRootFS []ocispec.RootFS
for _, platform := range allPlatforms {
rootfs, err := platformRootfs(ctx, i.content, target, platform)
parentDigest, err := digest.Parse(parent)
if err != nil {
if cerrdefs.IsNotFound(err) {
continue
}
return nil, errdefs.System(errors.Wrap(err, "failed to get platform-specific rootfs"))
return nil, err
}
childRootFS = append(childRootFS, rootfs)
}
imgs, err := i.images.List(ctx)
if err != nil {
return nil, errdefs.System(errors.Wrap(err, "failed to list all images"))
}
var parents []imageWithRootfs
for _, img := range imgs {
nextImage:
for _, platform := range allPlatforms {
rootfs, err := platformRootfs(ctx, i.content, img.Target, platform)
if err != nil {
if cerrdefs.IsNotFound(err) {
continue
}
return nil, errdefs.System(errors.Wrap(err, "failed to get platform-specific rootfs"))
}
for _, childRoot := range childRootFS {
if isRootfsChildOf(childRoot, rootfs) {
parents = append(parents, imageWithRootfs{
img: img,
rootfs: rootfs,
})
break nextImage
}
}
img, err := i.resolveImage(ctx, parentDigest.String())
if err != nil {
return nil, err
}
imgs = append(imgs, img)
targetImage = img
}
return parents, nil
}
// getParentsByBuilderLabel finds images that were a base for the given image
// by an image label set by the legacy builder.
// NOTE: This only works for images built with legacy builder (not Buildkit).
func (i *ImageService) getParentsByBuilderLabel(ctx context.Context, img containerdimages.Image) ([]containerdimages.Image, error) {
parent, ok := img.Labels[imageLabelClassicBuilderParent]
if !ok || parent == "" {
return nil, nil
}
dgst, err := digest.Parse(parent)
if err != nil {
log.G(ctx).WithFields(log.Fields{
"error": err,
"value": parent,
}).Warnf("invalid %s label value", imageLabelClassicBuilderParent)
return nil, nil
}
return i.images.List(ctx, "target.digest=="+dgst.String())
}
type imageWithRootfs struct {
img containerdimages.Image
rootfs ocispec.RootFS
return imgs, nil
}

View file

@ -144,7 +144,8 @@ func generateCommitImageConfig(baseConfig imagespec.DockerOCIImage, diffID diges
EmptyLayer: diffID == "",
}),
},
Config: containerConfigToDockerOCIImageConfig(opts.Config),
Config: containerConfigToDockerOCIImageConfig(opts.Config),
ContainerConfig: containerConfigToDockerOCIImageConfig(opts.ContainerConfig),
}
}

View file

@ -3,12 +3,12 @@ package containerd
import (
"context"
"fmt"
"sort"
"strings"
"time"
cerrdefs "github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/images"
containerdimages "github.com/containerd/containerd/images"
"github.com/containerd/log"
"github.com/distribution/reference"
"github.com/docker/docker/api/types/events"
@ -215,14 +215,13 @@ func (i *ImageService) deleteAll(ctx context.Context, imgID image.ID, all []imag
}
}()
var parents []imageWithRootfs
var parents []containerdimages.Image
if prune {
// TODO(dmcgowan): Consider using GC labels to walk for deletion
parents, err = i.parents(ctx, imgID)
if err != nil {
log.G(ctx).WithError(err).Warn("failed to get image parents")
}
sortParentsByAffinity(parents)
}
for _, imageRef := range all {
@ -234,15 +233,15 @@ func (i *ImageService) deleteAll(ctx context.Context, imgID image.ID, all []imag
records = append(records, imagetypes.DeleteResponse{Deleted: imgID.String()})
for _, parent := range parents {
if !isDanglingImage(parent.img) {
if !isDanglingImage(parent) {
break
}
err = i.imageDeleteHelper(ctx, parent.img, all, &records, conflictSoft)
err = i.imageDeleteHelper(ctx, parent, all, &records, conflictSoft)
if err != nil {
log.G(ctx).WithError(err).Warn("failed to remove image parent")
break
}
parentID := parent.img.Target.Digest.String()
parentID := parent.Target.Digest.String()
i.LogImageEvent(parentID, parentID, events.ActionDelete)
records = append(records, imagetypes.DeleteResponse{Deleted: parentID})
}
@ -262,17 +261,6 @@ func isImageIDPrefix(imageID, possiblePrefix string) bool {
return false
}
func sortParentsByAffinity(parents []imageWithRootfs) {
sort.Slice(parents, func(i, j int) bool {
lenRootfsI := len(parents[i].rootfs.DiffIDs)
lenRootfsJ := len(parents[j].rootfs.DiffIDs)
if lenRootfsI == lenRootfsJ {
return isDanglingImage(parents[i].img)
}
return lenRootfsI > lenRootfsJ
})
}
// getSameReferences returns the set of images which are the same as:
// - the provided img if non-nil
// - OR the first named image found in the provided image set

View file

@ -5,12 +5,14 @@ import (
"sort"
"github.com/containerd/containerd/images"
containerdimages "github.com/containerd/containerd/images"
cplatforms "github.com/containerd/containerd/platforms"
"github.com/containerd/log"
"github.com/distribution/reference"
imagetype "github.com/docker/docker/api/types/image"
"github.com/docker/docker/errdefs"
"github.com/docker/docker/pkg/platforms"
"github.com/opencontainers/go-digest"
"github.com/opencontainers/image-spec/identity"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
@ -161,3 +163,24 @@ func getImageTags(ctx context.Context, imgs []images.Image) []string {
return tags
}
// getParentsByBuilderLabel finds images that were a base for the given image
// by an image label set by the legacy builder.
// NOTE: This only works for images built with legacy builder (not Buildkit).
func (i *ImageService) getParentsByBuilderLabel(ctx context.Context, img containerdimages.Image) ([]containerdimages.Image, error) {
parent, ok := img.Labels[imageLabelClassicBuilderParent]
if !ok || parent == "" {
return nil, nil
}
dgst, err := digest.Parse(parent)
if err != nil {
log.G(ctx).WithFields(log.Fields{
"error": err,
"value": parent,
}).Warnf("invalid %s label value", imageLabelClassicBuilderParent)
return nil, nil
}
return i.client.ImageService().List(ctx, "target.digest=="+dgst.String())
}

View file

@ -18,13 +18,14 @@ import (
// - Details
func dockerOciImageToDockerImagePartial(id image.ID, img imagespec.DockerOCIImage) *image.Image {
v1Image := image.V1Image{
DockerVersion: dockerversion.Version,
Config: dockerOCIImageConfigToContainerConfig(img.Config),
Architecture: img.Platform.Architecture,
Variant: img.Platform.Variant,
OS: img.Platform.OS,
Author: img.Author,
Created: img.Created,
DockerVersion: dockerversion.Version,
Config: dockerOCIImageConfigToContainerConfig(img.Config),
ContainerConfig: *dockerOCIImageConfigToContainerConfig(img.ContainerConfig),
Architecture: img.Platform.Architecture,
Variant: img.Platform.Variant,
OS: img.Platform.OS,
Author: img.Author,
Created: img.Created,
}
rootFS := &image.RootFS{
@ -66,7 +67,8 @@ func dockerImageToDockerOCIImage(img image.Image) imagespec.DockerOCIImage {
RootFS: rootfs,
History: img.History,
},
Config: containerConfigToDockerOCIImageConfig(img.Config),
Config: containerConfigToDockerOCIImageConfig(img.Config),
ContainerConfig: containerConfigToDockerOCIImageConfig(&img.ContainerConfig),
}
}

View file

@ -14,6 +14,8 @@ type DockerOCIImage struct {
// Shadow ocispec.Image.Config
Config DockerOCIImageConfig `json:"config,omitempty"`
ContainerConfig DockerOCIImageConfig `json:"container_config,omitempty"`
}
// DockerOCIImageConfig is a ocispec.ImageConfig extended with Docker specific fields.

View file

@ -5447,46 +5447,13 @@ func (s *DockerCLIBuildSuite) TestBuildCacheFrom(c *testing.T) {
assert.Equal(c, strings.Count(result.Combined(), "Using cache"), 0)
cli.DockerCmd(c, "rmi", "build2")
// clear parent images
tempDir, err := os.MkdirTemp("", "test-build-cache-from-")
if err != nil {
c.Fatalf("failed to create temporary directory: %s", tempDir)
}
defer os.RemoveAll(tempDir)
tempFile := filepath.Join(tempDir, "img.tar")
cli.DockerCmd(c, "save", "-o", tempFile, "build1")
cli.DockerCmd(c, "rmi", "build1")
cli.DockerCmd(c, "load", "-i", tempFile)
parentID := cli.DockerCmd(c, "inspect", "-f", "{{.Parent}}", "build1").Combined()
assert.Equal(c, strings.TrimSpace(parentID), "")
// cache still applies without parents
result = cli.BuildCmd(c, "build2", cli.WithFlags("--cache-from=build1"), build.WithExternalBuildContext(ctx))
id2 = getIDByName(c, "build2")
assert.Equal(c, id1, id2)
assert.Equal(c, strings.Count(result.Combined(), "Using cache"), 3)
history1 := cli.DockerCmd(c, "history", "-q", "build2").Combined()
// Retry, no new intermediate images
result = cli.BuildCmd(c, "build3", cli.WithFlags("--cache-from=build1"), build.WithExternalBuildContext(ctx))
id3 := getIDByName(c, "build3")
assert.Equal(c, id1, id3)
assert.Equal(c, strings.Count(result.Combined(), "Using cache"), 3)
history2 := cli.DockerCmd(c, "history", "-q", "build3").Combined()
assert.Equal(c, history1, history2)
cli.DockerCmd(c, "rmi", "build2")
cli.DockerCmd(c, "rmi", "build3")
cli.DockerCmd(c, "rmi", "build1")
cli.DockerCmd(c, "load", "-i", tempFile)
// Modify file, everything up to last command and layers are reused
dockerfile = `
FROM busybox
ENV FOO=bar
ADD baz /
RUN touch newfile`
err = os.WriteFile(filepath.Join(ctx.Dir, "Dockerfile"), []byte(dockerfile), 0o644)
err := os.WriteFile(filepath.Join(ctx.Dir, "Dockerfile"), []byte(dockerfile), 0o644)
assert.NilError(c, err)
result = cli.BuildCmd(c, "build2", cli.WithFlags("--cache-from=build1"), build.WithExternalBuildContext(ctx))
@ -5509,6 +5476,58 @@ func (s *DockerCLIBuildSuite) TestBuildCacheFrom(c *testing.T) {
assert.Assert(c, layers1[len(layers1)-1] != layers2[len(layers1)-1])
}
func (s *DockerCLIBuildSuite) TestBuildCacheFromLoad(c *testing.T) {
skip.If(c, testEnv.UsingSnapshotter, "Parent-child relations are lost when save/load-ing with the containerd image store")
testRequires(c, DaemonIsLinux) // All tests that do save are skipped in windows
dockerfile := `
FROM busybox
ENV FOO=bar
ADD baz /
RUN touch bax`
ctx := fakecontext.New(c, "",
fakecontext.WithDockerfile(dockerfile),
fakecontext.WithFiles(map[string]string{
"Dockerfile": dockerfile,
"baz": "baz",
}))
defer ctx.Close()
cli.BuildCmd(c, "build1", build.WithExternalBuildContext(ctx))
id1 := getIDByName(c, "build1")
// clear parent images
tempDir, err := os.MkdirTemp("", "test-build-cache-from-")
if err != nil {
c.Fatalf("failed to create temporary directory: %s", tempDir)
}
defer os.RemoveAll(tempDir)
tempFile := filepath.Join(tempDir, "img.tar")
cli.DockerCmd(c, "save", "-o", tempFile, "build1")
cli.DockerCmd(c, "rmi", "build1")
cli.DockerCmd(c, "load", "-i", tempFile)
parentID := cli.DockerCmd(c, "inspect", "-f", "{{.Parent}}", "build1").Combined()
assert.Equal(c, strings.TrimSpace(parentID), "")
// cache still applies without parents
result := cli.BuildCmd(c, "build2", cli.WithFlags("--cache-from=build1"), build.WithExternalBuildContext(ctx))
id2 := getIDByName(c, "build2")
assert.Equal(c, id1, id2)
assert.Equal(c, strings.Count(result.Combined(), "Using cache"), 3)
history1 := cli.DockerCmd(c, "history", "-q", "build2").Combined()
// Retry, no new intermediate images
result = cli.BuildCmd(c, "build3", cli.WithFlags("--cache-from=build1"), build.WithExternalBuildContext(ctx))
id3 := getIDByName(c, "build3")
assert.Equal(c, id1, id3)
assert.Equal(c, strings.Count(result.Combined(), "Using cache"), 3)
history2 := cli.DockerCmd(c, "history", "-q", "build3").Combined()
assert.Equal(c, history1, history2)
cli.DockerCmd(c, "rmi", "build2")
cli.DockerCmd(c, "rmi", "build3")
cli.DockerCmd(c, "rmi", "build1")
cli.DockerCmd(c, "load", "-i", tempFile)
}
func (s *DockerCLIBuildSuite) TestBuildMultiStageCache(c *testing.T) {
testRequires(c, DaemonIsLinux) // All tests that do save are skipped in windows
dockerfile := `