Browse Source

Merge pull request #2809 from graydon/880-cache-ADD-commands-in-dockerfiles

Issue #880 - cache ADD commands in dockerfiles
Guillaume J. Charmes 11 years ago
parent
commit
efaf2cac5c
4 changed files with 274 additions and 84 deletions
  1. 1 0
      AUTHORS
  2. 4 0
      CHANGELOG.md
  3. 176 57
      buildfile.go
  4. 93 27
      integration/buildfile_test.go

+ 1 - 0
AUTHORS

@@ -69,6 +69,7 @@ Francisco Souza <f@souza.cc>
 Frederick F. Kautz IV <fkautz@alumni.cmu.edu>
 Frederick F. Kautz IV <fkautz@alumni.cmu.edu>
 Gabriel Monroy <gabriel@opdemand.com>
 Gabriel Monroy <gabriel@opdemand.com>
 Gareth Rushgrove <gareth@morethanseven.net>
 Gareth Rushgrove <gareth@morethanseven.net>
+Graydon Hoare <graydon@pobox.com>
 Greg Thornton <xdissent@me.com>
 Greg Thornton <xdissent@me.com>
 Guillaume J. Charmes <guillaume.charmes@dotcloud.com>
 Guillaume J. Charmes <guillaume.charmes@dotcloud.com>
 Gurjeet Singh <gurjeet@singh.im>
 Gurjeet Singh <gurjeet@singh.im>

+ 4 - 0
CHANGELOG.md

@@ -1,5 +1,9 @@
 # Changelog
 # Changelog
 
 
+#### Builder
+
+- ADD now uses image cache, based on sha256 of added content.
+
 ## 0.7.2 (2013-12-16)
 ## 0.7.2 (2013-12-16)
 
 
 #### Runtime
 #### Runtime

+ 176 - 57
buildfile.go

@@ -1,6 +1,9 @@
 package docker
 package docker
 
 
 import (
 import (
+	"archive/tar"
+	"crypto/sha256"
+	"encoding/hex"
 	"encoding/json"
 	"encoding/json"
 	"errors"
 	"errors"
 	"fmt"
 	"fmt"
@@ -12,9 +15,11 @@ import (
 	"net/url"
 	"net/url"
 	"os"
 	"os"
 	"path"
 	"path"
+	"path/filepath"
 	"reflect"
 	"reflect"
 	"regexp"
 	"regexp"
 	"strings"
 	"strings"
+	"time"
 )
 )
 
 
 var (
 var (
@@ -92,6 +97,87 @@ func (b *buildFile) CmdMaintainer(name string) error {
 	return b.commit("", b.config.Cmd, fmt.Sprintf("MAINTAINER %s", name))
 	return b.commit("", b.config.Cmd, fmt.Sprintf("MAINTAINER %s", name))
 }
 }
 
 
+// probeCache checks to see if image-caching is enabled (`b.utilizeCache`)
+// and if so attempts to look up the current `b.image` and `b.config` pair
+// in the current server `b.srv`. If an image is found, probeCache returns
+// `(true, nil)`. If no image is found, it returns `(false, nil)`. If there
+// is any error, it returns `(false, err)`.
+func (b *buildFile) probeCache() (bool, error) {
+	if b.utilizeCache {
+		if cache, err := b.srv.ImageGetCached(b.image, b.config); err != nil {
+			return false, err
+		} else if cache != nil {
+			fmt.Fprintf(b.outStream, " ---> Using cache\n")
+			utils.Debugf("[BUILDER] Use cached version")
+			b.image = cache.ID
+			return true, nil
+		} else {
+			utils.Debugf("[BUILDER] Cache miss")
+		}
+	}
+	return false, nil
+}
+
+// hashPath calculates a strong hash (sha256) value for a file tree located
+// at `basepth`/`pth`, including all attributes that would normally be
+// captured by `tar`. The path to hash is passed in two pieces only to
+// permit logging the second piece in isolation, assuming the first is a
+// temporary directory in which docker is running. If `clobberTimes` is
+// true and hashPath is applied to a single file, the ctime/atime/mtime of
+// the file is considered to be unix time 0, for purposes of hashing.
+func (b *buildFile) hashPath(basePth, pth string, clobberTimes bool) (string, error) {
+
+	p := path.Join(basePth, pth)
+
+	st, err := os.Stat(p)
+	if err != nil {
+		return "", err
+	}
+
+	h := sha256.New()
+
+	if st.IsDir() {
+		tarRd, err := archive.Tar(p, archive.Uncompressed)
+		if err != nil {
+			return "", err
+		}
+		_, err = io.Copy(h, tarRd)
+		if err != nil {
+			return "", err
+		}
+
+	} else {
+		hdr, err := tar.FileInfoHeader(st, "")
+		if err != nil {
+			return "", err
+		}
+		if clobberTimes {
+			hdr.AccessTime = time.Unix(0, 0)
+			hdr.ChangeTime = time.Unix(0, 0)
+			hdr.ModTime = time.Unix(0, 0)
+		}
+		hdr.Name = filepath.Base(p)
+		tarWr := tar.NewWriter(h)
+		if err := tarWr.WriteHeader(hdr); err != nil {
+			return "", err
+		}
+
+		fileRd, err := os.Open(p)
+		if err != nil {
+			return "", err
+		}
+
+		if _, err = io.Copy(tarWr, fileRd); err != nil {
+			return "", err
+		}
+		tarWr.Close()
+	}
+
+	hstr := hex.EncodeToString(h.Sum(nil))
+	fmt.Fprintf(b.outStream, " ---> data at %s has sha256 %.12s...\n", pth, hstr)
+	return hstr, nil
+}
+
 func (b *buildFile) CmdRun(args string) error {
 func (b *buildFile) CmdRun(args string) error {
 	if b.image == "" {
 	if b.image == "" {
 		return fmt.Errorf("Please provide a source image with `from` prior to run")
 		return fmt.Errorf("Please provide a source image with `from` prior to run")
@@ -109,17 +195,12 @@ func (b *buildFile) CmdRun(args string) error {
 
 
 	utils.Debugf("Command to be executed: %v", b.config.Cmd)
 	utils.Debugf("Command to be executed: %v", b.config.Cmd)
 
 
-	if b.utilizeCache {
-		if cache, err := b.srv.ImageGetCached(b.image, b.config); err != nil {
-			return err
-		} else if cache != nil {
-			fmt.Fprintf(b.outStream, " ---> Using cache\n")
-			utils.Debugf("[BUILDER] Use cached version")
-			b.image = cache.ID
-			return nil
-		} else {
-			utils.Debugf("[BUILDER] Cache miss")
-		}
+	hit, err := b.probeCache()
+	if err != nil {
+		return err
+	}
+	if hit {
+		return nil
 	}
 	}
 
 
 	cid, err := b.run()
 	cid, err := b.run()
@@ -265,32 +346,16 @@ func (b *buildFile) CmdVolume(args string) error {
 	return nil
 	return nil
 }
 }
 
 
-func (b *buildFile) addRemote(container *Container, orig, dest string) error {
-	file, err := utils.Download(orig)
-	if err != nil {
-		return err
+func (b *buildFile) checkPathForAddition(orig string) error {
+	origPath := path.Join(b.context, orig)
+	if !strings.HasPrefix(origPath, b.context) {
+		return fmt.Errorf("Forbidden path outside the build context: %s (%s)", orig, origPath)
 	}
 	}
-	defer file.Body.Close()
-
-	// If the destination is a directory, figure out the filename.
-	if strings.HasSuffix(dest, "/") {
-		u, err := url.Parse(orig)
-		if err != nil {
-			return err
-		}
-		path := u.Path
-		if strings.HasSuffix(path, "/") {
-			path = path[:len(path)-1]
-		}
-		parts := strings.Split(path, "/")
-		filename := parts[len(parts)-1]
-		if filename == "" {
-			return fmt.Errorf("cannot determine filename from url: %s", u)
-		}
-		dest = dest + filename
+	_, err := os.Stat(origPath)
+	if err != nil {
+		return fmt.Errorf("%s: no such file or directory", orig)
 	}
 	}
-
-	return container.Inject(file.Body, dest)
+	return nil
 }
 }
 
 
 func (b *buildFile) addContext(container *Container, orig, dest string) error {
 func (b *buildFile) addContext(container *Container, orig, dest string) error {
@@ -300,9 +365,6 @@ func (b *buildFile) addContext(container *Container, orig, dest string) error {
 	if strings.HasSuffix(dest, "/") {
 	if strings.HasSuffix(dest, "/") {
 		destPath = destPath + "/"
 		destPath = destPath + "/"
 	}
 	}
-	if !strings.HasPrefix(origPath, b.context) {
-		return fmt.Errorf("Forbidden path outside the build context: %s (%s)", orig, origPath)
-	}
 	fi, err := os.Stat(origPath)
 	fi, err := os.Stat(origPath)
 	if err != nil {
 	if err != nil {
 		return fmt.Errorf("%s: no such file or directory", orig)
 		return fmt.Errorf("%s: no such file or directory", orig)
@@ -348,6 +410,74 @@ func (b *buildFile) CmdAdd(args string) error {
 	b.config.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("#(nop) ADD %s in %s", orig, dest)}
 	b.config.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("#(nop) ADD %s in %s", orig, dest)}
 
 
 	b.config.Image = b.image
 	b.config.Image = b.image
+
+	origPath := orig
+	destPath := dest
+	clobberTimes := false
+
+	if utils.IsURL(orig) {
+
+		clobberTimes = true
+
+		resp, err := utils.Download(orig)
+		if err != nil {
+			return err
+		}
+		tmpDirName, err := ioutil.TempDir(b.context, "docker-remote")
+		if err != nil {
+			return err
+		}
+		tmpFileName := path.Join(tmpDirName, "tmp")
+		tmpFile, err := os.OpenFile(tmpFileName, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0600)
+		if err != nil {
+			return err
+		}
+		defer os.RemoveAll(tmpDirName)
+		if _, err = io.Copy(tmpFile, resp.Body); err != nil {
+			return err
+		}
+		origPath = path.Join(filepath.Base(tmpDirName), filepath.Base(tmpFileName))
+		tmpFile.Close()
+
+		// If the destination is a directory, figure out the filename.
+		if strings.HasSuffix(dest, "/") {
+			u, err := url.Parse(orig)
+			if err != nil {
+				return err
+			}
+			path := u.Path
+			if strings.HasSuffix(path, "/") {
+				path = path[:len(path)-1]
+			}
+			parts := strings.Split(path, "/")
+			filename := parts[len(parts)-1]
+			if filename == "" {
+				return fmt.Errorf("cannot determine filename from url: %s", u)
+			}
+			destPath = dest + filename
+		}
+	}
+
+	if err := b.checkPathForAddition(origPath); err != nil {
+		return err
+	}
+
+	// Hash path and check the cache
+	if b.utilizeCache {
+		hash, err := b.hashPath(b.context, origPath, clobberTimes)
+		if err != nil {
+			return err
+		}
+		b.config.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("#(nop) ADD %s in %s", hash, dest)}
+		hit, err := b.probeCache()
+		if err != nil {
+			return err
+		}
+		if hit {
+			return nil
+		}
+	}
+
 	// Create the container and start it
 	// Create the container and start it
 	container, _, err := b.runtime.Create(b.config, "")
 	container, _, err := b.runtime.Create(b.config, "")
 	if err != nil {
 	if err != nil {
@@ -360,14 +490,8 @@ func (b *buildFile) CmdAdd(args string) error {
 	}
 	}
 	defer container.Unmount()
 	defer container.Unmount()
 
 
-	if utils.IsURL(orig) {
-		if err := b.addRemote(container, orig, dest); err != nil {
-			return err
-		}
-	} else {
-		if err := b.addContext(container, orig, dest); err != nil {
-			return err
-		}
+	if err := b.addContext(container, origPath, destPath); err != nil {
+		return err
 	}
 	}
 
 
 	if err := b.commit(container.ID, cmd, fmt.Sprintf("ADD %s in %s", orig, dest)); err != nil {
 	if err := b.commit(container.ID, cmd, fmt.Sprintf("ADD %s in %s", orig, dest)); err != nil {
@@ -465,17 +589,12 @@ func (b *buildFile) commit(id string, autoCmd []string, comment string) error {
 		b.config.Cmd = []string{"/bin/sh", "-c", "#(nop) " + comment}
 		b.config.Cmd = []string{"/bin/sh", "-c", "#(nop) " + comment}
 		defer func(cmd []string) { b.config.Cmd = cmd }(cmd)
 		defer func(cmd []string) { b.config.Cmd = cmd }(cmd)
 
 
-		if b.utilizeCache {
-			if cache, err := b.srv.ImageGetCached(b.image, b.config); err != nil {
-				return err
-			} else if cache != nil {
-				fmt.Fprintf(b.outStream, " ---> Using cache\n")
-				utils.Debugf("[BUILDER] Use cached version")
-				b.image = cache.ID
-				return nil
-			} else {
-				utils.Debugf("[BUILDER] Cache miss")
-			}
+		hit, err := b.probeCache()
+		if err != nil {
+			return err
+		}
+		if hit {
+			return nil
 		}
 		}
 
 
 		container, warnings, err := b.runtime.Create(b.config, "")
 		container, warnings, err := b.runtime.Create(b.config, "")

+ 93 - 27
integration/buildfile_test.go

@@ -425,16 +425,10 @@ func TestBuildEntrypointRunCleanup(t *testing.T) {
 	}
 	}
 }
 }
 
 
-func TestBuildImageWithCache(t *testing.T) {
+func checkCacheBehavior(t *testing.T, template testContextTemplate, expectHit bool) {
 	eng := NewTestEngine(t)
 	eng := NewTestEngine(t)
 	defer nuke(mkRuntimeFromEngine(eng, t))
 	defer nuke(mkRuntimeFromEngine(eng, t))
 
 
-	template := testContextTemplate{`
-        from {IMAGE}
-        maintainer dockerio
-        `,
-		nil, nil}
-
 	img, err := buildImage(template, t, eng, true)
 	img, err := buildImage(template, t, eng, true)
 	if err != nil {
 	if err != nil {
 		t.Fatal(err)
 		t.Fatal(err)
@@ -443,43 +437,115 @@ func TestBuildImageWithCache(t *testing.T) {
 	imageId := img.ID
 	imageId := img.ID
 
 
 	img = nil
 	img = nil
-	img, err = buildImage(template, t, eng, true)
+	img, err = buildImage(template, t, eng, expectHit)
 	if err != nil {
 	if err != nil {
 		t.Fatal(err)
 		t.Fatal(err)
 	}
 	}
 
 
-	if imageId != img.ID {
-		t.Logf("Image ids should match: %s != %s", imageId, img.ID)
+	hit := imageId == img.ID
+	if hit != expectHit {
+		t.Logf("Cache misbehavior, got hit=%t, expected hit=%t: (first: %s, second %s)",
+			hit, expectHit, imageId, img.ID)
 		t.Fail()
 		t.Fail()
 	}
 	}
 }
 }
 
 
-func TestBuildImageWithoutCache(t *testing.T) {
-	eng := NewTestEngine(t)
-	defer nuke(mkRuntimeFromEngine(eng, t))
+func TestBuildImageWithCache(t *testing.T) {
+	template := testContextTemplate{`
+        from {IMAGE}
+        maintainer dockerio
+        `,
+		nil, nil}
+	checkCacheBehavior(t, template, true)
+}
 
 
+func TestBuildImageWithoutCache(t *testing.T) {
 	template := testContextTemplate{`
 	template := testContextTemplate{`
         from {IMAGE}
         from {IMAGE}
         maintainer dockerio
         maintainer dockerio
         `,
         `,
 		nil, nil}
 		nil, nil}
+	checkCacheBehavior(t, template, false)
+}
 
 
-	img, err := buildImage(template, t, eng, true)
-	if err != nil {
-		t.Fatal(err)
-	}
-	imageId := img.ID
+func TestBuildADDLocalFileWithCache(t *testing.T) {
+	template := testContextTemplate{`
+        from {IMAGE}
+        maintainer dockerio
+        run echo "first"
+        add foo /usr/lib/bla/bar
+        run echo "second"
+        `,
+		[][2]string{{"foo", "hello"}},
+		nil}
+	checkCacheBehavior(t, template, true)
+}
 
 
-	img = nil
-	img, err = buildImage(template, t, eng, false)
-	if err != nil {
-		t.Fatal(err)
-	}
+func TestBuildADDLocalFileWithoutCache(t *testing.T) {
+	template := testContextTemplate{`
+        from {IMAGE}
+        maintainer dockerio
+        run echo "first"
+        add foo /usr/lib/bla/bar
+        run echo "second"
+        `,
+		[][2]string{{"foo", "hello"}},
+		nil}
+	checkCacheBehavior(t, template, false)
+}
 
 
-	if imageId == img.ID {
-		t.Logf("Image ids should not match: %s == %s", imageId, img.ID)
-		t.Fail()
-	}
+func TestBuildADDRemoteFileWithCache(t *testing.T) {
+	template := testContextTemplate{`
+        from {IMAGE}
+        maintainer dockerio
+        run echo "first"
+        add http://{SERVERADDR}/baz /usr/lib/baz/quux
+        run echo "second"
+        `,
+		nil,
+		[][2]string{{"/baz", "world!"}}}
+	checkCacheBehavior(t, template, true)
+}
+
+func TestBuildADDRemoteFileWithoutCache(t *testing.T) {
+	template := testContextTemplate{`
+        from {IMAGE}
+        maintainer dockerio
+        run echo "first"
+        add http://{SERVERADDR}/baz /usr/lib/baz/quux
+        run echo "second"
+        `,
+		nil,
+		[][2]string{{"/baz", "world!"}}}
+	checkCacheBehavior(t, template, false)
+}
+
+func TestBuildADDLocalAndRemoteFilesWithCache(t *testing.T) {
+	template := testContextTemplate{`
+        from {IMAGE}
+        maintainer dockerio
+        run echo "first"
+        add foo /usr/lib/bla/bar
+        add http://{SERVERADDR}/baz /usr/lib/baz/quux
+        run echo "second"
+        `,
+		[][2]string{{"foo", "hello"}},
+		[][2]string{{"/baz", "world!"}}}
+	checkCacheBehavior(t, template, true)
+}
+
+func TestBuildADDLocalAndRemoteFilesWithoutCache(t *testing.T) {
+	template := testContextTemplate{`
+        from {IMAGE}
+        maintainer dockerio
+        run echo "first"
+        add foo /usr/lib/bla/bar
+        add http://{SERVERADDR}/baz /usr/lib/baz/quux
+        run echo "second"
+        `,
+		[][2]string{{"foo", "hello"}},
+		[][2]string{{"/baz", "world!"}}}
+	checkCacheBehavior(t, template, false)
 }
 }
 
 
 func TestForbiddenContextPath(t *testing.T) {
 func TestForbiddenContextPath(t *testing.T) {