Browse Source

Merge pull request #2857 from shykes/0.7-release

0.7 release
Solomon Hykes 11 years ago
parent
commit
db28e839e0
80 changed files with 7466 additions and 1160 deletions
  1. 12 0
      CHANGELOG.md
  2. 14 5
      Dockerfile
  3. 1 1
      VERSION
  4. 1 1
      api.go
  5. 11 9
      api_params.go
  6. 112 16
      archive/archive.go
  7. 4 4
      archive/archive_test.go
  8. 317 0
      archive/changes.go
  9. 305 0
      archive/changes_test.go
  10. 95 0
      archive/diff.go
  11. 2 2
      buildfile.go
  12. 0 106
      changes.go
  13. 163 146
      commands.go
  14. 2 0
      config.go
  15. 68 88
      container.go
  16. 170 0
      contrib/docker-device-tool/device_tool.go
  17. 12 10
      docker/docker.go
  18. 1 1
      docs/Dockerfile
  19. 7 6
      docs/README.md
  20. 46 1
      docs/sources/commandline/cli.rst
  21. 137 0
      docs/sources/examples/cfengine_process_management.rst
  22. 2 0
      docs/sources/examples/index.rst
  23. 128 0
      docs/sources/examples/using_supervisord.rst
  24. 5 5
      docs/sources/installation/archlinux.rst
  25. 4 12
      docs/sources/installation/binaries.rst
  26. 19 0
      docs/sources/installation/fedora.rst
  27. 14 12
      docs/sources/installation/gentoolinux.rst
  28. 8 7
      docs/sources/installation/index.rst
  29. 2 13
      docs/sources/installation/kernel.rst
  30. 8 9
      docs/sources/installation/rackspace.rst
  31. 43 41
      docs/sources/installation/ubuntulinux.rst
  32. 2 2
      docs/sources/use/basics.rst
  33. 1 1
      docs/theme/docker/layout.html
  34. 82 45
      graph.go
  35. 0 292
      graph_test.go
  36. 0 0
      graphdb/MAINTAINERS
  37. 1 1
      graphdb/graphdb.go
  38. 1 1
      graphdb/graphdb_test.go
  39. 1 1
      graphdb/sort.go
  40. 1 1
      graphdb/sort_test.go
  41. 1 1
      graphdb/utils.go
  42. 336 0
      graphdriver/aufs/aufs.go
  43. 623 0
      graphdriver/aufs/aufs_test.go
  44. 46 0
      graphdriver/aufs/dirs.go
  45. 194 0
      graphdriver/aufs/migrate.go
  46. 3 19
      graphdriver/aufs/mount.go
  47. 1 1
      graphdriver/aufs/mount_darwin.go
  48. 1 1
      graphdriver/aufs/mount_linux.go
  49. 956 0
      graphdriver/devmapper/deviceset.go
  50. 576 0
      graphdriver/devmapper/devmapper.go
  51. 106 0
      graphdriver/devmapper/devmapper_doc.go
  52. 13 0
      graphdriver/devmapper/devmapper_log.go
  53. 285 0
      graphdriver/devmapper/devmapper_test.go
  54. 340 0
      graphdriver/devmapper/devmapper_wrapper.go
  55. 126 0
      graphdriver/devmapper/driver.go
  56. 872 0
      graphdriver/devmapper/driver_test.go
  57. 25 0
      graphdriver/devmapper/mount.go
  58. 50 0
      graphdriver/devmapper/sys.go
  59. 90 0
      graphdriver/driver.go
  60. 91 0
      graphdriver/vfs/driver.go
  61. 14 3
      hack/make.sh
  62. 23 0
      hack/make/tgz
  63. 18 0
      hack/release.sh
  64. 89 163
      image.go
  65. 4 5
      integration/commands_test.go
  66. 1 1
      integration/container_test.go
  67. 271 12
      integration/graph_test.go
  68. 3 0
      integration/runtime_test.go
  69. 40 3
      namesgenerator/names-generator.go
  70. 21 0
      namesgenerator/names-generator_test.go
  71. 3 0
      network.go
  72. 183 27
      runtime.go
  73. 59 34
      server.go
  74. 16 26
      server_unit_test.go
  75. 3 3
      state.go
  76. 9 3
      tags_unit_test.go
  77. 66 6
      utils.go
  78. 35 0
      utils/fs.go
  79. 47 13
      utils/utils.go
  80. 24 0
      utils_test.go

+ 12 - 0
CHANGELOG.md

@@ -1,5 +1,17 @@
 # Changelog
 
+## 0.7.0 (2013-11-25)
+
+#### Notable features since 0.6.0
+
+* Storage drivers: choose from aufs, device mapper, vfs or btrfs.
+* Standard Linux support: docker now runs on unmodified linux kernels and all major distributions.
+* Links: compose complex software stacks by connecting containers to each other.
+* Container naming: organize your containers by giving them memorable names.
+* Advanced port redirects: specify port redirects per interface, or keep sensitive ports private.
+* Offline transfer: push and pull images to the filesystem without losing information.
+* Quality: numerous bugfixes and small usability improvements. Significant increase in test coverage.
+
 ## 0.6.7 (2013-11-21)
 
 #### Runtime

+ 14 - 5
Dockerfile

@@ -23,7 +23,7 @@
 # the case. Therefore, you don't have to disable it anymore.
 #
 
-docker-version 0.6.1
+docker-version	0.6.1
 from	ubuntu:12.04
 maintainer	Solomon Hykes <solomon@dotcloud.com>
 
@@ -33,13 +33,13 @@ run	apt-get update
 run	apt-get install -y -q curl
 run	apt-get install -y -q git
 run	apt-get install -y -q mercurial
-run apt-get install -y -q build-essential libsqlite3-dev
+run	apt-get install -y -q build-essential libsqlite3-dev
 
 # Install Go
 run	curl -s https://go.googlecode.com/files/go1.2rc5.src.tar.gz | tar -v -C /usr/local -xz
 env	PATH	/usr/local/go/bin:/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin
 env	GOPATH	/go:/go/src/github.com/dotcloud/docker/vendor
-run cd /usr/local/go/src && ./make.bash && go install -ldflags '-w -linkmode external -extldflags "-static -Wl,--unresolved-symbols=ignore-in-shared-libs"' -tags netgo -a std
+run	cd /usr/local/go/src && ./make.bash && go install -ldflags '-w -linkmode external -extldflags "-static -Wl,--unresolved-symbols=ignore-in-shared-libs"' -tags netgo -a std
 
 # Ubuntu stuff
 run	apt-get install -y -q ruby1.9.3 rubygems libffi-dev
@@ -56,11 +56,20 @@ run	apt-get install -y -q iptables
 run	apt-get install -y -q lxc
 run	apt-get install -y -q aufs-tools
 
+# Get lvm2 source for compiling statically
+run	git clone https://git.fedorahosted.org/git/lvm2.git /usr/local/lvm2 && cd /usr/local/lvm2 && git checkout v2_02_103
+# see https://git.fedorahosted.org/cgit/lvm2.git/refs/tags for release tags
+# note: we can't use "git clone -b" above because it requires at least git 1.7.10 to be able to use that on a tag instead of a branch and we only have 1.7.9.5
+
+# Compile and install lvm2
+run	cd /usr/local/lvm2 && ./configure --enable-static_link && make device-mapper && make install_device-mapper
+# see https://git.fedorahosted.org/cgit/lvm2.git/tree/INSTALL
+
 volume	/var/lib/docker
 workdir	/go/src/github.com/dotcloud/docker
 
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
-entrypoint ["hack/dind"]
+entrypoint	["hack/dind"]
 
 # Upload docker source
-add	.       /go/src/github.com/dotcloud/docker
+add	.	/go/src/github.com/dotcloud/docker

+ 1 - 1
VERSION

@@ -1 +1 @@
-0.6.7
+0.7.0

+ 1 - 1
api.go

@@ -254,7 +254,7 @@ func getEvents(srv *Server, version float64, w http.ResponseWriter, r *http.Requ
 	wf.Flush()
 	if since != 0 {
 		// If since, send previous events that happened after the timestamp
-		for _, event := range srv.events {
+		for _, event := range srv.GetEvents() {
 			if event.Time >= since {
 				err := sendEvent(wf, &event)
 				if err != nil && err.Error() == "JSON error" {

+ 11 - 9
api_params.go

@@ -33,15 +33,17 @@ type (
 		Debug              bool
 		Containers         int
 		Images             int
-		NFd                int    `json:",omitempty"`
-		NGoroutines        int    `json:",omitempty"`
-		MemoryLimit        bool   `json:",omitempty"`
-		SwapLimit          bool   `json:",omitempty"`
-		IPv4Forwarding     bool   `json:",omitempty"`
-		LXCVersion         string `json:",omitempty"`
-		NEventsListener    int    `json:",omitempty"`
-		KernelVersion      string `json:",omitempty"`
-		IndexServerAddress string `json:",omitempty"`
+		Driver             string      `json:",omitempty"`
+		DriverStatus       [][2]string `json:",omitempty"`
+		NFd                int         `json:",omitempty"`
+		NGoroutines        int         `json:",omitempty"`
+		MemoryLimit        bool        `json:",omitempty"`
+		SwapLimit          bool        `json:",omitempty"`
+		IPv4Forwarding     bool        `json:",omitempty"`
+		LXCVersion         string      `json:",omitempty"`
+		NEventsListener    int         `json:",omitempty"`
+		KernelVersion      string      `json:",omitempty"`
+		IndexServerAddress string      `json:",omitempty"`
 	}
 
 	APITop struct {

+ 112 - 16
archive/archive.go

@@ -15,7 +15,15 @@ import (
 
 type Archive io.Reader
 
-type Compression uint32
+type Compression int
+
+type TarOptions struct {
+	Includes    []string
+	Excludes    []string
+	Recursive   bool
+	Compression Compression
+	CreateFiles []string
+}
 
 const (
 	Uncompressed Compression = iota
@@ -80,20 +88,78 @@ func (compression *Compression) Extension() string {
 // Tar creates an archive from the directory at `path`, and returns it as a
 // stream of bytes.
 func Tar(path string, compression Compression) (io.Reader, error) {
-	return TarFilter(path, compression, nil)
+	return TarFilter(path, &TarOptions{Recursive: true, Compression: compression})
+}
+
+func escapeName(name string) string {
+	escaped := make([]byte, 0)
+	for i, c := range []byte(name) {
+		if i == 0 && c == '/' {
+			continue
+		}
+		// all printable chars except "-" which is 0x2d
+		if (0x20 <= c && c <= 0x7E) && c != 0x2d {
+			escaped = append(escaped, c)
+		} else {
+			escaped = append(escaped, fmt.Sprintf("\\%03o", c)...)
+		}
+	}
+	return string(escaped)
 }
 
 // Tar creates an archive from the directory at `path`, only including files whose relative
 // paths are included in `filter`. If `filter` is nil, then all files are included.
-func TarFilter(path string, compression Compression, filter []string) (io.Reader, error) {
-	args := []string{"tar", "--numeric-owner", "-f", "-", "-C", path}
-	if filter == nil {
-		filter = []string{"."}
+func TarFilter(path string, options *TarOptions) (io.Reader, error) {
+	args := []string{"tar", "--numeric-owner", "-f", "-", "-C", path, "-T", "-"}
+	if options.Includes == nil {
+		options.Includes = []string{"."}
 	}
-	for _, f := range filter {
-		args = append(args, "-c"+compression.Flag(), f)
+	args = append(args, "-c"+options.Compression.Flag())
+
+	for _, exclude := range options.Excludes {
+		args = append(args, fmt.Sprintf("--exclude=%s", exclude))
+	}
+
+	if !options.Recursive {
+		args = append(args, "--no-recursion")
+	}
+
+	files := ""
+	for _, f := range options.Includes {
+		files = files + escapeName(f) + "\n"
+	}
+
+	tmpDir := ""
+
+	if options.CreateFiles != nil {
+		var err error // Can't use := here or we override the outer tmpDir
+		tmpDir, err = ioutil.TempDir("", "docker-tar")
+		if err != nil {
+			return nil, err
+		}
+
+		files = files + "-C" + tmpDir + "\n"
+		for _, f := range options.CreateFiles {
+			path := filepath.Join(tmpDir, f)
+			err := os.MkdirAll(filepath.Dir(path), 0600)
+			if err != nil {
+				return nil, err
+			}
+
+			if file, err := os.OpenFile(path, os.O_CREATE, 0600); err != nil {
+				return nil, err
+			} else {
+				file.Close()
+			}
+			files = files + escapeName(f) + "\n"
+		}
 	}
-	return CmdStream(exec.Command(args[0], args[1:]...))
+
+	return CmdStream(exec.Command(args[0], args[1:]...), &files, func() {
+		if tmpDir != "" {
+			_ = os.RemoveAll(tmpDir)
+		}
+	})
 }
 
 // Untar reads a stream of bytes from `archive`, parses it as a tar archive,
@@ -101,7 +167,7 @@ func TarFilter(path string, compression Compression, filter []string) (io.Reader
 // The archive may be compressed with one of the following algorithms:
 //  identity (uncompressed), gzip, bzip2, xz.
 // FIXME: specify behavior when target path exists vs. doesn't exist.
-func Untar(archive io.Reader, path string) error {
+func Untar(archive io.Reader, path string, options *TarOptions) error {
 	if archive == nil {
 		return fmt.Errorf("Empty archive")
 	}
@@ -123,8 +189,15 @@ func Untar(archive io.Reader, path string) error {
 	compression := DetectCompression(buf)
 
 	utils.Debugf("Archive compression detected: %s", compression.Extension())
+	args := []string{"--numeric-owner", "-f", "-", "-C", path, "-x" + compression.Flag()}
+
+	if options != nil {
+		for _, exclude := range options.Excludes {
+			args = append(args, fmt.Sprintf("--exclude=%s", exclude))
+		}
+	}
 
-	cmd := exec.Command("tar", "--numeric-owner", "-f", "-", "-C", path, "-x"+compression.Flag())
+	cmd := exec.Command("tar", args...)
 	cmd.Stdin = io.MultiReader(bytes.NewReader(buf), archive)
 	// Hardcode locale environment for predictable outcome regardless of host configuration.
 	//   (see https://github.com/dotcloud/docker/issues/355)
@@ -141,11 +214,11 @@ func Untar(archive io.Reader, path string) error {
 // TarUntar aborts and returns the error.
 func TarUntar(src string, filter []string, dst string) error {
 	utils.Debugf("TarUntar(%s %s %s)", src, filter, dst)
-	archive, err := TarFilter(src, Uncompressed, filter)
+	archive, err := TarFilter(src, &TarOptions{Compression: Uncompressed, Includes: filter, Recursive: true})
 	if err != nil {
 		return err
 	}
-	return Untar(archive, dst)
+	return Untar(archive, dst, nil)
 }
 
 // UntarPath is a convenience function which looks for an archive
@@ -153,7 +226,7 @@ func TarUntar(src string, filter []string, dst string) error {
 func UntarPath(src, dst string) error {
 	if archive, err := os.Open(src); err != nil {
 		return err
-	} else if err := Untar(archive, dst); err != nil {
+	} else if err := Untar(archive, dst, nil); err != nil {
 		return err
 	}
 	return nil
@@ -222,19 +295,39 @@ func CopyFileWithTar(src, dst string) error {
 		return err
 	}
 	tw.Close()
-	return Untar(buf, filepath.Dir(dst))
+	return Untar(buf, filepath.Dir(dst), nil)
 }
 
 // CmdStream executes a command, and returns its stdout as a stream.
 // If the command fails to run or doesn't complete successfully, an error
 // will be returned, including anything written on stderr.
-func CmdStream(cmd *exec.Cmd) (io.Reader, error) {
+func CmdStream(cmd *exec.Cmd, input *string, atEnd func()) (io.Reader, error) {
+	if input != nil {
+		stdin, err := cmd.StdinPipe()
+		if err != nil {
+			if atEnd != nil {
+				atEnd()
+			}
+			return nil, err
+		}
+		// Write stdin if any
+		go func() {
+			_, _ = stdin.Write([]byte(*input))
+			stdin.Close()
+		}()
+	}
 	stdout, err := cmd.StdoutPipe()
 	if err != nil {
+		if atEnd != nil {
+			atEnd()
+		}
 		return nil, err
 	}
 	stderr, err := cmd.StderrPipe()
 	if err != nil {
+		if atEnd != nil {
+			atEnd()
+		}
 		return nil, err
 	}
 	pipeR, pipeW := io.Pipe()
@@ -259,6 +352,9 @@ func CmdStream(cmd *exec.Cmd) (io.Reader, error) {
 		} else {
 			pipeW.Close()
 		}
+		if atEnd != nil {
+			atEnd()
+		}
 	}()
 	// Run the command and return the pipe
 	if err := cmd.Start(); err != nil {

+ 4 - 4
archive/archive_test.go

@@ -14,7 +14,7 @@ import (
 
 func TestCmdStreamLargeStderr(t *testing.T) {
 	cmd := exec.Command("/bin/sh", "-c", "dd if=/dev/zero bs=1k count=1000 of=/dev/stderr; echo hello")
-	out, err := CmdStream(cmd)
+	out, err := CmdStream(cmd, nil, nil)
 	if err != nil {
 		t.Fatalf("Failed to start command: %s", err)
 	}
@@ -35,7 +35,7 @@ func TestCmdStreamLargeStderr(t *testing.T) {
 
 func TestCmdStreamBad(t *testing.T) {
 	badCmd := exec.Command("/bin/sh", "-c", "echo hello; echo >&2 error couldn\\'t reverse the phase pulser; exit 1")
-	out, err := CmdStream(badCmd)
+	out, err := CmdStream(badCmd, nil, nil)
 	if err != nil {
 		t.Fatalf("Failed to start command: %s", err)
 	}
@@ -50,7 +50,7 @@ func TestCmdStreamBad(t *testing.T) {
 
 func TestCmdStreamGood(t *testing.T) {
 	cmd := exec.Command("/bin/sh", "-c", "echo hello; exit 0")
-	out, err := CmdStream(cmd)
+	out, err := CmdStream(cmd, nil, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -83,7 +83,7 @@ func tarUntar(t *testing.T, origin string, compression Compression) error {
 		return err
 	}
 	defer os.RemoveAll(tmp)
-	if err := Untar(archive, tmp); err != nil {
+	if err := Untar(archive, tmp, nil); err != nil {
 		return err
 	}
 	if _, err := os.Stat(tmp); err != nil {

+ 317 - 0
archive/changes.go

@@ -0,0 +1,317 @@
+package archive
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"syscall"
+)
+
+type ChangeType int
+
+const (
+	ChangeModify = iota
+	ChangeAdd
+	ChangeDelete
+)
+
+type Change struct {
+	Path string
+	Kind ChangeType
+}
+
+func (change *Change) String() string {
+	var kind string
+	switch change.Kind {
+	case ChangeModify:
+		kind = "C"
+	case ChangeAdd:
+		kind = "A"
+	case ChangeDelete:
+		kind = "D"
+	}
+	return fmt.Sprintf("%s %s", kind, change.Path)
+}
+
+func Changes(layers []string, rw string) ([]Change, error) {
+	var changes []Change
+	err := filepath.Walk(rw, func(path string, f os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+
+		// Rebase path
+		path, err = filepath.Rel(rw, path)
+		if err != nil {
+			return err
+		}
+		path = filepath.Join("/", path)
+
+		// Skip root
+		if path == "/" {
+			return nil
+		}
+
+		// Skip AUFS metadata
+		if matched, err := filepath.Match("/.wh..wh.*", path); err != nil || matched {
+			return err
+		}
+
+		change := Change{
+			Path: path,
+		}
+
+		// Find out what kind of modification happened
+		file := filepath.Base(path)
+		// If there is a whiteout, then the file was removed
+		if strings.HasPrefix(file, ".wh.") {
+			originalFile := file[len(".wh."):]
+			change.Path = filepath.Join(filepath.Dir(path), originalFile)
+			change.Kind = ChangeDelete
+		} else {
+			// Otherwise, the file was added
+			change.Kind = ChangeAdd
+
+			// ...Unless it already existed in a top layer, in which case, it's a modification
+			for _, layer := range layers {
+				stat, err := os.Stat(filepath.Join(layer, path))
+				if err != nil && !os.IsNotExist(err) {
+					return err
+				}
+				if err == nil {
+					// The file existed in the top layer, so that's a modification
+
+					// However, if it's a directory, maybe it wasn't actually modified.
+					// If you modify /foo/bar/baz, then /foo will be part of the changed files only because it's the parent of bar
+					if stat.IsDir() && f.IsDir() {
+						if f.Size() == stat.Size() && f.Mode() == stat.Mode() && f.ModTime() == stat.ModTime() {
+							// Both directories are the same, don't record the change
+							return nil
+						}
+					}
+					change.Kind = ChangeModify
+					break
+				}
+			}
+		}
+
+		// Record change
+		changes = append(changes, change)
+		return nil
+	})
+	if err != nil && !os.IsNotExist(err) {
+		return nil, err
+	}
+	return changes, nil
+}
+
+type FileInfo struct {
+	parent   *FileInfo
+	name     string
+	stat     syscall.Stat_t
+	children map[string]*FileInfo
+}
+
+func (root *FileInfo) LookUp(path string) *FileInfo {
+	parent := root
+	if path == "/" {
+		return root
+	}
+
+	pathElements := strings.Split(path, "/")
+	for _, elem := range pathElements {
+		if elem != "" {
+			child := parent.children[elem]
+			if child == nil {
+				return nil
+			}
+			parent = child
+		}
+	}
+	return parent
+}
+
+func (info *FileInfo) path() string {
+	if info.parent == nil {
+		return "/"
+	}
+	return filepath.Join(info.parent.path(), info.name)
+}
+
+func (info *FileInfo) isDir() bool {
+	return info.parent == nil || info.stat.Mode&syscall.S_IFDIR == syscall.S_IFDIR
+}
+
+func (info *FileInfo) addChanges(oldInfo *FileInfo, changes *[]Change) {
+	if oldInfo == nil {
+		// add
+		change := Change{
+			Path: info.path(),
+			Kind: ChangeAdd,
+		}
+		*changes = append(*changes, change)
+	}
+
+	// We make a copy so we can modify it to detect additions
+	// also, we only recurse on the old dir if the new info is a directory
+	// otherwise any previous delete/change is considered recursive
+	oldChildren := make(map[string]*FileInfo)
+	if oldInfo != nil && info.isDir() {
+		for k, v := range oldInfo.children {
+			oldChildren[k] = v
+		}
+	}
+
+	for name, newChild := range info.children {
+		oldChild, _ := oldChildren[name]
+		if oldChild != nil {
+			// change?
+			oldStat := &oldChild.stat
+			newStat := &newChild.stat
+			// Note: We can't compare inode or ctime or blocksize here, because these change
+			// when copying a file into a container. However, that is not generally a problem
+			// because any content change will change mtime, and any status change should
+			// be visible when actually comparing the stat fields. The only time this
+			// breaks down is if some code intentionally hides a change by setting
+			// back mtime
+			if oldStat.Mode != newStat.Mode ||
+				oldStat.Uid != newStat.Uid ||
+				oldStat.Gid != newStat.Gid ||
+				oldStat.Rdev != newStat.Rdev ||
+				// Don't look at size for dirs, its not a good measure of change
+				(oldStat.Size != newStat.Size && oldStat.Mode&syscall.S_IFDIR != syscall.S_IFDIR) ||
+				oldStat.Mtim != newStat.Mtim {
+				change := Change{
+					Path: newChild.path(),
+					Kind: ChangeModify,
+				}
+				*changes = append(*changes, change)
+			}
+
+			// Remove from copy so we can detect deletions
+			delete(oldChildren, name)
+		}
+
+		newChild.addChanges(oldChild, changes)
+	}
+	for _, oldChild := range oldChildren {
+		// delete
+		change := Change{
+			Path: oldChild.path(),
+			Kind: ChangeDelete,
+		}
+		*changes = append(*changes, change)
+	}
+
+}
+
+func (info *FileInfo) Changes(oldInfo *FileInfo) []Change {
+	var changes []Change
+
+	info.addChanges(oldInfo, &changes)
+
+	return changes
+}
+
+func newRootFileInfo() *FileInfo {
+	root := &FileInfo{
+		name:     "/",
+		children: make(map[string]*FileInfo),
+	}
+	return root
+}
+
+func collectFileInfo(sourceDir string) (*FileInfo, error) {
+	root := newRootFileInfo()
+
+	err := filepath.Walk(sourceDir, func(path string, f os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+
+		// Rebase path
+		relPath, err := filepath.Rel(sourceDir, path)
+		if err != nil {
+			return err
+		}
+		relPath = filepath.Join("/", relPath)
+
+		if relPath == "/" {
+			return nil
+		}
+
+		parent := root.LookUp(filepath.Dir(relPath))
+		if parent == nil {
+			return fmt.Errorf("collectFileInfo: Unexpectedly no parent for %s", relPath)
+		}
+
+		info := &FileInfo{
+			name:     filepath.Base(relPath),
+			children: make(map[string]*FileInfo),
+			parent:   parent,
+		}
+
+		if err := syscall.Lstat(path, &info.stat); err != nil {
+			return err
+		}
+
+		parent.children[info.name] = info
+
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+	return root, nil
+}
+
+// Compare two directories and generate an array of Change objects describing the changes
+func ChangesDirs(newDir, oldDir string) ([]Change, error) {
+	oldRoot, err := collectFileInfo(oldDir)
+	if err != nil {
+		return nil, err
+	}
+	newRoot, err := collectFileInfo(newDir)
+	if err != nil {
+		return nil, err
+	}
+
+	return newRoot.Changes(oldRoot), nil
+}
+
+func ChangesSize(newDir string, changes []Change) int64 {
+	var size int64
+	for _, change := range changes {
+		if change.Kind == ChangeModify || change.Kind == ChangeAdd {
+			file := filepath.Join(newDir, change.Path)
+			fileInfo, _ := os.Lstat(file)
+			if fileInfo != nil && !fileInfo.IsDir() {
+				size += fileInfo.Size()
+			}
+		}
+	}
+	return size
+}
+
+func ExportChanges(dir string, changes []Change) (Archive, error) {
+	files := make([]string, 0)
+	deletions := make([]string, 0)
+	for _, change := range changes {
+		if change.Kind == ChangeModify || change.Kind == ChangeAdd {
+			files = append(files, change.Path)
+		}
+		if change.Kind == ChangeDelete {
+			base := filepath.Base(change.Path)
+			dir := filepath.Dir(change.Path)
+			deletions = append(deletions, filepath.Join(dir, ".wh."+base))
+		}
+	}
+	// FIXME: Why do we create whiteout files inside Tar code ?
+	return TarFilter(dir, &TarOptions{
+		Compression: Uncompressed,
+		Includes:    files,
+		Recursive:   false,
+		CreateFiles: deletions,
+	})
+}

+ 305 - 0
archive/changes_test.go

@@ -0,0 +1,305 @@
+package archive
+
+import (
+	"io/ioutil"
+	"os"
+	"os/exec"
+	"path"
+	"sort"
+	"testing"
+	"time"
+)
+
+func max(x, y int) int {
+	if x >= y {
+		return x
+	}
+	return y
+}
+
+func copyDir(src, dst string) error {
+	cmd := exec.Command("cp", "-a", src, dst)
+	if err := cmd.Run(); err != nil {
+		return err
+	}
+	return nil
+}
+
+// Helper to sort []Change by path
+type byPath struct{ changes []Change }
+
+func (b byPath) Less(i, j int) bool { return b.changes[i].Path < b.changes[j].Path }
+func (b byPath) Len() int           { return len(b.changes) }
+func (b byPath) Swap(i, j int)      { b.changes[i], b.changes[j] = b.changes[j], b.changes[i] }
+
+type FileType uint32
+
+const (
+	Regular FileType = iota
+	Dir
+	Symlink
+)
+
+type FileData struct {
+	filetype    FileType
+	path        string
+	contents    string
+	permissions os.FileMode
+}
+
+func createSampleDir(t *testing.T, root string) {
+	files := []FileData{
+		{Regular, "file1", "file1\n", 0600},
+		{Regular, "file2", "file2\n", 0666},
+		{Regular, "file3", "file3\n", 0404},
+		{Regular, "file4", "file4\n", 0600},
+		{Regular, "file5", "file5\n", 0600},
+		{Regular, "file6", "file6\n", 0600},
+		{Regular, "file7", "file7\n", 0600},
+		{Dir, "dir1", "", 0740},
+		{Regular, "dir1/file1-1", "file1-1\n", 01444},
+		{Regular, "dir1/file1-2", "file1-2\n", 0666},
+		{Dir, "dir2", "", 0700},
+		{Regular, "dir2/file2-1", "file2-1\n", 0666},
+		{Regular, "dir2/file2-2", "file2-2\n", 0666},
+		{Dir, "dir3", "", 0700},
+		{Regular, "dir3/file3-1", "file3-1\n", 0666},
+		{Regular, "dir3/file3-2", "file3-2\n", 0666},
+		{Dir, "dir4", "", 0700},
+		{Regular, "dir4/file3-1", "file4-1\n", 0666},
+		{Regular, "dir4/file3-2", "file4-2\n", 0666},
+		{Symlink, "symlink1", "target1", 0666},
+		{Symlink, "symlink2", "target2", 0666},
+	}
+
+	now := time.Now()
+	for _, info := range files {
+		p := path.Join(root, info.path)
+		if info.filetype == Dir {
+			if err := os.MkdirAll(p, info.permissions); err != nil {
+				t.Fatal(err)
+			}
+		} else if info.filetype == Regular {
+			if err := ioutil.WriteFile(p, []byte(info.contents), info.permissions); err != nil {
+				t.Fatal(err)
+			}
+		} else if info.filetype == Symlink {
+			if err := os.Symlink(info.contents, p); err != nil {
+				t.Fatal(err)
+			}
+		}
+
+		if info.filetype != Symlink {
+			// Set a consistent ctime, atime for all files and dirs
+			if err := os.Chtimes(p, now, now); err != nil {
+				t.Fatal(err)
+			}
+		}
+	}
+}
+
+// Create a directory, copy it, make sure we report no changes between the two
+func TestChangesDirsEmpty(t *testing.T) {
+	src, err := ioutil.TempDir("", "docker-changes-test")
+	if err != nil {
+		t.Fatal(err)
+	}
+	createSampleDir(t, src)
+	dst := src + "-copy"
+	if err := copyDir(src, dst); err != nil {
+		t.Fatal(err)
+	}
+	changes, err := ChangesDirs(dst, src)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(changes) != 0 {
+		t.Fatalf("Reported changes for identical dirs: %v", changes)
+	}
+	os.RemoveAll(src)
+	os.RemoveAll(dst)
+}
+
+func mutateSampleDir(t *testing.T, root string) {
+	// Remove a regular file
+	if err := os.RemoveAll(path.Join(root, "file1")); err != nil {
+		t.Fatal(err)
+	}
+
+	// Remove a directory
+	if err := os.RemoveAll(path.Join(root, "dir1")); err != nil {
+		t.Fatal(err)
+	}
+
+	// Remove a symlink
+	if err := os.RemoveAll(path.Join(root, "symlink1")); err != nil {
+		t.Fatal(err)
+	}
+
+	// Rewrite a file
+	if err := ioutil.WriteFile(path.Join(root, "file2"), []byte("fileN\n"), 0777); err != nil {
+		t.Fatal(err)
+	}
+
+	// Replace a file
+	if err := os.RemoveAll(path.Join(root, "file3")); err != nil {
+		t.Fatal(err)
+	}
+	if err := ioutil.WriteFile(path.Join(root, "file3"), []byte("fileM\n"), 0404); err != nil {
+		t.Fatal(err)
+	}
+
+	// Touch file
+	if err := os.Chtimes(path.Join(root, "file4"), time.Now(), time.Now()); err != nil {
+		t.Fatal(err)
+	}
+
+	// Replace file with dir
+	if err := os.RemoveAll(path.Join(root, "file5")); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.MkdirAll(path.Join(root, "file5"), 0666); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create new file
+	if err := ioutil.WriteFile(path.Join(root, "filenew"), []byte("filenew\n"), 0777); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create new dir
+	if err := os.MkdirAll(path.Join(root, "dirnew"), 0766); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create a new symlink
+	if err := os.Symlink("targetnew", path.Join(root, "symlinknew")); err != nil {
+		t.Fatal(err)
+	}
+
+	// Change a symlink
+	if err := os.RemoveAll(path.Join(root, "symlink2")); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.Symlink("target2change", path.Join(root, "symlink2")); err != nil {
+		t.Fatal(err)
+	}
+
+	// Replace dir with file
+	if err := os.RemoveAll(path.Join(root, "dir2")); err != nil {
+		t.Fatal(err)
+	}
+	if err := ioutil.WriteFile(path.Join(root, "dir2"), []byte("dir2\n"), 0777); err != nil {
+		t.Fatal(err)
+	}
+
+	// Touch dir
+	if err := os.Chtimes(path.Join(root, "dir3"), time.Now(), time.Now()); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestChangesDirsMutated(t *testing.T) {
+	src, err := ioutil.TempDir("", "docker-changes-test")
+	if err != nil {
+		t.Fatal(err)
+	}
+	createSampleDir(t, src)
+	dst := src + "-copy"
+	if err := copyDir(src, dst); err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(src)
+	defer os.RemoveAll(dst)
+
+	mutateSampleDir(t, dst)
+
+	changes, err := ChangesDirs(dst, src)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	sort.Sort(byPath{changes})
+
+	expectedChanges := []Change{
+		{"/dir1", ChangeDelete},
+		{"/dir2", ChangeModify},
+		{"/dir3", ChangeModify},
+		{"/dirnew", ChangeAdd},
+		{"/file1", ChangeDelete},
+		{"/file2", ChangeModify},
+		{"/file3", ChangeModify},
+		{"/file4", ChangeModify},
+		{"/file5", ChangeModify},
+		{"/filenew", ChangeAdd},
+		{"/symlink1", ChangeDelete},
+		{"/symlink2", ChangeModify},
+		{"/symlinknew", ChangeAdd},
+	}
+
+	for i := 0; i < max(len(changes), len(expectedChanges)); i++ {
+		if i >= len(expectedChanges) {
+			t.Fatalf("unexpected change %s\n", changes[i].String())
+		}
+		if i >= len(changes) {
+			t.Fatalf("no change for expected change %s\n", expectedChanges[i].String())
+		}
+		if changes[i].Path == expectedChanges[i].Path {
+			if changes[i] != expectedChanges[i] {
+				t.Fatalf("Wrong change for %s, expected %s, got %d\n", changes[i].Path, changes[i].String(), expectedChanges[i].String())
+			}
+		} else if changes[i].Path < expectedChanges[i].Path {
+			t.Fatalf("unexpected change %s\n", changes[i].String())
+		} else {
+			t.Fatalf("no change for expected change %s != %s\n", expectedChanges[i].String(), changes[i].String())
+		}
+	}
+}
+
+func TestApplyLayer(t *testing.T) {
+	t.Skip("Skipping TestApplyLayer due to known failures") // Disable this for now as it is broken
+	return
+
+	src, err := ioutil.TempDir("", "docker-changes-test")
+	if err != nil {
+		t.Fatal(err)
+	}
+	createSampleDir(t, src)
+	dst := src + "-copy"
+	if err := copyDir(src, dst); err != nil {
+		t.Fatal(err)
+	}
+	mutateSampleDir(t, dst)
+
+	changes, err := ChangesDirs(dst, src)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	layer, err := ExportChanges(dst, changes)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	layerCopy, err := NewTempArchive(layer, "")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if err := ApplyLayer(src, layerCopy); err != nil {
+		t.Fatal(err)
+	}
+
+	changes2, err := ChangesDirs(src, dst)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(changes2) != 0 {
+		t.Fatalf("Unexpected differences after re applying mutation: %v", changes)
+	}
+
+	os.RemoveAll(src)
+	os.RemoveAll(dst)
+}

+ 95 - 0
archive/diff.go

@@ -0,0 +1,95 @@
+package archive
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"syscall"
+	"time"
+)
+
+// ApplyLayer parses a diff in the standard layer format from `layer`, and
+// applies it to the directory `dest`.
+func ApplyLayer(dest string, layer Archive) error {
+	// Poor man's diff applier in 2 steps:
+
+	// Step 1: untar everything in place
+	if err := Untar(layer, dest, nil); err != nil {
+		return err
+	}
+
+	modifiedDirs := make(map[string]*syscall.Stat_t) // dirs whose times must be restored after the removals below
+	addDir := func(file string) { // record the stat of file's parent dir, once, before modifying it
+		d := filepath.Dir(file)
+		if _, exists := modifiedDirs[d]; !exists {
+			if s, err := os.Lstat(d); err == nil {
+				if sys := s.Sys(); sys != nil {
+					if stat, ok := sys.(*syscall.Stat_t); ok {
+						modifiedDirs[d] = stat
+					}
+				}
+			}
+		}
+	}
+
+	// Step 2: walk for whiteouts and apply them, removing them in the process
+	err := filepath.Walk(dest, func(fullPath string, f os.FileInfo, err error) error {
+		if err != nil {
+			if os.IsNotExist(err) {
+				// This happens in the case of whiteouts in parent dir removing a directory
+				// We just ignore it
+				return filepath.SkipDir
+			}
+			return err
+		}
+
+		// Rebase path so it is absolute with respect to the layer root ("/")
+		path, err := filepath.Rel(dest, fullPath)
+		if err != nil {
+			return err
+		}
+		path = filepath.Join("/", path)
+
+		// Skip AUFS metadata (".wh..wh.*" entries at the layer root): just remove them
+		if matched, err := filepath.Match("/.wh..wh.*", path); err != nil {
+			return err
+		} else if matched {
+			addDir(fullPath)
+			if err := os.RemoveAll(fullPath); err != nil {
+				return err
+			}
+		}
+
+		filename := filepath.Base(path)
+		if strings.HasPrefix(filename, ".wh.") {
+			rmTargetName := filename[len(".wh."):] // name of the file this whiteout deletes
+			rmTargetPath := filepath.Join(filepath.Dir(fullPath), rmTargetName)
+
+			// Remove the file targeted by the whiteout
+			addDir(rmTargetPath)
+			if err := os.RemoveAll(rmTargetPath); err != nil {
+				return err
+			}
+			// Remove the whiteout itself
+			addDir(fullPath)
+			if err := os.RemoveAll(fullPath); err != nil {
+				return err
+			}
+		}
+		return nil
+	})
+	if err != nil {
+		return err
+	}
+
+	for k, v := range modifiedDirs { // restore atime/mtime captured before the removals (Linux-style Stat_t fields)
+		aTime := time.Unix(v.Atim.Unix())
+		mTime := time.Unix(v.Mtim.Unix())
+
+		if err := os.Chtimes(k, aTime, mTime); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}

+ 2 - 2
buildfile.go

@@ -241,7 +241,7 @@ func (b *buildFile) CmdVolume(args string) error {
 		volume = []string{args}
 	}
 	if b.config.Volumes == nil {
-		b.config.Volumes = NewPathOpts()
+		b.config.Volumes = PathOpts{}
 	}
 	for _, v := range volume {
 		b.config.Volumes[v] = struct{}{}
@@ -476,7 +476,7 @@ func (b *buildFile) Build(context io.Reader) (string, error) {
 	if err != nil {
 		return "", err
 	}
-	if err := archive.Untar(context, name); err != nil {
+	if err := archive.Untar(context, name, nil); err != nil {
 		return "", err
 	}
 	defer os.RemoveAll(name)

+ 0 - 106
changes.go

@@ -1,106 +0,0 @@
-package docker
-
-import (
-	"fmt"
-	"os"
-	"path/filepath"
-	"strings"
-)
-
-type ChangeType int
-
-const (
-	ChangeModify = iota
-	ChangeAdd
-	ChangeDelete
-)
-
-type Change struct {
-	Path string
-	Kind ChangeType
-}
-
-func (change *Change) String() string {
-	var kind string
-	switch change.Kind {
-	case ChangeModify:
-		kind = "C"
-	case ChangeAdd:
-		kind = "A"
-	case ChangeDelete:
-		kind = "D"
-	}
-	return fmt.Sprintf("%s %s", kind, change.Path)
-}
-
-func Changes(layers []string, rw string) ([]Change, error) {
-	var changes []Change
-	err := filepath.Walk(rw, func(path string, f os.FileInfo, err error) error {
-		if err != nil {
-			return err
-		}
-
-		// Rebase path
-		path, err = filepath.Rel(rw, path)
-		if err != nil {
-			return err
-		}
-		path = filepath.Join("/", path)
-
-		// Skip root
-		if path == "/" {
-			return nil
-		}
-
-		// Skip AUFS metadata
-		if matched, err := filepath.Match("/.wh..wh.*", path); err != nil || matched {
-			return err
-		}
-
-		change := Change{
-			Path: path,
-		}
-
-		// Find out what kind of modification happened
-		file := filepath.Base(path)
-		// If there is a whiteout, then the file was removed
-		if strings.HasPrefix(file, ".wh.") {
-			originalFile := file[len(".wh."):]
-			change.Path = filepath.Join(filepath.Dir(path), originalFile)
-			change.Kind = ChangeDelete
-		} else {
-			// Otherwise, the file was added
-			change.Kind = ChangeAdd
-
-			// ...Unless it already existed in a top layer, in which case, it's a modification
-			for _, layer := range layers {
-				stat, err := os.Stat(filepath.Join(layer, path))
-				if err != nil && !os.IsNotExist(err) {
-					return err
-				}
-				if err == nil {
-					// The file existed in the top layer, so that's a modification
-
-					// However, if it's a directory, maybe it wasn't actually modified.
-					// If you modify /foo/bar/baz, then /foo will be part of the changed files only because it's the parent of bar
-					if stat.IsDir() && f.IsDir() {
-						if f.Size() == stat.Size() && f.Mode() == stat.Mode() && f.ModTime() == stat.ModTime() {
-							// Both directories are the same, don't record the change
-							return nil
-						}
-					}
-					change.Kind = ChangeModify
-					break
-				}
-			}
-		}
-
-		// Record change
-		changes = append(changes, change)
-		return nil
-	})
-	if err != nil && !os.IsNotExist(err) {
-		return nil, err
-	}
-	return changes, nil
-}

+ 163 - 146
commands.go

@@ -463,6 +463,10 @@ func (cli *DockerCli) CmdInfo(args ...string) error {
 
 	fmt.Fprintf(cli.out, "Containers: %d\n", out.Containers)
 	fmt.Fprintf(cli.out, "Images: %d\n", out.Images)
+	fmt.Fprintf(cli.out, "Driver: %s\n", out.Driver)
+	for _, pair := range out.DriverStatus {
+		fmt.Fprintf(cli.out, " %s: %s\n", pair[0], pair[1])
+	}
 	if out.Debug || os.Getenv("DEBUG") != "" {
 		fmt.Fprintf(cli.out, "Debug mode (server): %v\n", out.Debug)
 		fmt.Fprintf(cli.out, "Debug mode (client): %v\n", os.Getenv("DEBUG") != "")
@@ -847,7 +851,7 @@ func (cli *DockerCli) CmdHistory(args ...string) error {
 				fmt.Fprintf(w, "%s\t", utils.TruncateID(out.ID))
 			}
 
-			fmt.Fprintf(w, "%s ago\t", utils.HumanDuration(time.Now().Sub(time.Unix(out.Created, 0))))
+			fmt.Fprintf(w, "%s ago\t", utils.HumanDuration(time.Now().UTC().Sub(time.Unix(out.Created, 0))))
 
 			if *noTrunc {
 				fmt.Fprintf(w, "%s\t", out.CreatedBy)
@@ -1128,16 +1132,18 @@ func (cli *DockerCli) CmdImages(args ...string) error {
 		}
 
 		var outs []APIImages
-		err = json.Unmarshal(body, &outs)
-		if err != nil {
+		if err := json.Unmarshal(body, &outs); err != nil {
 			return err
 		}
 
-		var startImageArg = cmd.Arg(0)
-		var startImage APIImages
+		var (
+			startImageArg = cmd.Arg(0)
+			startImage    APIImages
+
+			roots    []APIImages
+			byParent = make(map[string][]APIImages)
+		)
 
-		var roots []APIImages
-		var byParent = make(map[string][]APIImages)
 		for _, image := range outs {
 			if image.ParentId == "" {
 				roots = append(roots, image)
@@ -1202,7 +1208,7 @@ func (cli *DockerCli) CmdImages(args ...string) error {
 				}
 
 				if !*quiet {
-					fmt.Fprintf(w, "%s\t%s\t%s\t%s ago\t", repo, tag, out.ID, utils.HumanDuration(time.Now().Sub(time.Unix(out.Created, 0))))
+					fmt.Fprintf(w, "%s\t%s\t%s\t%s ago\t", repo, tag, out.ID, utils.HumanDuration(time.Now().UTC().Sub(time.Unix(out.Created, 0))))
 					if out.VirtualSize > 0 {
 						fmt.Fprintf(w, "%s (virtual %s)\n", utils.HumanSize(out.Size), utils.HumanSize(out.VirtualSize))
 					} else {
@@ -1344,7 +1350,7 @@ func (cli *DockerCli) CmdPs(args ...string) error {
 			if !*noTrunc {
 				out.Command = utils.Trunc(out.Command, 20)
 			}
-			fmt.Fprintf(w, "%s\t%s\t%s\t%s ago\t%s\t%s\t%s\t", out.ID, out.Image, out.Command, utils.HumanDuration(time.Now().Sub(time.Unix(out.Created, 0))), out.Status, displayablePorts(out.Ports), strings.Join(out.Names, ","))
+			fmt.Fprintf(w, "%s\t%s\t%s\t%s ago\t%s\t%s\t%s\t", out.ID, out.Image, out.Command, utils.HumanDuration(time.Now().UTC().Sub(time.Unix(out.Created, 0))), out.Status, displayablePorts(out.Ports), strings.Join(out.Names, ","))
 			if *size {
 				if out.SizeRootFs > 0 {
 					fmt.Fprintf(w, "%s (virtual %s)\n", utils.HumanSize(out.SizeRw), utils.HumanSize(out.SizeRootFs))
@@ -1451,7 +1457,7 @@ func (cli *DockerCli) CmdEvents(args ...string) error {
 }
 
 func (cli *DockerCli) CmdExport(args ...string) error {
-	cmd := cli.Subcmd("export", "CONTAINER", "Export the contents of a filesystem as a tar archive")
+	cmd := cli.Subcmd("export", "CONTAINER", "Export the contents of a filesystem as a tar archive to STDOUT")
 	if err := cmd.Parse(args); err != nil {
 		return nil
 	}
@@ -1632,15 +1638,7 @@ type ports []int
 // AttachOpts stores arguments to 'docker run -a', eg. which streams to attach to
 type AttachOpts map[string]bool
 
-func NewAttachOpts() AttachOpts {
-	return make(AttachOpts)
-}
-
-func (opts AttachOpts) String() string {
-	// Cast to underlying map type to avoid infinite recursion
-	return fmt.Sprintf("%v", map[string]bool(opts))
-}
-
+func (opts AttachOpts) String() string { return fmt.Sprintf("%v", map[string]bool(opts)) }
 func (opts AttachOpts) Set(val string) error {
 	if val != "stdin" && val != "stdout" && val != "stderr" {
 		return fmt.Errorf("Unsupported stream name: %s", val)
@@ -1649,24 +1647,21 @@ func (opts AttachOpts) Set(val string) error {
 	return nil
 }
 
-func (opts AttachOpts) Get(val string) bool {
-	if res, exists := opts[val]; exists {
-		return res
+// LinkOpts stores arguments to `docker run -link`
+type LinkOpts []string
+
+func (link LinkOpts) String() string { return fmt.Sprintf("%v", []string(link)) }
+func (link LinkOpts) Set(val string) error {
+	if _, err := parseLink(val); err != nil {
+		return err
 	}
-	return false
+	return nil
 }
 
 // PathOpts stores a unique set of absolute paths
 type PathOpts map[string]struct{}
 
-func NewPathOpts() PathOpts {
-	return make(PathOpts)
-}
-
-func (opts PathOpts) String() string {
-	return fmt.Sprintf("%v", map[string]struct{}(opts))
-}
-
+func (opts PathOpts) String() string { return fmt.Sprintf("%v", map[string]struct{}(opts)) }
 func (opts PathOpts) Set(val string) error {
 	var containerPath string
 
@@ -1730,60 +1725,60 @@ func ParseRun(args []string, capabilities *Capabilities) (*Config, *HostConfig,
 }
 
 func parseRun(cmd *flag.FlagSet, args []string, capabilities *Capabilities) (*Config, *HostConfig, *flag.FlagSet, error) {
+	var (
+		// FIXME: use utils.ListOpts for attach and volumes?
+		flAttach  = AttachOpts{}
+		flVolumes = PathOpts{}
+		flLinks   = LinkOpts{}
+
+		flPublish     utils.ListOpts
+		flExpose      utils.ListOpts
+		flEnv         utils.ListOpts
+		flDns         utils.ListOpts
+		flVolumesFrom utils.ListOpts
+		flLxcOpts     utils.ListOpts
+
+		flAutoRemove      = cmd.Bool("rm", false, "Automatically remove the container when it exits (incompatible with -d)")
+		flDetach          = cmd.Bool("d", false, "Detached mode: Run container in the background, print new container id")
+		flNetwork         = cmd.Bool("n", true, "Enable networking for this container")
+		flPrivileged      = cmd.Bool("privileged", false, "Give extended privileges to this container")
+		flPublishAll      = cmd.Bool("P", false, "Publish all exposed ports to the host interfaces")
+		flStdin           = cmd.Bool("i", false, "Keep stdin open even if not attached")
+		flTty             = cmd.Bool("t", false, "Allocate a pseudo-tty")
+		flContainerIDFile = cmd.String("cidfile", "", "Write the container ID to the file")
+		flEntrypoint      = cmd.String("entrypoint", "", "Overwrite the default entrypoint of the image")
+		flHostname        = cmd.String("h", "", "Container host name")
+		flMemoryString    = cmd.String("m", "", "Memory limit (format: <number><optional unit>, where unit = b, k, m or g)")
+		flUser            = cmd.String("u", "", "Username or UID")
+		flWorkingDir      = cmd.String("w", "", "Working directory inside the container")
+		flCpuShares       = cmd.Int64("c", 0, "CPU shares (relative weight)")
+
+		// For documentation purpose
+		_ = cmd.Bool("sig-proxy", true, "Proxify all received signal to the process (even in non-tty mode)")
+		_ = cmd.String("name", "", "Assign a name to the container")
+	)
 
-	flHostname := cmd.String("h", "", "Container host name")
-	flWorkingDir := cmd.String("w", "", "Working directory inside the container")
-	flUser := cmd.String("u", "", "Username or UID")
-	flDetach := cmd.Bool("d", false, "Detached mode: Run container in the background, print new container id")
-	flAttach := NewAttachOpts()
 	cmd.Var(flAttach, "a", "Attach to stdin, stdout or stderr.")
-	flStdin := cmd.Bool("i", false, "Keep stdin open even if not attached")
-	flTty := cmd.Bool("t", false, "Allocate a pseudo-tty")
-	flMemoryString := cmd.String("m", "", "Memory limit (format: <number><optional unit>, where unit = b, k, m or g)")
-	flContainerIDFile := cmd.String("cidfile", "", "Write the container ID to the file")
-	flNetwork := cmd.Bool("n", true, "Enable networking for this container")
-	flPrivileged := cmd.Bool("privileged", false, "Give extended privileges to this container")
-	flAutoRemove := cmd.Bool("rm", false, "Automatically remove the container when it exits (incompatible with -d)")
-	cmd.Bool("sig-proxy", true, "Proxify all received signal to the process (even in non-tty mode)")
-	cmd.String("name", "", "Assign a name to the container")
-	flPublishAll := cmd.Bool("P", false, "Publish all exposed ports to the host interfaces")
-
-	if capabilities != nil && *flMemoryString != "" && !capabilities.MemoryLimit {
-		//fmt.Fprintf(stdout, "WARNING: Your kernel does not support memory limit capabilities. Limitation discarded.\n")
-		*flMemoryString = ""
-	}
-
-	flCpuShares := cmd.Int64("c", 0, "CPU shares (relative weight)")
-
-	var flPublish utils.ListOpts
-	cmd.Var(&flPublish, "p", "Publish a container's port to the host (use 'docker port' to see the actual mapping)")
+	cmd.Var(flVolumes, "v", "Bind mount a volume (e.g. from the host: -v /host:/container, from docker: -v /container)")
+	cmd.Var(flLinks, "link", "Add link to another container (name:alias)")
 
-	var flExpose utils.ListOpts
+	cmd.Var(&flPublish, "p", fmt.Sprintf("Publish a container's port to the host (format: %s) (use 'docker port' to see the actual mapping)", PortSpecTemplateFormat))
 	cmd.Var(&flExpose, "expose", "Expose a port from the container without publishing it to your host")
-
-	var flEnv utils.ListOpts
 	cmd.Var(&flEnv, "e", "Set environment variables")
-
-	var flDns utils.ListOpts
 	cmd.Var(&flDns, "dns", "Set custom dns servers")
-
-	flVolumes := NewPathOpts()
-	cmd.Var(flVolumes, "v", "Bind mount a volume (e.g. from the host: -v /host:/container, from docker: -v /container)")
-
-	var flVolumesFrom utils.ListOpts
 	cmd.Var(&flVolumesFrom, "volumes-from", "Mount volumes from the specified container(s)")
-
-	flEntrypoint := cmd.String("entrypoint", "", "Overwrite the default entrypoint of the image")
-
-	var flLxcOpts utils.ListOpts
 	cmd.Var(&flLxcOpts, "lxc-conf", "Add custom lxc options -lxc-conf=\"lxc.cgroup.cpuset.cpus = 0,1\"")
 
-	var flLinks utils.ListOpts
-	cmd.Var(&flLinks, "link", "Add link to another container (name:alias)")
-
 	if err := cmd.Parse(args); err != nil {
 		return nil, nil, cmd, err
 	}
+
+	// Check if the kernel supports memory limit cgroup.
+	if capabilities != nil && *flMemoryString != "" && !capabilities.MemoryLimit {
+		*flMemoryString = ""
+	}
+
+	// Validate input params
 	if *flDetach && len(flAttach) > 0 {
 		return nil, nil, cmd, ErrConflictAttachDetach
 	}
@@ -1805,8 +1800,7 @@ func parseRun(cmd *flag.FlagSet, args []string, capabilities *Capabilities) (*Co
 		}
 	}
 
-	envs := []string{}
-
+	var envs []string
 	for _, env := range flEnv {
 		arr := strings.Split(env, "=")
 		if len(arr) > 1 {
@@ -1818,19 +1812,15 @@ func parseRun(cmd *flag.FlagSet, args []string, capabilities *Capabilities) (*Co
 	}
 
 	var flMemory int64
-
 	if *flMemoryString != "" {
 		parsedMemory, err := utils.RAMInBytes(*flMemoryString)
-
 		if err != nil {
 			return nil, nil, cmd, err
 		}
-
 		flMemory = parsedMemory
 	}
 
 	var binds []string
-
 	// add any bind targets to the list of container volumes
 	for bind := range flVolumes {
 		arr := strings.Split(bind, ":")
@@ -1845,10 +1835,12 @@ func parseRun(cmd *flag.FlagSet, args []string, capabilities *Capabilities) (*Co
 		}
 	}
 
-	parsedArgs := cmd.Args()
-	runCmd := []string{}
-	entrypoint := []string{}
-	image := ""
+	var (
+		parsedArgs = cmd.Args()
+		runCmd     []string
+		entrypoint []string
+		image      string
+	)
 	if len(parsedArgs) >= 1 {
 		image = cmd.Arg(0)
 	}
@@ -1859,16 +1851,16 @@ func parseRun(cmd *flag.FlagSet, args []string, capabilities *Capabilities) (*Co
 		entrypoint = []string{*flEntrypoint}
 	}
 
-	var lxcConf []KeyValuePair
 	lxcConf, err := parseLxcConfOpts(flLxcOpts)
 	if err != nil {
 		return nil, nil, cmd, err
 	}
 
-	hostname := *flHostname
-	domainname := ""
-
-	parts := strings.SplitN(hostname, ".", 2)
+	var (
+		domainname string
+		hostname   = *flHostname
+		parts      = strings.SplitN(hostname, ".", 2)
+	)
 	if len(parts) > 1 {
 		hostname = parts[0]
 		domainname = parts[1]
@@ -1901,9 +1893,9 @@ func parseRun(cmd *flag.FlagSet, args []string, capabilities *Capabilities) (*Co
 		OpenStdin:       *flStdin,
 		Memory:          flMemory,
 		CpuShares:       *flCpuShares,
-		AttachStdin:     flAttach.Get("stdin"),
-		AttachStdout:    flAttach.Get("stdout"),
-		AttachStderr:    flAttach.Get("stderr"),
+		AttachStdin:     flAttach["stdin"],
+		AttachStdout:    flAttach["stdout"],
+		AttachStderr:    flAttach["stderr"],
 		Env:             envs,
 		Cmd:             runCmd,
 		Dns:             flDns,
@@ -1946,30 +1938,33 @@ func (cli *DockerCli) CmdRun(args ...string) error {
 		return nil
 	}
 
-	flRm := cmd.Lookup("rm")
-	autoRemove, _ := strconv.ParseBool(flRm.Value.String())
+	// Retrieve relevant client-side config
+	var (
+		flName        = cmd.Lookup("name")
+		flRm          = cmd.Lookup("rm")
+		flSigProxy    = cmd.Lookup("sig-proxy")
+		autoRemove, _ = strconv.ParseBool(flRm.Value.String())
+		sigProxy, _   = strconv.ParseBool(flSigProxy.Value.String())
+	)
 
-	flSigProxy := cmd.Lookup("sig-proxy")
-	sigProxy, _ := strconv.ParseBool(flSigProxy.Value.String())
-	flName := cmd.Lookup("name")
+	// Disable sigProxy in case on TTY
 	if config.Tty {
 		sigProxy = false
 	}
 
-	var containerIDFile *os.File
+	var containerIDFile io.WriteCloser
 	if len(hostConfig.ContainerIDFile) > 0 {
-		if _, err := ioutil.ReadFile(hostConfig.ContainerIDFile); err == nil {
+		if _, err := os.Stat(hostConfig.ContainerIDFile); err == nil {
 			return fmt.Errorf("cid file found, make sure the other container isn't running or delete %s", hostConfig.ContainerIDFile)
 		}
-		containerIDFile, err = os.Create(hostConfig.ContainerIDFile)
-		if err != nil {
+		if containerIDFile, err = os.Create(hostConfig.ContainerIDFile); err != nil {
 			return fmt.Errorf("failed to create the container ID file: %s", err)
 		}
 		defer containerIDFile.Close()
 	}
+
 	containerValues := url.Values{}
-	name := flName.Value.String()
-	if name != "" {
+	if name := flName.Value.String(); name != "" {
 		containerValues.Set("name", name)
 	}
 
@@ -1990,8 +1985,7 @@ func (cli *DockerCli) CmdRun(args ...string) error {
 		v.Set("tag", tag)
 
 		// Resolve the Repository name from fqn to endpoint + name
-		var endpoint string
-		endpoint, _, err = registry.ResolveRepositoryName(repos)
+		endpoint, _, err := registry.ResolveRepositoryName(repos)
 		if err != nil {
 			return err
 		}
@@ -2009,14 +2003,10 @@ func (cli *DockerCli) CmdRun(args ...string) error {
 		registryAuthHeader := []string{
 			base64.URLEncoding.EncodeToString(buf),
 		}
-		err = cli.stream("POST", "/images/create?"+v.Encode(), nil, cli.err, map[string][]string{
-			"X-Registry-Auth": registryAuthHeader,
-		})
-		if err != nil {
+		if err = cli.stream("POST", "/images/create?"+v.Encode(), nil, cli.err, map[string][]string{"X-Registry-Auth": registryAuthHeader}); err != nil {
 			return err
 		}
-		body, _, err = cli.call("POST", "/containers/create?"+containerValues.Encode(), config)
-		if err != nil {
+		if body, _, err = cli.call("POST", "/containers/create?"+containerValues.Encode(), config); err != nil {
 			return err
 		}
 	}
@@ -2024,17 +2014,17 @@ func (cli *DockerCli) CmdRun(args ...string) error {
 		return err
 	}
 
-	runResult := &APIRun{}
-	err = json.Unmarshal(body, runResult)
-	if err != nil {
+	var runResult APIRun
+	if err := json.Unmarshal(body, &runResult); err != nil {
 		return err
 	}
 
 	for _, warning := range runResult.Warnings {
 		fmt.Fprintf(cli.err, "WARNING: %s\n", warning)
 	}
+
 	if len(hostConfig.ContainerIDFile) > 0 {
-		if _, err = containerIDFile.WriteString(runResult.ID); err != nil {
+		if _, err = containerIDFile.Write([]byte(runResult.ID)); err != nil {
 			return fmt.Errorf("failed to write the container ID to the file: %s", err)
 		}
 	}
@@ -2045,27 +2035,38 @@ func (cli *DockerCli) CmdRun(args ...string) error {
 	}
 
 	var (
-		wait  chan struct{}
-		errCh chan error
+		waitDisplayId chan struct{}
+		errCh         chan error
 	)
 
 	if !config.AttachStdout && !config.AttachStderr {
 		// Make this asynchrone in order to let the client write to stdin before having to read the ID
-		wait = make(chan struct{})
+		waitDisplayId = make(chan struct{})
 		go func() {
-			defer close(wait)
+			defer close(waitDisplayId)
 			fmt.Fprintf(cli.out, "%s\n", runResult.ID)
 		}()
 	}
 
-	hijacked := make(chan bool)
+	// We need to instanciate the chan because the select needs it. It can
+	// be closed but can't be uninitialized.
+	hijacked := make(chan io.Closer)
 
-	if config.AttachStdin || config.AttachStdout || config.AttachStderr {
+	// Block the return until the chan gets closed
+	defer func() {
+		utils.Debugf("End of CmdRun(), Waiting for hijack to finish.")
+		if _, ok := <-hijacked; ok {
+			utils.Errorf("Hijack did not finish (chan still open)")
+		}
+	}()
 
-		v := url.Values{}
+	if config.AttachStdin || config.AttachStdout || config.AttachStderr {
+		var (
+			out, stderr io.Writer
+			in          io.ReadCloser
+			v           = url.Values{}
+		)
 		v.Set("stream", "1")
-		var out, stderr io.Writer
-		var in io.ReadCloser
 
 		if config.AttachStdin {
 			v.Set("stdin", "1")
@@ -2093,7 +2094,12 @@ func (cli *DockerCli) CmdRun(args ...string) error {
 
 	// Acknowledge the hijack before starting
 	select {
-	case <-hijacked:
+	case closer := <-hijacked:
+		// Make sure that hijack gets closed when returning. (result
+		// in closing hijack chan and freeing server's goroutines.
+		if closer != nil {
+			defer closer.Close()
+		}
 	case err := <-errCh:
 		if err != nil {
 			utils.Debugf("Error hijack: %s", err)
@@ -2119,31 +2125,37 @@ func (cli *DockerCli) CmdRun(args ...string) error {
 		}
 	}
 
+	// Detached mode: wait for the id to be displayed and return.
 	if !config.AttachStdout && !config.AttachStderr {
 		// Detached mode
-		<-wait
-	} else {
-		running, status, err := getExitCode(cli, runResult.ID)
-		if err != nil {
+		<-waitDisplayId
+		return nil
+	}
+
+	var status int
+
+	// Attached mode
+	if autoRemove {
+		// Autoremove: wait for the container to finish, retrieve
+		// the exit code and remove the container
+		if _, _, err := cli.call("POST", "/containers/"+runResult.ID+"/wait", nil); err != nil {
 			return err
 		}
-		if autoRemove {
-			if running {
-				return fmt.Errorf("Impossible to auto-remove a detached container")
-			}
-			// Wait for the process to
-			if _, _, err := cli.call("POST", "/containers/"+runResult.ID+"/wait", nil); err != nil {
-				return err
-			}
-			if _, _, err := cli.call("DELETE", "/containers/"+runResult.ID, nil); err != nil {
-				return err
-			}
+		if _, status, err = getExitCode(cli, runResult.ID); err != nil {
+			return err
 		}
-		if status != 0 {
-			return &utils.StatusError{Status: status}
+		if _, _, err := cli.call("DELETE", "/containers/"+runResult.ID, nil); err != nil {
+			return err
+		}
+	} else {
+		// No Autoremove: Simply retrieve the exit code
+		if _, status, err = getExitCode(cli, runResult.ID); err != nil {
+			return err
 		}
 	}
-
+	if status != 0 {
+		return &utils.StatusError{Status: status}
+	}
 	return nil
 }
 
@@ -2175,7 +2187,7 @@ func (cli *DockerCli) CmdCp(args ...string) error {
 
 	if statusCode == 200 {
 		r := bytes.NewReader(data)
-		if err := archive.Untar(r, copyData.HostPath); err != nil {
+		if err := archive.Untar(r, copyData.HostPath, nil); err != nil {
 			return err
 		}
 	}
@@ -2328,7 +2340,7 @@ func (cli *DockerCli) stream(method, path string, in io.Reader, out io.Writer, h
 	}
 
 	if matchesContentType(resp.Header.Get("Content-Type"), "application/json") {
-		return utils.DisplayJSONMessagesStream(resp.Body, out)
+		return utils.DisplayJSONMessagesStream(resp.Body, out, cli.isTerminal)
 	}
 	if _, err := io.Copy(out, resp.Body); err != nil {
 		return err
@@ -2336,7 +2348,12 @@ func (cli *DockerCli) stream(method, path string, in io.Reader, out io.Writer, h
 	return nil
 }
 
-func (cli *DockerCli) hijack(method, path string, setRawTerminal bool, in io.ReadCloser, stdout, stderr io.Writer, started chan bool) error {
+func (cli *DockerCli) hijack(method, path string, setRawTerminal bool, in io.ReadCloser, stdout, stderr io.Writer, started chan io.Closer) error {
+	defer func() {
+		if started != nil {
+			close(started)
+		}
+	}()
 	// fixme: refactor client to support redirect
 	re := regexp.MustCompile("/+")
 	path = re.ReplaceAllString(path, "/")
@@ -2366,7 +2383,7 @@ func (cli *DockerCli) hijack(method, path string, setRawTerminal bool, in io.Rea
 	defer rwc.Close()
 
 	if started != nil {
-		started <- true
+		started <- rwc
 	}
 
 	var receiveStdout chan error

+ 2 - 0
config.go

@@ -16,6 +16,7 @@ type DaemonConfig struct {
 	BridgeIface                 string
 	DefaultIp                   net.IP
 	InterContainerCommunication bool
+	GraphDriver                 string
 }
 
 // ConfigFromJob creates and returns a new DaemonConfig object
@@ -37,5 +38,6 @@ func ConfigFromJob(job *engine.Job) *DaemonConfig {
 	}
 	config.DefaultIp = net.ParseIP(job.Getenv("DefaultIp"))
 	config.InterContainerCommunication = job.GetenvBool("InterContainerCommunication")
+	config.GraphDriver = job.Getenv("GraphDriver")
 	return &config
 }

+ 68 - 88
container.go

@@ -6,6 +6,7 @@ import (
 	"errors"
 	"fmt"
 	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/graphdriver"
 	"github.com/dotcloud/docker/term"
 	"github.com/dotcloud/docker/utils"
 	"github.com/kr/pty"
@@ -16,7 +17,6 @@ import (
 	"os"
 	"os/exec"
 	"path"
-	"path/filepath"
 	"strconv"
 	"strings"
 	"sync"
@@ -26,8 +26,8 @@ import (
 
 type Container struct {
 	sync.Mutex
-
-	root string
+	root   string // Path to the "home" of the container, including metadata.
+	rootfs string // Path to the root filesystem of the container.
 
 	ID string
 
@@ -48,6 +48,7 @@ type Container struct {
 	HostnamePath   string
 	HostsPath      string
 	Name           string
+	Driver         string
 
 	cmd       *exec.Cmd
 	stdout    *utils.WriteBroadcaster
@@ -196,8 +197,13 @@ func (settings *NetworkSettings) PortMappingAPI() []APIPort {
 
 // Inject the io.Reader at the given path. Note: do not close the reader
 func (container *Container) Inject(file io.Reader, pth string) error {
+	if err := container.EnsureMounted(); err != nil {
+		return fmt.Errorf("inject: error mounting container %s: %s", container.ID, err)
+	}
+
 	// Return error if path exists
-	if _, err := os.Stat(path.Join(container.rwPath(), pth)); err == nil {
+	destPath := path.Join(container.RootfsPath(), pth)
+	if _, err := os.Stat(destPath); err == nil {
 		// Since err is nil, the path could be stat'd and it exists
 		return fmt.Errorf("%s exists", pth)
 	} else if !os.IsNotExist(err) {
@@ -208,14 +214,16 @@ func (container *Container) Inject(file io.Reader, pth string) error {
 	}
 
 	// Make sure the directory exists
-	if err := os.MkdirAll(path.Join(container.rwPath(), path.Dir(pth)), 0755); err != nil {
+	if err := os.MkdirAll(path.Join(container.RootfsPath(), path.Dir(pth)), 0755); err != nil {
 		return err
 	}
 
-	dest, err := os.Create(path.Join(container.rwPath(), pth))
+	dest, err := os.Create(destPath)
 	if err != nil {
 		return err
 	}
+	defer dest.Close()
+
 	if _, err := io.Copy(dest, file); err != nil {
 		return err
 	}
@@ -607,6 +615,7 @@ func (container *Container) Start() (err error) {
 		}
 	}
 
+	volumesDriver := container.runtime.volumes.driver
 	// Create the requested volumes if they don't exist
 	for volPath := range container.Config.Volumes {
 		volPath = path.Clean(volPath)
@@ -626,13 +635,17 @@ func (container *Container) Start() (err error) {
 			}
 			// Otherwise create an directory in $ROOT/volumes/ and use that
 		} else {
-			c, err := container.runtime.volumes.Create(nil, container, "", "", nil)
+
+			// Do not pass a container as the parameter for the volume creation.
+			// The graph driver using the container's information ( Image ) to
+			// create the parent.
+			c, err := container.runtime.volumes.Create(nil, nil, "", "", nil)
 			if err != nil {
 				return err
 			}
-			srcPath, err = c.layer()
+			srcPath, err = volumesDriver.Get(c.ID)
 			if err != nil {
-				return err
+				return fmt.Errorf("Driver %s failed to get volume rootfs %s: %s", volumesDriver, c.ID, err)
 			}
 			srcRW = true // RW by default
 		}
@@ -1231,15 +1244,14 @@ func (container *Container) Resize(h, w int) error {
 }
 
 func (container *Container) ExportRw() (archive.Archive, error) {
-	return archive.Tar(container.rwPath(), archive.Uncompressed)
-}
-
-func (container *Container) RwChecksum() (string, error) {
-	rwData, err := archive.Tar(container.rwPath(), archive.Xz)
-	if err != nil {
-		return "", err
+	if err := container.EnsureMounted(); err != nil {
+		return nil, err
+	}
+	if container.runtime == nil {
+		return nil, fmt.Errorf("Can't load storage driver for unregistered container %s", container.ID)
 	}
-	return utils.HashData(rwData)
+
+	return container.runtime.Diff(container)
 }
 
 func (container *Container) Export() (archive.Archive, error) {
@@ -1265,28 +1277,17 @@ func (container *Container) WaitTimeout(timeout time.Duration) error {
 }
 
 func (container *Container) EnsureMounted() error {
-	if mounted, err := container.Mounted(); err != nil {
-		return err
-	} else if mounted {
-		return nil
-	}
+	// FIXME: EnsureMounted is deprecated because drivers are now responsible
+	// for re-entrant mounting in their Get() method.
 	return container.Mount()
 }
 
 func (container *Container) Mount() error {
-	image, err := container.GetImage()
-	if err != nil {
-		return err
-	}
-	return image.Mount(container.RootfsPath(), container.rwPath())
+	return container.runtime.Mount(container)
 }
 
-func (container *Container) Changes() ([]Change, error) {
-	image, err := container.GetImage()
-	if err != nil {
-		return nil, err
-	}
-	return image.Changes(container.rwPath())
+func (container *Container) Changes() ([]archive.Change, error) {
+	return container.runtime.Changes(container)
 }
 
 func (container *Container) GetImage() (*Image, error) {
@@ -1296,18 +1297,8 @@ func (container *Container) GetImage() (*Image, error) {
 	return container.runtime.graph.Get(container.Image)
 }
 
-func (container *Container) Mounted() (bool, error) {
-	return Mounted(container.RootfsPath())
-}
-
 func (container *Container) Unmount() error {
-	if _, err := os.Stat(container.RootfsPath()); err != nil {
-		if os.IsNotExist(err) {
-			return nil
-		}
-		return err
-	}
-	return Unmount(container.RootfsPath())
+	return container.runtime.Unmount(container)
 }
 
 func (container *Container) logPath(name string) string {
@@ -1336,11 +1327,7 @@ func (container *Container) lxcConfigPath() string {
 
 // This method must be exported to be used from the lxc template
 func (container *Container) RootfsPath() string {
-	return path.Join(container.root, "rootfs")
-}
-
-func (container *Container) rwPath() string {
-	return path.Join(container.root, "rw")
+	return container.rootfs
 }
 
 func validateID(id string) error {
@@ -1352,49 +1339,38 @@ func validateID(id string) error {
 
 // GetSize, return real size, virtual size
 func (container *Container) GetSize() (int64, int64) {
-	var sizeRw, sizeRootfs int64
-	data := make(map[uint64]bool)
+	var (
+		sizeRw, sizeRootfs int64
+		err                error
+		driver             = container.runtime.driver
+	)
 
-	filepath.Walk(container.rwPath(), func(path string, fileInfo os.FileInfo, err error) error {
-		if fileInfo == nil {
-			return nil
+	if err := container.EnsureMounted(); err != nil {
+		utils.Errorf("Warning: failed to compute size of container rootfs %s: %s", container.ID, err)
+		return sizeRw, sizeRootfs
+	}
+
+	if differ, ok := container.runtime.driver.(graphdriver.Differ); ok {
+		sizeRw, err = differ.DiffSize(container.ID)
+		if err != nil {
+			utils.Errorf("Warning: driver %s couldn't return diff size of container %s: %s", driver, container.ID, err)
+			// FIXME: GetSize should return an error. Not changing it now in case
+			// there is a side-effect.
+			sizeRw = -1
 		}
-		size := fileInfo.Size()
-		if size == 0 {
-			return nil
+	} else {
+		changes, _ := container.Changes()
+		if changes != nil {
+			sizeRw = archive.ChangesSize(container.RootfsPath(), changes)
+		} else {
+			sizeRw = -1
 		}
+	}
 
-		inode := fileInfo.Sys().(*syscall.Stat_t).Ino
-		if _, entryExists := data[inode]; entryExists {
-			return nil
+	if _, err = os.Stat(container.RootfsPath()); err == nil {
+		if sizeRootfs, err = utils.TreeSize(container.RootfsPath()); err != nil {
+			sizeRootfs = -1
 		}
-		data[inode] = false
-
-		sizeRw += size
-		return nil
-	})
-
-	data = make(map[uint64]bool)
-	_, err := os.Stat(container.RootfsPath())
-	if err == nil {
-		filepath.Walk(container.RootfsPath(), func(path string, fileInfo os.FileInfo, err error) error {
-			if fileInfo == nil {
-				return nil
-			}
-			size := fileInfo.Size()
-			if size == 0 {
-				return nil
-			}
-
-			inode := fileInfo.Sys().(*syscall.Stat_t).Ino
-			if _, entryExists := data[inode]; entryExists {
-				return nil
-			}
-			data[inode] = false
-
-			sizeRootfs += size
-			return nil
-		})
 	}
 	return sizeRw, sizeRootfs
 }
@@ -1417,7 +1393,11 @@ func (container *Container) Copy(resource string) (archive.Archive, error) {
 		filter = []string{path.Base(basePath)}
 		basePath = path.Dir(basePath)
 	}
-	return archive.TarFilter(basePath, archive.Uncompressed, filter)
+	return archive.TarFilter(basePath, &archive.TarOptions{
+		Compression: archive.Uncompressed,
+		Includes:    filter,
+		Recursive:   true,
+	})
 }
 
 // Returns true if the container exposes a certain port

+ 170 - 0
contrib/docker-device-tool/device_tool.go

@@ -0,0 +1,170 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"github.com/dotcloud/docker/graphdriver/devmapper"
+	"os"
+	"path"
+	"sort"
+	"strconv"
+	"strings"
+)
+
+func usage() {
+	fmt.Fprintf(os.Stderr, "Usage: %s <flags>  [status] | [list] | [device id]  | [resize new-pool-size] | [snap new-id base-id] | [remove id] | [mount id mountpoint]\n", os.Args[0])
+	flag.PrintDefaults()
+	os.Exit(1)
+}
+
+func byteSizeFromString(arg string) (int64, error) {
+	digits := ""
+	rest := ""
+	last := strings.LastIndexAny(arg, "0123456789")
+	if last >= 0 {
+		digits = arg[:last+1]
+		rest = arg[last+1:]
+	}
+
+	val, err := strconv.ParseInt(digits, 10, 64)
+	if err != nil {
+		return val, err
+	}
+
+	rest = strings.ToLower(strings.TrimSpace(rest))
+
+	var multiplier int64 = 1
+	switch rest {
+	case "":
+		multiplier = 1
+	case "k", "kb":
+		multiplier = 1024
+	case "m", "mb":
+		multiplier = 1024 * 1024
+	case "g", "gb":
+		multiplier = 1024 * 1024 * 1024
+	case "t", "tb":
+		multiplier = 1024 * 1024 * 1024 * 1024
+	default:
+		return 0, fmt.Errorf("Unknown size unit: %s", rest)
+	}
+
+	return val * multiplier, nil
+}
+
+func main() {
+	root := flag.String("r", "/var/lib/docker", "Docker root dir")
+	flDebug := flag.Bool("D", false, "Debug mode")
+
+	flag.Parse()
+
+	if *flDebug {
+		os.Setenv("DEBUG", "1")
+	}
+
+	if flag.NArg() < 1 {
+		usage()
+	}
+
+	args := flag.Args()
+
+	home := path.Join(*root, "devicemapper")
+	devices, err := devmapper.NewDeviceSet(home, false)
+	if err != nil {
+		fmt.Println("Can't initialize device mapper: ", err)
+		os.Exit(1)
+	}
+
+	switch args[0] {
+	case "status":
+		status := devices.Status()
+		fmt.Printf("Pool name: %s\n", status.PoolName)
+		fmt.Printf("Data Loopback file: %s\n", status.DataLoopback)
+		fmt.Printf("Metadata Loopback file: %s\n", status.MetadataLoopback)
+		fmt.Printf("Sector size: %d\n", status.SectorSize)
+		fmt.Printf("Data use: %d of %d (%.1f %%)\n", status.Data.Used, status.Data.Total, 100.0*float64(status.Data.Used)/float64(status.Data.Total))
+		fmt.Printf("Metadata use: %d of %d (%.1f %%)\n", status.Metadata.Used, status.Metadata.Total, 100.0*float64(status.Metadata.Used)/float64(status.Metadata.Total))
+		break
+	case "list":
+		ids := devices.List()
+		sort.Strings(ids)
+		for _, id := range ids {
+			fmt.Println(id)
+		}
+		break
+	case "device":
+		if flag.NArg() < 2 {
+			usage()
+		}
+		status, err := devices.GetDeviceStatus(args[1])
+		if err != nil {
+			fmt.Println("Can't get device info: ", err)
+			os.Exit(1)
+		}
+		fmt.Printf("Id: %d\n", status.DeviceId)
+		fmt.Printf("Size: %d\n", status.Size)
+		fmt.Printf("Transaction Id: %d\n", status.TransactionId)
+		fmt.Printf("Size in Sectors: %d\n", status.SizeInSectors)
+		fmt.Printf("Mapped Sectors: %d\n", status.MappedSectors)
+		fmt.Printf("Highest Mapped Sector: %d\n", status.HighestMappedSector)
+		break
+	case "resize":
+		if flag.NArg() < 2 {
+			usage()
+		}
+
+		size, err := byteSizeFromString(args[1])
+		if err != nil {
+			fmt.Println("Invalid size: ", err)
+			os.Exit(1)
+		}
+
+		err = devices.ResizePool(size)
+		if err != nil {
+			fmt.Println("Error resizing pool: ", err)
+			os.Exit(1)
+		}
+
+		break
+	case "snap":
+		if flag.NArg() < 3 {
+			usage()
+		}
+
+		err := devices.AddDevice(args[1], args[2])
+		if err != nil {
+			fmt.Println("Can't create snap device: ", err)
+			os.Exit(1)
+		}
+		break
+	case "remove":
+		if flag.NArg() < 2 {
+			usage()
+		}
+
+		err := devices.RemoveDevice(args[1])
+		if err != nil {
+			fmt.Println("Can't remove device: ", err)
+			os.Exit(1)
+		}
+		break
+	case "mount":
+		if flag.NArg() < 3 {
+			usage()
+		}
+
+		err := devices.MountDevice(args[1], args[2], false)
+		if err != nil {
+			fmt.Println("Can't mount device: ", err)
+			os.Exit(1)
+		}
+		break
+	default:
+		fmt.Printf("Unknown command %s\n", args[0])
+		usage()
+
+		os.Exit(1)
+	}
+
+	return
+}

+ 12 - 10
docker/docker.go

@@ -25,19 +25,20 @@ func main() {
 	}
 	// FIXME: Switch d and D ? (to be more sshd like)
 	flVersion := flag.Bool("v", false, "Print version information and quit")
-	flDaemon := flag.Bool("d", false, "Daemon mode")
-	flDebug := flag.Bool("D", false, "Debug mode")
+	flDaemon := flag.Bool("d", false, "Enable daemon mode")
+	flDebug := flag.Bool("D", false, "Enable debug mode")
 	flAutoRestart := flag.Bool("r", true, "Restart previously running containers")
-	bridgeName := flag.String("b", "", "Attach containers to a pre-existing network bridge. Use 'none' to disable container networking")
-	pidfile := flag.String("p", "/var/run/docker.pid", "File containing process PID")
-	flRoot := flag.String("g", "/var/lib/docker", "Path to use as the root of the docker runtime.")
-	flEnableCors := flag.Bool("api-enable-cors", false, "Enable CORS requests in the remote api.")
-	flDns := flag.String("dns", "", "Set custom dns servers")
+	bridgeName := flag.String("b", "", "Attach containers to a pre-existing network bridge; use 'none' to disable container networking")
+	pidfile := flag.String("p", "/var/run/docker.pid", "Path to use for daemon PID file")
+	flRoot := flag.String("g", "/var/lib/docker", "Path to use as the root of the docker runtime")
+	flEnableCors := flag.Bool("api-enable-cors", false, "Enable CORS headers in the remote API")
+	flDns := flag.String("dns", "", "Force docker to use specific DNS servers")
 	flHosts := utils.ListOpts{fmt.Sprintf("unix://%s", docker.DEFAULTUNIXSOCKET)}
-	flag.Var(&flHosts, "H", "tcp://host:port to bind/connect to or unix://path/to/socket to use")
-	flEnableIptables := flag.Bool("iptables", true, "Disable iptables within docker")
-	flDefaultIp := flag.String("ip", "0.0.0.0", "Default ip address to use when binding a containers ports")
+	flag.Var(&flHosts, "H", "Multiple tcp://host:port or unix://path/to/socket to bind in daemon mode, single connection otherwise")
+	flEnableIptables := flag.Bool("iptables", true, "Disable docker's addition of iptables rules")
+	flDefaultIp := flag.String("ip", "0.0.0.0", "Default IP address to use when binding container ports")
 	flInterContainerComm := flag.Bool("icc", true, "Enable inter-container communication")
+	flGraphDriver := flag.String("s", "", "Force the docker runtime to use a specific storage driver")
 
 	flag.Parse()
 
@@ -82,6 +83,7 @@ func main() {
 		job.Setenv("BridgeIface", *bridgeName)
 		job.Setenv("DefaultIp", *flDefaultIp)
 		job.SetenvBool("InterContainerCommunication", *flInterContainerComm)
+		job.Setenv("GraphDriver", *flGraphDriver)
 		if err := job.Run(); err != nil {
 			log.Fatal(err)
 		}

+ 1 - 1
docs/Dockerfile

@@ -9,7 +9,7 @@ run apt-get install -y python-setuptools make
 run easy_install pip
 #from docs/requirements.txt, but here to increase cacheability
 run pip install Sphinx==1.1.3
-run pip install sphinxcontrib-httpdomain==1.1.8
+run pip install sphinxcontrib-httpdomain==1.1.9
 add . /docs
 run cd /docs; make docs
 

+ 7 - 6
docs/README.md

@@ -41,11 +41,12 @@ its dependencies. There are two main ways to install this tool:
 
 ###Native Installation
 
-* Install sphinx: `pip install sphinx`
-    * Mac OS X: `[sudo] pip-2.7 install sphinx`
-* Install sphinx httpdomain contrib package: `pip install sphinxcontrib-httpdomain`
-    * Mac OS X: `[sudo] pip-2.7 install sphinxcontrib-httpdomain`
-* If pip is not available you can probably install it using your favorite package manager as **python-pip**
+Install dependencies from `requirements.txt` file in your `docker/docs`
+directory:
+
+* Linux: `pip install -r docs/requirements.txt`
+
+* Mac OS X: `[sudo] pip-2.7 install -r docs/requirements.txt`
 
 ###Alternative Installation: Docker Container
 
@@ -136,7 +137,7 @@ Manpages
 --------
 
 * To make the manpages, run ``make man``. Please note there is a bug
-  in spinx 1.1.3 which makes this fail.  Upgrade to the latest version
+  in Sphinx 1.1.3 which makes this fail.  Upgrade to the latest version
   of Sphinx.
 * Then preview the manpage by running ``man _build/man/docker.1``,
   where ``_build/man/docker.1`` is the path to the generated manfile

+ 46 - 1
docs/sources/commandline/cli.rst

@@ -18,6 +18,38 @@ To list available commands, either run ``docker`` with no parameters or execute
 
     ...
 
+.. _cli_daemon:
+
+``daemon``
+----------
+
+::
+
+    Usage of docker:
+      -D=false: Enable debug mode
+      -H=[unix:///var/run/docker.sock]: Multiple tcp://host:port or unix://path/to/socket to bind in daemon mode, single connection otherwise
+      -api-enable-cors=false: Enable CORS headers in the remote API
+      -b="": Attach containers to a pre-existing network bridge; use 'none' to disable container networking
+      -d=false: Enable daemon mode
+      -dns="": Force docker to use specific DNS servers
+      -g="/var/lib/docker": Path to use as the root of the docker runtime
+      -icc=true: Enable inter-container communication
+      -ip="0.0.0.0": Default IP address to use when binding container ports
+      -iptables=true: Disable docker's addition of iptables rules
+      -p="/var/run/docker.pid": Path to use for daemon PID file
+      -r=true: Restart previously running containers
+      -s="": Force the docker runtime to use a specific storage driver
+      -v=false: Print version information and quit
+
+The docker daemon is the persistent process that manages containers.  Docker uses the same binary for both the 
+daemon and client.  To run the daemon you provide the ``-d`` flag.
+
+To force docker to use devicemapper as the storage driver, use ``docker -d -s devicemapper``
+
+To set the dns server for all docker containers, use ``docker -d -dns 8.8.8.8``
+
+To run the daemon with debug output, use ``docker -d -D``
+
 .. _cli_attach:
 
 ``attach``
@@ -369,7 +401,13 @@ Show events in the past from a specified time
 
     Usage: docker export CONTAINER
 
-    Export the contents of a filesystem as a tar archive
+    Export the contents of a filesystem as a tar archive to STDOUT
+    
+for example:
+
+.. code-block:: bash
+
+    $ sudo docker export red_panda > latest.tar
 
 .. _cli_history:
 
@@ -591,6 +629,12 @@ might not get preserved.
 
     Insert a file from URL in the IMAGE at PATH
 
+Use the specified IMAGE as the parent for a new image which adds a
+:ref:`layer <layer_def>` containing the new file. ``insert`` does not modify 
+the original image, and the new image has the contents of the parent image, 
+plus the new file.
+
+
 Examples
 ~~~~~~~~
 
@@ -600,6 +644,7 @@ Insert file from github
 .. code-block:: bash
 
     $ sudo docker insert 8283e18b24bc https://raw.github.com/metalivedev/django/master/postinstall /tmp/postinstall.sh
+    06fd35556d7b
 
 .. _cli_inspect:
 

+ 137 - 0
docs/sources/examples/cfengine_process_management.rst

@@ -0,0 +1,137 @@
+:title: Process Management with CFEngine
+:description: Managing containerized processes with CFEngine
+:keywords: cfengine, process, management, usage, docker, documentation
+
+Process Management with CFEngine
+================================
+
+Create Docker containers with managed processes.
+
+Docker monitors one process in each running container and the container lives or dies with that process.
+By introducing CFEngine inside Docker containers, we can alleviate a few of the issues that may arise:
+
+* It is possible to easily start multiple processes within a container, all of which will be managed automatically, with the normal ``docker run`` command.
+* If a managed process dies or crashes, CFEngine will start it again within 1 minute.
+* The container itself will live as long as the CFEngine scheduling daemon (cf-execd) lives. With CFEngine, we are able to decouple the life of the container from the uptime of the service it provides.
+
+
+How it works
+------------
+
+CFEngine, together with the cfe-docker integration policies, are installed as part of the Dockerfile. This builds CFEngine into our Docker image.
+
+The Dockerfile's ``ENTRYPOINT`` takes an arbitrary amount of commands (with any desired arguments) as parameters.
+When we run the Docker container these parameters get written to CFEngine policies and CFEngine takes over to ensure that the desired processes are running in the container.
+
+CFEngine scans the process table for the ``basename`` of the commands given to the ``ENTRYPOINT`` and runs the command to start the process if the ``basename`` is not found.
+For example, if we start the container with ``docker run "/path/to/my/application parameters"``, CFEngine will look for a process named ``application`` and run the command.
+If an entry for ``application`` is not found in the process table at any point in time, CFEngine will execute ``/path/to/my/application parameters`` to start the application once again.
+The check on the process table happens every minute.
+
+Note that it is therefore important that the command to start your application leaves a process with the basename of the command.
+This can be made more flexible by making some minor adjustments to the CFEngine policies, if desired.
+
+
+Usage
+-----
+
+This example assumes you have Docker installed and working.
+We will install and manage ``apache2`` and ``sshd`` in a single container.
+
+There are three steps:
+
+1. Install CFEngine into the container.
+2. Copy the CFEngine Docker process management policy into the containerized CFEngine installation.
+3. Start your application processes as part of the ``docker run`` command.
+
+
+Building the container image
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The first two steps can be done as part of a Dockerfile, as follows.
+
+.. code-block:: bash
+
+    FROM ubuntu
+    MAINTAINER Eystein Måløy Stenberg <eytein.stenberg@gmail.com>
+
+    RUN apt-get -y install wget lsb-release unzip
+
+    # install latest CFEngine
+    RUN wget -qO- http://cfengine.com/pub/gpg.key | apt-key add -
+    RUN echo "deb http://cfengine.com/pub/apt $(lsb_release -cs) main" > /etc/apt/sources.list.d/cfengine-community.list
+    RUN apt-get update
+    RUN apt-get install cfengine-community
+
+    # install cfe-docker process management policy
+    RUN wget --no-check-certificate https://github.com/estenberg/cfe-docker/archive/master.zip -P /tmp/ && unzip /tmp/master.zip -d /tmp/
+    RUN cp /tmp/cfe-docker-master/cfengine/bin/* /var/cfengine/bin/
+    RUN cp /tmp/cfe-docker-master/cfengine/inputs/* /var/cfengine/inputs/
+    RUN rm -rf /tmp/cfe-docker-master /tmp/master.zip
+
+    # apache2 and openssh are just for testing purposes, install your own apps here
+    RUN apt-get -y install openssh-server apache2
+    RUN mkdir -p /var/run/sshd
+    RUN echo "root:password" | chpasswd  # need a password for ssh
+
+    ENTRYPOINT ["/var/cfengine/bin/docker_processes_run.sh"]
+
+
+By saving this file as ``Dockerfile`` to a working directory, you can then build your container with the docker build command,
+e.g. ``docker build -t managed_image .``.
+
+Testing the container
+~~~~~~~~~~~~~~~~~~~~~
+
+Start the container with ``apache2`` and ``sshd`` running and managed, forwarding a port to our SSH instance:
+
+.. code-block:: bash
+
+    docker run -p 127.0.0.1:222:22 -d managed_image "/usr/sbin/sshd" "/etc/init.d/apache2 start"
+
+We now clearly see one of the benefits of the cfe-docker integration: it allows us to start several processes
+as part of a normal ``docker run`` command.
+
+We can now log in to our new container and see that both ``apache2`` and ``sshd`` are running. We have set the root password to
+"password" in the Dockerfile above and can use that to log in with ssh:
+
+.. code-block:: bash
+
+    ssh -p222 root@127.0.0.1
+
+    ps -ef
+    UID        PID  PPID  C STIME TTY          TIME CMD
+    root         1     0  0 07:48 ?        00:00:00 /bin/bash /var/cfengine/bin/docker_processes_run.sh /usr/sbin/sshd /etc/init.d/apache2 start
+    root        18     1  0 07:48 ?        00:00:00 /var/cfengine/bin/cf-execd -F
+    root        20     1  0 07:48 ?        00:00:00 /usr/sbin/sshd
+    root        32     1  0 07:48 ?        00:00:00 /usr/sbin/apache2 -k start
+    www-data    34    32  0 07:48 ?        00:00:00 /usr/sbin/apache2 -k start
+    www-data    35    32  0 07:48 ?        00:00:00 /usr/sbin/apache2 -k start
+    www-data    36    32  0 07:48 ?        00:00:00 /usr/sbin/apache2 -k start
+    root        93    20  0 07:48 ?        00:00:00 sshd: root@pts/0 
+    root       105    93  0 07:48 pts/0    00:00:00 -bash
+    root       112   105  0 07:49 pts/0    00:00:00 ps -ef
+
+
+If we stop apache2, it will be started again within a minute by CFEngine.
+
+.. code-block:: bash
+
+    service apache2 status
+     Apache2 is running (pid 32).
+    service apache2 stop
+             * Stopping web server apache2 ... waiting    [ OK ]
+    service apache2 status
+     Apache2 is NOT running.
+    # ... wait up to 1 minute...
+    service apache2 status
+     Apache2 is running (pid 173).
+
+
+Adapting to your applications
+-----------------------------
+
+To make sure your applications get managed in the same manner, there are just two things you need to adjust from the above example:
+
+* In the Dockerfile used above, install your applications instead of ``apache2`` and ``sshd``.
+* When you start the container with ``docker run``, specify the command line arguments to your applications rather than ``apache2`` and ``sshd``.

+ 2 - 0
docs/sources/examples/index.rst

@@ -24,3 +24,5 @@ to more substantial services like those which you might find in production.
    postgresql_service
    mongodb
    running_riak_service
+   using_supervisord
+   cfengine_process_management

+ 128 - 0
docs/sources/examples/using_supervisord.rst

@@ -0,0 +1,128 @@
+:title: Using Supervisor with Docker
+:description: How to use Supervisor process management with Docker
+:keywords: docker, supervisor, process management
+
+.. _using_supervisord:
+
+Using Supervisor with Docker
+============================
+
+.. include:: example_header.inc
+
+Traditionally a Docker container runs a single process when it is launched, for
+example an Apache daemon or a SSH server daemon. Often though you want to run
+more than one process in a container. There are a number of ways you can
+achieve this ranging from using a simple Bash script as the value of your
+container's ``CMD`` instruction to installing a process management tool.
+
+In this example we're going to make use of the process management tool,
+`Supervisor <http://supervisord.org/>`_, to manage multiple processes in our
+container. Using Supervisor allows us to better control, manage, and restart the
+processes we want to run. To demonstrate this we're going to install and manage both an
+SSH daemon and an Apache daemon.
+
+Creating a Dockerfile
+---------------------
+
+Let's start by creating a basic ``Dockerfile`` for our new image.
+
+.. code-block:: bash
+
+    FROM ubuntu:latest
+    MAINTAINER examples@docker.io
+    RUN echo "deb http://archive.ubuntu.com/ubuntu precise main universe" > /etc/apt/sources.list
+    RUN apt-get update
+    RUN apt-get upgrade -y
+
+Installing Supervisor
+---------------------
+
+We can now install our SSH and Apache daemons as well as Supervisor in our container.
+
+.. code-block:: bash
+
+    RUN apt-get install -y openssh-server apache2 supervisor
+    RUN mkdir -p /var/run/sshd
+    RUN mkdir -p /var/log/supervisor
+
+Here we're installing the ``openssh-server``, ``apache2`` and ``supervisor``
+(which provides the Supervisor daemon) packages. We're also creating two new
+directories that are needed to run our SSH daemon and Supervisor.
+
+Adding Supervisor's configuration file
+--------------------------------------
+
+Now let's add a configuration file for Supervisor. The default file is called
+``supervisord.conf`` and is located in ``/etc/supervisor/conf.d/``.
+
+.. code-block:: bash
+
+    ADD supervisord.conf /etc/supervisor/conf.d/supervisord.conf
+
+Let's see what is inside our ``supervisord.conf`` file.
+
+.. code-block:: bash
+
+    [supervisord]
+    nodaemon=true
+
+    [program:sshd]
+    command=/usr/sbin/sshd -D
+
+    [program:apache2]
+    command=/bin/bash -c "source /etc/apache2/envvars && /usr/sbin/apache2 -DFOREGROUND"
+
+The ``supervisord.conf`` configuration file contains directives that configure
+Supervisor and the processes it manages. The first block ``[supervisord]``
+provides configuration for Supervisor itself. We're using one directive,
+``nodaemon`` which tells Supervisor to run interactively rather than daemonize.
+
+The next two blocks manage the services we wish to control. Each block controls
+a separate process. The blocks contain a single directive, ``command``, which
+specifies what command to run to start each process.
+
+Exposing ports and running Supervisor
+-------------------------------------
+
+Now let's finish our ``Dockerfile`` by exposing some required ports and
+specifying the ``CMD`` instruction to start Supervisor when our container
+launches.
+
+.. code-block:: bash
+
+    EXPOSE 22 80
+    CMD ["/usr/bin/supervisord"]
+
+Here we've exposed ports 22 and 80 on the container and we're running the
+``/usr/bin/supervisord`` binary when the container launches.
+
+Building our container
+----------------------
+
+We can now build our new container.
+
+.. code-block:: bash
+
+    sudo docker build -t <yourname>/supervisord .
+
+Running our Supervisor container
+--------------------------------
+
+Once we've got a built image we can launch a container from it.
+
+.. code-block:: bash
+
+    sudo docker run -p 22 -p 80 -t -i <yourname>/supervisord
+    2013-11-25 18:53:22,312 CRIT Supervisor running as root (no user in config file)
+    2013-11-25 18:53:22,312 WARN Included extra file "/etc/supervisor/conf.d/supervisord.conf" during parsing
+    2013-11-25 18:53:22,342 INFO supervisord started with pid 1
+    2013-11-25 18:53:23,346 INFO spawned: 'sshd' with pid 6
+    2013-11-25 18:53:23,349 INFO spawned: 'apache2' with pid 7
+    . . .
+
+We've launched a new container interactively using the ``docker run`` command.
+That container has run Supervisor and launched the SSH and Apache daemons with
+it. We've specified the ``-p`` flag to expose ports 22 and 80. From here we can
+now identify the exposed ports and connect to one or both of the SSH and Apache
+daemons.
+

+ 5 - 5
docs/sources/installation/archlinux.rst

@@ -1,5 +1,5 @@
 :title: Installation on Arch Linux
-:description: Docker installation on Arch Linux. 
+:description: Docker installation on Arch Linux.
 :keywords: arch linux, virtualization, docker, documentation, installation
 
 .. _arch_linux:
@@ -7,6 +7,10 @@
 Arch Linux
 ==========
 
+.. include:: install_header.inc
+
+.. include:: install_unofficial.inc
+
 Installing on Arch Linux is not officially supported but can be handled via 
 either of the following AUR packages:
 
@@ -32,10 +36,6 @@ either AUR package.
 Installation
 ------------
 
-.. include:: install_header.inc
-
-.. include:: install_unofficial.inc
-
 The instructions here assume **yaourt** is installed.  See 
 `Arch User Repository <https://wiki.archlinux.org/index.php/Arch_User_Repository#Installing_packages>`_
 for information on building and installing packages from the AUR if you have not

+ 4 - 12
docs/sources/installation/binaries.rst

@@ -12,17 +12,9 @@ Binaries
 **This instruction set is meant for hackers who want to try out Docker
 on a variety of environments.**
 
-Right now, the officially supported distributions are:
-
-- :ref:`ubuntu_precise`
-- :ref:`ubuntu_raring`
-
-
-But we know people have had success running it under
-
-- Debian
-- Suse
-- :ref:`arch_linux`
+Before following these directions, you should really check if a packaged version
+of Docker is already available for your distribution.  We have packages for many
+distributions, and more keep showing up all the time!
 
 Check Your Kernel
 -----------------
@@ -34,7 +26,7 @@ Get the docker binary:
 
 .. code-block:: bash
 
-    wget --output-document=docker https://get.docker.io/builds/Linux/x86_64/docker-latest
+    wget https://get.docker.io/builds/Linux/x86_64/docker-latest -O docker
     chmod +x docker
 
 

+ 19 - 0
docs/sources/installation/fedora.rst

@@ -0,0 +1,19 @@
+:title: Requirements and Installation on Fedora
+:description: Please note this project is currently under heavy development. It should not be used in production.
+:keywords: Docker, Docker documentation, requirements, virtualbox, vagrant, git, ssh, putty, cygwin, linux
+
+.. _fedora:
+
+Fedora
+======
+
+.. include:: install_header.inc
+
+.. include:: install_unofficial.inc
+
+.. warning::
+
+   This is a placeholder for the Fedora installation instructions. Currently there is not an available
+   Docker package in the Fedora distribution. These packages are being built and should be available soon.
+   These instructions will be updated when the package is available.
+

+ 14 - 12
docs/sources/installation/gentoolinux.rst

@@ -4,8 +4,8 @@
 
 .. _gentoo_linux:
 
-Gentoo Linux
-============
+Gentoo
+======
 
 .. include:: install_header.inc
 
@@ -22,17 +22,19 @@ provided at https://github.com/tianon/docker-overlay which can be added using
 properly installing and using the overlay can be found in `the overlay README
 <https://github.com/tianon/docker-overlay/blob/master/README.md#using-this-overlay>`_.
 
+Note that sometimes there is a disparity between the latest version and what's
+in the overlay, and between the latest version in the overlay and what's in the
+portage tree.  Please be patient, and the latest version should propagate
+shortly.
+
 Installation
 ^^^^^^^^^^^^
 
 The package should properly pull in all the necessary dependencies and prompt
-for all necessary kernel options.  For the most straightforward installation
-experience, use ``sys-kernel/aufs-sources`` as your kernel sources.  If you
-prefer not to use ``sys-kernel/aufs-sources``, the portage tree also contains
-``sys-fs/aufs3``, which includes the patches necessary for adding AUFS support
-to other kernel source packages such as ``sys-kernel/gentoo-sources`` (and a
-``kernel-patch`` USE flag to perform the patching to ``/usr/src/linux``
-automatically).
+for all necessary kernel options.  The ebuilds for 0.7+ include use flags to
+pull in the proper dependencies of the major storage drivers, with the
+"device-mapper" use flag being enabled by default, since that is the simplest
+installation path.
 
 .. code-block:: bash
 
@@ -47,9 +49,9 @@ the #docker IRC channel on the freenode network.
 Starting Docker
 ^^^^^^^^^^^^^^^
 
-Ensure that you are running a kernel that includes the necessary AUFS
-patches/support and includes all the necessary modules and/or configuration for
-LXC.
+Ensure that you are running a kernel that includes all the necessary modules
+and/or configuration for LXC (and optionally for device-mapper and/or AUFS,
+depending on the storage driver you've decided to use).
 
 OpenRC
 ------

+ 8 - 7
docs/sources/installation/index.rst

@@ -9,7 +9,7 @@ Installation
 
 There are a number of ways to install Docker, depending on where you
 want to run the daemon. The :ref:`ubuntu_linux` installation is the
-officially-tested version, and the community adds more techniques for
+officially-tested version. The community adds more techniques for
 installing Docker all the time.
 
 Contents:
@@ -18,13 +18,14 @@ Contents:
    :maxdepth: 1
 
    ubuntulinux
-   binaries
-   security
-   upgrading
-   kernel
+   fedora
+   archlinux
+   gentoolinux
    vagrant
    windows
    amazon
    rackspace
-   archlinux
-   gentoolinux
+   kernel
+   binaries
+   security
+   upgrading

+ 2 - 13
docs/sources/installation/kernel.rst

@@ -11,10 +11,10 @@ In short, Docker has the following kernel requirements:
 
 - Linux version 3.8 or above.
 
-- `AUFS support <http://aufs.sourceforge.net/>`_.
-
 - Cgroups and namespaces must be enabled.
 
+*Note: as of 0.7 docker no longer requires aufs. AUFS support is still available as an optional driver.*
+
 The officially supported kernel is the one recommended by the
 :ref:`ubuntu_linux` installation path. It is the one that most developers
 will use, and the one that receives the most attention from the core
@@ -58,17 +58,6 @@ detects something older than 3.8.
 See issue `#407 <https://github.com/dotcloud/docker/issues/407>`_ for details.
 
 
-AUFS support
-------------
-
-Docker currently relies on AUFS, an unioning filesystem.
-While AUFS is included in the kernels built by the Debian and Ubuntu
-distributions, is not part of the standard kernel. This means that if
-you decide to roll your own kernel, you will have to patch your
-kernel tree to add AUFS. The process is documented on
-`AUFS webpage <http://aufs.sourceforge.net/>`_.
-
-
 Cgroups and namespaces
 ----------------------
 

+ 8 - 9
docs/sources/installation/rackspace.rst

@@ -2,7 +2,6 @@
 :description: Installing Docker on Ubuntu proviced by Rackspace
 :keywords: Rackspace Cloud, installation, docker, linux, ubuntu
 
-===============
 Rackspace Cloud
 ===============
 
@@ -14,14 +13,14 @@ straightforward, and you should mostly be able to follow the
 
 **However, there is one caveat:**
 
-If you are using any linux not already shipping with the 3.8 kernel
+If you are using any Linux not already shipping with the 3.8 kernel
 you will need to install it. And this is a little more difficult on
 Rackspace.
 
 Rackspace boots their servers using grub's ``menu.lst`` and does not
-like non 'virtual' packages (e.g. xen compatible) kernels there,
-although they do work. This makes ``update-grub`` to not have the
-expected result, and you need to set the kernel manually.
+like non 'virtual' packages (e.g. Xen compatible) kernels there,
+although they do work. This results in ``update-grub`` not having the
+expected result, and you will need to set the kernel manually.
 
 **Do not attempt this on a production machine!**
 
@@ -34,7 +33,7 @@ expected result, and you need to set the kernel manually.
     apt-get install linux-generic-lts-raring
 
 
-Great, now you have kernel installed in ``/boot/``, next is to make it
+Great, now you have the kernel installed in ``/boot/``, next you need to make it
 boot next time.
 
 .. code-block:: bash
@@ -48,9 +47,9 @@ boot next time.
 Now you need to manually edit ``/boot/grub/menu.lst``, you will find a
 section at the bottom with the existing options.  Copy the top one and
 substitute the new kernel into that. Make sure the new kernel is on
-top, and double check kernel and initrd point to the right files.
+top, and double check the kernel and initrd lines point to the right files.
 
-Make special care to double check the kernel and initrd entries.
+Take special care to double check the kernel and initrd entries.
 
 .. code-block:: bash
 
@@ -79,7 +78,7 @@ It will probably look something like this:
      initrd		/boot/initrd.img-3.2.0-38-virtual
 
 
-Reboot server (either via command line or console)
+Reboot the server (either via command line or console)
 
 .. code-block:: bash
 

+ 43 - 41
docs/sources/installation/ubuntulinux.rst

@@ -4,8 +4,8 @@
 
 .. _ubuntu_linux:
 
-Ubuntu Linux
-============
+Ubuntu
+======
 
 .. warning::
 
@@ -14,16 +14,11 @@ Ubuntu Linux
 
 .. include:: install_header.inc
 
-Right now, the officially supported distribution are:
+Docker is supported on the following versions of Ubuntu:
 
 - :ref:`ubuntu_precise`
 - :ref:`ubuntu_raring`
 
-Docker has the following dependencies
-
-* Linux kernel 3.8 (read more about :ref:`kernel`)
-* AUFS file system support (we are working on BTRFS support as an alternative)
-
 Please read :ref:`ufw`, if you plan to use `UFW (Uncomplicated
 Firewall) <https://help.ubuntu.com/community/UFW>`_
 
@@ -70,32 +65,35 @@ Installation
 
 Docker is available as a Debian package, which makes installation easy.
 
+First add the Docker repository key to your local keychain. You can use the
+``apt-key`` command to check the fingerprint matches: ``36A1 D786 9245 C895 0F96
+6E92 D857 6A8B A88D 21E9``
 
 .. code-block:: bash
 
-   # Add the Docker repository key to your local keychain
-   # using apt-key finger you can check the fingerprint matches 36A1 D786 9245 C895 0F96 6E92 D857 6A8B A88D 21E9
    sudo sh -c "wget -qO- https://get.docker.io/gpg | apt-key add -"
 
-   # Add the Docker repository to your apt sources list.
+Add the Docker repository to your apt sources list, update and install the
+``lxc-docker`` package. 
+
+*You may receive a warning that the package isn't trusted. Answer yes to
+continue installation.*
+
+.. code-block:: bash
+
    sudo sh -c "echo deb http://get.docker.io/ubuntu docker main\
    > /etc/apt/sources.list.d/docker.list"
-
-   # Update your sources
    sudo apt-get update
-
-   # Install, you will see another warning that the package cannot be authenticated. Confirm install.
    sudo apt-get install lxc-docker
 
-Verify it worked
+Now verify that the installation has worked by downloading the ``ubuntu`` image
+and launching a container.
 
 .. code-block:: bash
 
-   # download the base 'ubuntu' container and run bash inside it while setting up an interactive shell
    sudo docker run -i -t ubuntu /bin/bash
 
-   # type 'exit' to exit
-
+Type ``exit`` to exit
 
 **Done!**, now continue with the :ref:`hello_world` example.
 
@@ -107,10 +105,13 @@ Ubuntu Raring 13.04 (64 bit)
 Dependencies
 ------------
 
-**AUFS filesystem support**
+**Optional AUFS filesystem support**
 
 Ubuntu Raring already comes with the 3.8 kernel, so we don't need to install it. However, not all systems
-have AUFS filesystem support enabled, so we need to install it.
+have AUFS filesystem support enabled. AUFS support is optional as of version 0.7, but it's still available as
+a driver and we recommend using it if you can.
+
+To make sure AUFS is installed, run the following commands:
 
 .. code-block:: bash
 
@@ -123,36 +124,37 @@ Installation
 
 Docker is available as a Debian package, which makes installation easy.
 
-*Please note that these instructions have changed for 0.6. If you are upgrading from an earlier version, you will need
-to follow them again.*
+.. warning::
+
+    Please note that these instructions have changed for 0.6. If you are upgrading from an earlier version, you will need
+    to follow them again.
+
+First add the Docker repository key to your local keychain. You can use the
+``apt-key`` command to check the fingerprint matches: ``36A1 D786 9245 C895 0F96
+6E92 D857 6A8B A88D 21E9``
 
 .. code-block:: bash
 
-   # Add the Docker repository key to your local keychain
-   # using apt-key finger you can check the fingerprint matches 36A1 D786 9245 C895 0F96 6E92 D857 6A8B A88D 21E9
    sudo sh -c "wget -qO- https://get.docker.io/gpg | apt-key add -"
 
-   # Add the Docker repository to your apt sources list.
+Add the Docker repository to your apt sources list, update and install the
+``lxc-docker`` package.
+
+.. code-block:: bash
+
    sudo sh -c "echo deb http://get.docker.io/ubuntu docker main\
    > /etc/apt/sources.list.d/docker.list"
-
-   # update
    sudo apt-get update
-
-   # install
    sudo apt-get install lxc-docker
 
-
-Verify it worked
+Now verify that the installation has worked by downloading the ``ubuntu`` image
+and launching a container.
 
 .. code-block:: bash
 
-   # download the base 'ubuntu' container
-   # and run bash inside it while setting up an interactive shell
    sudo docker run -i -t ubuntu /bin/bash
 
-   # type exit to exit
-
+Type ``exit`` to exit
 
 **Done!**, now continue with the :ref:`hello_world` example.
 
@@ -162,8 +164,8 @@ Verify it worked
 Docker and UFW
 ^^^^^^^^^^^^^^
 
-Docker uses a bridge to manage container networking. By default, UFW
-drops all `forwarding`, thus a first step is to enable UFW forwarding:
+Docker uses a bridge to manage container networking. By default, UFW drops all
+`forwarding` traffic. As a result you will need to enable UFW forwarding:
 
 .. code-block:: bash
 
@@ -181,9 +183,9 @@ Then reload UFW:
    sudo ufw reload
 
 
-UFW's default set of rules denied all `incoming`, so if you want to be
-able to reach your containers from another host, you should allow
-incoming connections on the docker port (default 4243):
+UFW's default set of rules denies all `incoming` traffic. If you want to be
+able to reach your containers from another host then you should allow
+incoming connections on the Docker port (default 4243):
 
 .. code-block:: bash
 

+ 2 - 2
docs/sources/use/basics.rst

@@ -76,11 +76,11 @@ client commands.
   # Add the docker group if it doesn't already exist.
   sudo groupadd docker
 
-  # Add the user "ubuntu" to the docker group.
+  # Add the connected user "${USERNAME}" to the docker group.
   # Change the user name to match your preferred user.
   # You may have to logout and log back in again for
   # this to take effect.
-  sudo gpasswd -a ubuntu docker
+  sudo gpasswd -a ${USERNAME} docker
 
   # Restart the docker daemon.
   sudo service docker restart

+ 1 - 1
docs/theme/docker/layout.html

@@ -35,7 +35,7 @@
     %}
 
     {#
-        This part is hopefully complex because things like |cut '/index/' are not available in spinx jinja
+        This part is hopefully complex because things like |cut '/index/' are not available in Sphinx jinja
         and will make it crash. (and we need index/ out.
     #}
     <link rel="canonical" href="http://docs.docker.io/en/latest/

+ 82 - 45
graph.go

@@ -3,6 +3,7 @@ package docker
 import (
 	"fmt"
 	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/graphdriver"
 	"github.com/dotcloud/docker/utils"
 	"io"
 	"io/ioutil"
@@ -10,6 +11,7 @@ import (
 	"path"
 	"path/filepath"
 	"strings"
+	"syscall"
 	"time"
 )
 
@@ -17,11 +19,12 @@ import (
 type Graph struct {
 	Root    string
 	idIndex *utils.TruncIndex
+	driver  graphdriver.Driver
 }
 
 // NewGraph instantiates a new graph at the given root path in the filesystem.
 // `root` will be created if it doesn't exist.
-func NewGraph(root string) (*Graph, error) {
+func NewGraph(root string, driver graphdriver.Driver) (*Graph, error) {
 	abspath, err := filepath.Abs(root)
 	if err != nil {
 		return nil, err
@@ -30,9 +33,11 @@ func NewGraph(root string) (*Graph, error) {
 	if err := os.MkdirAll(root, 0700); err != nil && !os.IsExist(err) {
 		return nil, err
 	}
+
 	graph := &Graph{
 		Root:    abspath,
 		idIndex: utils.NewTruncIndex(),
+		driver:  driver,
 	}
 	if err := graph.restore(); err != nil {
 		return nil, err
@@ -47,7 +52,9 @@ func (graph *Graph) restore() error {
 	}
 	for _, v := range dir {
 		id := v.Name()
-		graph.idIndex.Add(id)
+		if graph.driver.Exists(id) {
+			graph.idIndex.Add(id)
+		}
 	}
 	return nil
 }
@@ -78,16 +85,22 @@ func (graph *Graph) Get(name string) (*Image, error) {
 	if err != nil {
 		return nil, err
 	}
+	// Check that the filesystem layer exists
+	rootfs, err := graph.driver.Get(img.ID)
+	if err != nil {
+		return nil, fmt.Errorf("Driver %s failed to get image rootfs %s: %s", graph.driver, img.ID, err)
+	}
 	if img.ID != id {
 		return nil, fmt.Errorf("Image stored at '%s' has wrong id '%s'", id, img.ID)
 	}
 	img.graph = graph
 	if img.Size == 0 {
-		root, err := img.root()
+		size, err := utils.TreeSize(rootfs)
 		if err != nil {
-			return nil, err
+			return nil, fmt.Errorf("Error computing size of rootfs %s: %s", img.ID, err)
 		}
-		if err := StoreSize(img, root); err != nil {
+		img.Size = size
+		if err := img.SaveSize(graph.imageRoot(id)); err != nil {
 			return nil, err
 		}
 	}
@@ -99,7 +112,7 @@ func (graph *Graph) Create(layerData archive.Archive, container *Container, comm
 	img := &Image{
 		ID:            GenerateID(),
 		Comment:       comment,
-		Created:       time.Now(),
+		Created:       time.Now().UTC(),
 		DockerVersion: VERSION,
 		Author:        author,
 		Config:        config,
@@ -118,7 +131,15 @@ func (graph *Graph) Create(layerData archive.Archive, container *Container, comm
 
 // Register imports a pre-existing image into the graph.
 // FIXME: pass img as first argument
-func (graph *Graph) Register(jsonData []byte, layerData archive.Archive, img *Image) error {
+func (graph *Graph) Register(jsonData []byte, layerData archive.Archive, img *Image) (err error) {
+	defer func() {
+		// If any error occurs, remove the new dir from the driver.
+		// Don't check for errors since the dir might not have been created.
+		// FIXME: this leaves a possible race condition.
+		if err != nil {
+			graph.driver.Remove(img.ID)
+		}
+	}()
 	if err := ValidateID(img.ID); err != nil {
 		return err
 	}
@@ -126,19 +147,43 @@ func (graph *Graph) Register(jsonData []byte, layerData archive.Archive, img *Im
 	if graph.Exists(img.ID) {
 		return fmt.Errorf("Image %s already exists", img.ID)
 	}
+
+	// Ensure that the image root does not exist on the filesystem
+	// when it is not registered in the graph.
+	// This is common when you switch from one graph driver to another
+	if err := os.RemoveAll(graph.imageRoot(img.ID)); err != nil && !os.IsNotExist(err) {
+		return err
+	}
+
+	// If the driver has this ID but the graph doesn't, remove it from the driver to start fresh.
+	// (the graph is the source of truth).
+	// Ignore errors, since we don't know if the driver correctly returns ErrNotExist.
+	// (FIXME: make that mandatory for drivers).
+	graph.driver.Remove(img.ID)
+
 	tmp, err := graph.Mktemp("")
 	defer os.RemoveAll(tmp)
 	if err != nil {
 		return fmt.Errorf("Mktemp failed: %s", err)
 	}
-	if err := StoreImage(img, jsonData, layerData, tmp); err != nil {
+
+	// Create root filesystem in the driver
+	if err := graph.driver.Create(img.ID, img.Parent); err != nil {
+		return fmt.Errorf("Driver %s failed to create image rootfs %s: %s", graph.driver, img.ID, err)
+	}
+	// Mount the root filesystem so we can apply the diff/layer
+	rootfs, err := graph.driver.Get(img.ID)
+	if err != nil {
+		return fmt.Errorf("Driver %s failed to get image rootfs %s: %s", graph.driver, img.ID, err)
+	}
+	img.graph = graph
+	if err := StoreImage(img, jsonData, layerData, tmp, rootfs); err != nil {
 		return err
 	}
 	// Commit
 	if err := os.Rename(tmp, graph.imageRoot(img.ID)); err != nil {
 		return err
 	}
-	img.graph = graph
 	graph.idIndex.Add(img.ID)
 	return nil
 }
@@ -152,50 +197,33 @@ func (graph *Graph) TempLayerArchive(id string, compression archive.Compression,
 	if err != nil {
 		return nil, err
 	}
-	tmp, err := graph.tmp()
+	tmp, err := graph.Mktemp("")
 	if err != nil {
 		return nil, err
 	}
-	a, err := image.TarLayer(compression)
+	a, err := image.TarLayer()
 	if err != nil {
 		return nil, err
 	}
-	return archive.NewTempArchive(utils.ProgressReader(ioutil.NopCloser(a), 0, output, sf.FormatProgress("", "Buffering to disk", "%v/%v (%v)"), sf, true), tmp.Root)
+	return archive.NewTempArchive(utils.ProgressReader(ioutil.NopCloser(a), 0, output, sf.FormatProgress("", "Buffering to disk", "%v/%v (%v)"), sf, true), tmp)
 }
 
 // Mktemp creates a temporary sub-directory inside the graph's filesystem.
 func (graph *Graph) Mktemp(id string) (string, error) {
-	if id == "" {
-		id = GenerateID()
-	}
-	tmp, err := graph.tmp()
-	if err != nil {
-		return "", fmt.Errorf("Couldn't create temp: %s", err)
-	}
-	if tmp.Exists(id) {
-		return "", fmt.Errorf("Image %s already exists", id)
+	dir := path.Join(graph.Root, "_tmp", GenerateID())
+	if err := os.MkdirAll(dir, 0700); err != nil {
+		return "", err
 	}
-	return tmp.imageRoot(id), nil
+	return dir, nil
 }
 
-// getDockerInitLayer returns the path of a layer containing a mountpoint suitable
+// setupInitLayer populates a directory with mountpoints suitable
 // for bind-mounting dockerinit into the container. The mountpoint is simply an
 // empty file at /.dockerinit
 //
 // This extra layer is used by all containers as the top-most ro layer. It protects
 // the container from unwanted side-effects on the rw layer.
-func (graph *Graph) getDockerInitLayer() (string, error) {
-	tmp, err := graph.tmp()
-	if err != nil {
-		return "", err
-	}
-	initLayer := tmp.imageRoot("_dockerinit")
-	if err := os.Mkdir(initLayer, 0755); err != nil && !os.IsExist(err) {
-		// If directory already existed, keep going.
-		// For all other errors, abort.
-		return "", err
-	}
-
+func setupInitLayer(initLayer string) error {
 	for pth, typ := range map[string]string{
 		"/dev/pts":         "dir",
 		"/dev/shm":         "dir",
@@ -209,36 +237,38 @@ func (graph *Graph) getDockerInitLayer() (string, error) {
 		// "var/run": "dir",
 		// "var/lock": "dir",
 	} {
+		parts := strings.Split(pth, "/")
+		prev := "/"
+		for _, p := range parts[1:] {
+			prev = path.Join(prev, p)
+			syscall.Unlink(path.Join(initLayer, prev))
+		}
+
 		if _, err := os.Stat(path.Join(initLayer, pth)); err != nil {
 			if os.IsNotExist(err) {
 				switch typ {
 				case "dir":
 					if err := os.MkdirAll(path.Join(initLayer, pth), 0755); err != nil {
-						return "", err
+						return err
 					}
 				case "file":
 					if err := os.MkdirAll(path.Join(initLayer, path.Dir(pth)), 0755); err != nil {
-						return "", err
+						return err
 					}
 					f, err := os.OpenFile(path.Join(initLayer, pth), os.O_CREATE, 0755)
 					if err != nil {
-						return "", err
+						return err
 					}
 					f.Close()
 				}
 			} else {
-				return "", err
+				return err
 			}
 		}
 	}
 
 	// Layer is ready to use, if it wasn't before.
-	return initLayer, nil
-}
-
-func (graph *Graph) tmp() (*Graph, error) {
-	// Changed to _tmp from :tmp:, because it messed with ":" separators in aufs branch syntax...
-	return NewGraph(path.Join(graph.Root, "_tmp"))
+	return nil
 }
 
 // Check if given error is "not empty".
@@ -270,6 +300,9 @@ func (graph *Graph) Delete(name string) error {
 	if err != nil {
 		return err
 	}
+	// Remove rootfs data from the driver
+	graph.driver.Remove(id)
+	// Remove the trashed image directory
 	return os.RemoveAll(tmp)
 }
 
@@ -344,3 +377,7 @@ func (graph *Graph) Heads() (map[string]*Image, error) {
 func (graph *Graph) imageRoot(id string) string {
 	return path.Join(graph.Root, id)
 }
+
+func (graph *Graph) Driver() graphdriver.Driver {
+	return graph.driver
+}

+ 0 - 292
graph_test.go

@@ -1,292 +0,0 @@
-package docker
-
-import (
-	"archive/tar"
-	"bytes"
-	"errors"
-	"github.com/dotcloud/docker/archive"
-	"github.com/dotcloud/docker/utils"
-	"io"
-	"io/ioutil"
-	"os"
-	"testing"
-	"time"
-)
-
-func TestInit(t *testing.T) {
-	graph := tempGraph(t)
-	defer os.RemoveAll(graph.Root)
-	// Root should exist
-	if _, err := os.Stat(graph.Root); err != nil {
-		t.Fatal(err)
-	}
-	// Map() should be empty
-	if l, err := graph.Map(); err != nil {
-		t.Fatal(err)
-	} else if len(l) != 0 {
-		t.Fatalf("len(Map()) should return %d, not %d", 0, len(l))
-	}
-}
-
-// Test that Register can be interrupted cleanly without side effects
-func TestInterruptedRegister(t *testing.T) {
-	graph := tempGraph(t)
-	defer os.RemoveAll(graph.Root)
-	badArchive, w := io.Pipe() // Use a pipe reader as a fake archive which never yields data
-	image := &Image{
-		ID:      GenerateID(),
-		Comment: "testing",
-		Created: time.Now(),
-	}
-	go graph.Register(nil, badArchive, image)
-	time.Sleep(200 * time.Millisecond)
-	w.CloseWithError(errors.New("But I'm not a tarball!")) // (Nobody's perfect, darling)
-	if _, err := graph.Get(image.ID); err == nil {
-		t.Fatal("Image should not exist after Register is interrupted")
-	}
-	// Registering the same image again should succeed if the first register was interrupted
-	goodArchive, err := fakeTar()
-	if err != nil {
-		t.Fatal(err)
-	}
-	if err := graph.Register(nil, goodArchive, image); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// FIXME: Do more extensive tests (ex: create multiple, delete, recreate;
-//       create multiple, check the amount of images and paths, etc..)
-func TestGraphCreate(t *testing.T) {
-	graph := tempGraph(t)
-	defer os.RemoveAll(graph.Root)
-	archive, err := fakeTar()
-	if err != nil {
-		t.Fatal(err)
-	}
-	image, err := graph.Create(archive, nil, "Testing", "", nil)
-	if err != nil {
-		t.Fatal(err)
-	}
-	if err := ValidateID(image.ID); err != nil {
-		t.Fatal(err)
-	}
-	if image.Comment != "Testing" {
-		t.Fatalf("Wrong comment: should be '%s', not '%s'", "Testing", image.Comment)
-	}
-	if image.DockerVersion != VERSION {
-		t.Fatalf("Wrong docker_version: should be '%s', not '%s'", VERSION, image.DockerVersion)
-	}
-	images, err := graph.Map()
-	if err != nil {
-		t.Fatal(err)
-	} else if l := len(images); l != 1 {
-		t.Fatalf("Wrong number of images. Should be %d, not %d", 1, l)
-	}
-	if images[image.ID] == nil {
-		t.Fatalf("Could not find image with id %s", image.ID)
-	}
-}
-
-func TestRegister(t *testing.T) {
-	graph := tempGraph(t)
-	defer os.RemoveAll(graph.Root)
-	archive, err := fakeTar()
-	if err != nil {
-		t.Fatal(err)
-	}
-	image := &Image{
-		ID:      GenerateID(),
-		Comment: "testing",
-		Created: time.Now(),
-	}
-	err = graph.Register(nil, archive, image)
-	if err != nil {
-		t.Fatal(err)
-	}
-	if images, err := graph.Map(); err != nil {
-		t.Fatal(err)
-	} else if l := len(images); l != 1 {
-		t.Fatalf("Wrong number of images. Should be %d, not %d", 1, l)
-	}
-	if resultImg, err := graph.Get(image.ID); err != nil {
-		t.Fatal(err)
-	} else {
-		if resultImg.ID != image.ID {
-			t.Fatalf("Wrong image ID. Should be '%s', not '%s'", image.ID, resultImg.ID)
-		}
-		if resultImg.Comment != image.Comment {
-			t.Fatalf("Wrong image comment. Should be '%s', not '%s'", image.Comment, resultImg.Comment)
-		}
-	}
-}
-
-// Test that an image can be deleted by its shorthand prefix
-func TestDeletePrefix(t *testing.T) {
-	graph := tempGraph(t)
-	defer os.RemoveAll(graph.Root)
-	img := createTestImage(graph, t)
-	if err := graph.Delete(utils.TruncateID(img.ID)); err != nil {
-		t.Fatal(err)
-	}
-	assertNImages(graph, t, 0)
-}
-
-func createTestImage(graph *Graph, t *testing.T) *Image {
-	archive, err := fakeTar()
-	if err != nil {
-		t.Fatal(err)
-	}
-	img, err := graph.Create(archive, nil, "Test image", "", nil)
-	if err != nil {
-		t.Fatal(err)
-	}
-	return img
-}
-
-func TestDelete(t *testing.T) {
-	graph := tempGraph(t)
-	defer os.RemoveAll(graph.Root)
-	archive, err := fakeTar()
-	if err != nil {
-		t.Fatal(err)
-	}
-	assertNImages(graph, t, 0)
-	img, err := graph.Create(archive, nil, "Bla bla", "", nil)
-	if err != nil {
-		t.Fatal(err)
-	}
-	assertNImages(graph, t, 1)
-	if err := graph.Delete(img.ID); err != nil {
-		t.Fatal(err)
-	}
-	assertNImages(graph, t, 0)
-
-	archive, err = fakeTar()
-	if err != nil {
-		t.Fatal(err)
-	}
-	// Test 2 create (same name) / 1 delete
-	img1, err := graph.Create(archive, nil, "Testing", "", nil)
-	if err != nil {
-		t.Fatal(err)
-	}
-	archive, err = fakeTar()
-	if err != nil {
-		t.Fatal(err)
-	}
-	if _, err = graph.Create(archive, nil, "Testing", "", nil); err != nil {
-		t.Fatal(err)
-	}
-	assertNImages(graph, t, 2)
-	if err := graph.Delete(img1.ID); err != nil {
-		t.Fatal(err)
-	}
-	assertNImages(graph, t, 1)
-
-	// Test delete wrong name
-	if err := graph.Delete("Not_foo"); err == nil {
-		t.Fatalf("Deleting wrong ID should return an error")
-	}
-	assertNImages(graph, t, 1)
-
-	archive, err = fakeTar()
-	if err != nil {
-		t.Fatal(err)
-	}
-	// Test delete twice (pull -> rm -> pull -> rm)
-	if err := graph.Register(nil, archive, img1); err != nil {
-		t.Fatal(err)
-	}
-	if err := graph.Delete(img1.ID); err != nil {
-		t.Fatal(err)
-	}
-	assertNImages(graph, t, 1)
-}
-
-func TestByParent(t *testing.T) {
-	archive1, _ := fakeTar()
-	archive2, _ := fakeTar()
-	archive3, _ := fakeTar()
-
-	graph := tempGraph(t)
-	defer os.RemoveAll(graph.Root)
-	parentImage := &Image{
-		ID:      GenerateID(),
-		Comment: "parent",
-		Created: time.Now(),
-		Parent:  "",
-	}
-	childImage1 := &Image{
-		ID:      GenerateID(),
-		Comment: "child1",
-		Created: time.Now(),
-		Parent:  parentImage.ID,
-	}
-	childImage2 := &Image{
-		ID:      GenerateID(),
-		Comment: "child2",
-		Created: time.Now(),
-		Parent:  parentImage.ID,
-	}
-	_ = graph.Register(nil, archive1, parentImage)
-	_ = graph.Register(nil, archive2, childImage1)
-	_ = graph.Register(nil, archive3, childImage2)
-
-	byParent, err := graph.ByParent()
-	if err != nil {
-		t.Fatal(err)
-	}
-	numChildren := len(byParent[parentImage.ID])
-	if numChildren != 2 {
-		t.Fatalf("Expected 2 children, found %d", numChildren)
-	}
-}
-
-func assertNImages(graph *Graph, t *testing.T, n int) {
-	if images, err := graph.Map(); err != nil {
-		t.Fatal(err)
-	} else if actualN := len(images); actualN != n {
-		t.Fatalf("Expected %d images, found %d", n, actualN)
-	}
-}
-
-/*
- * HELPER FUNCTIONS
- */
-
-func tempGraph(t *testing.T) *Graph {
-	tmp, err := ioutil.TempDir("", "docker-graph-")
-	if err != nil {
-		t.Fatal(err)
-	}
-	graph, err := NewGraph(tmp)
-	if err != nil {
-		t.Fatal(err)
-	}
-	return graph
-}
-
-func testArchive(t *testing.T) archive.Archive {
-	archive, err := fakeTar()
-	if err != nil {
-		t.Fatal(err)
-	}
-	return archive
-}
-
-func fakeTar() (io.Reader, error) {
-	content := []byte("Hello world!\n")
-	buf := new(bytes.Buffer)
-	tw := tar.NewWriter(buf)
-	for _, name := range []string{"/etc/postgres/postgres.conf", "/etc/passwd", "/var/log/postgres/postgres.conf"} {
-		hdr := new(tar.Header)
-		hdr.Size = int64(len(content))
-		hdr.Name = name
-		if err := tw.WriteHeader(hdr); err != nil {
-			return nil, err
-		}
-		tw.Write([]byte(content))
-	}
-	tw.Close()
-	return buf, nil
-}

+ 0 - 0
gograph/MAINTAINERS → graphdb/MAINTAINERS


+ 1 - 1
gograph/gograph.go → graphdb/graphdb.go

@@ -1,4 +1,4 @@
-package gograph
+package graphdb
 
 import (
 	"database/sql"

+ 1 - 1
gograph/gograph_test.go → graphdb/graphdb_test.go

@@ -1,4 +1,4 @@
-package gograph
+package graphdb
 
 import (
 	_ "code.google.com/p/gosqlite/sqlite3"

+ 1 - 1
gograph/sort.go → graphdb/sort.go

@@ -1,4 +1,4 @@
-package gograph
+package graphdb
 
 import "sort"
 

+ 1 - 1
gograph/sort_test.go → graphdb/sort_test.go

@@ -1,4 +1,4 @@
-package gograph
+package graphdb
 
 import (
 	"testing"

+ 1 - 1
gograph/utils.go → graphdb/utils.go

@@ -1,4 +1,4 @@
-package gograph
+package graphdb
 
 import (
 	"path"

+ 336 - 0
graphdriver/aufs/aufs.go

@@ -0,0 +1,336 @@
+/*
+
+aufs driver directory structure
+
+.
+├── layers // Metadata of layers
+│   ├── 1
+│   ├── 2
+│   └── 3
+├── diffs  // Content of the layer
+│   ├── 1  // Contains layers that need to be mounted for the id
+│   ├── 2
+│   └── 3
+└── mnt    // Mount points for the rw layers to be mounted
+    ├── 1
+    ├── 2
+    └── 3
+
+*/
+
+package aufs
+
+import (
+	"bufio"
+	"fmt"
+	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/graphdriver"
+	"github.com/dotcloud/docker/utils"
+	"log"
+	"os"
+	"os/exec"
+	"path"
+	"strings"
+)
+
+func init() {
+	graphdriver.Register("aufs", Init)
+}
+
+type Driver struct {
+	root string
+}
+
+// Init returns a new AUFS driver.
+// An error is returned if AUFS is not supported.
+func Init(root string) (graphdriver.Driver, error) {
+	// Try to load the aufs kernel module
+	if err := supportsAufs(); err != nil {
+		return nil, err
+	}
+	paths := []string{
+		"mnt",
+		"diff",
+		"layers",
+	}
+
+	// Create the root aufs driver dir and return
+	// if it already exists
+	// If not populate the dir structure
+	if err := os.MkdirAll(root, 0755); err != nil {
+		if os.IsExist(err) {
+			return &Driver{root}, nil
+		}
+		return nil, err
+	}
+
+	for _, p := range paths {
+		if err := os.MkdirAll(path.Join(root, p), 0755); err != nil {
+			return nil, err
+		}
+	}
+	return &Driver{root}, nil
+}
+
+// Return a nil error if the kernel supports aufs
+// We cannot modprobe because inside dind modprobe fails
+// to run
+func supportsAufs() error {
+	// We can try to modprobe aufs first before looking at
+	// proc/filesystems for when aufs is supported
+	exec.Command("modprobe", "aufs").Run()
+
+	f, err := os.Open("/proc/filesystems")
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	s := bufio.NewScanner(f)
+	for s.Scan() {
+		if strings.Contains(s.Text(), "aufs") {
+			return nil
+		}
+	}
+	return fmt.Errorf("AUFS was not found in /proc/filesystems")
+}
+
+func (a Driver) rootPath() string {
+	return a.root
+}
+
+func (Driver) String() string {
+	return "aufs"
+}
+
+func (a Driver) Status() [][2]string {
+	ids, _ := loadIds(path.Join(a.rootPath(), "layers"))
+	return [][2]string{
+		{"Root Dir", a.rootPath()},
+		{"Dirs", fmt.Sprintf("%d", len(ids))},
+	}
+}
+
+// Exists returns true if the given id is registered with
+// this driver
+func (a Driver) Exists(id string) bool {
+	if _, err := os.Lstat(path.Join(a.rootPath(), "layers", id)); err != nil {
+		return false
+	}
+	return true
+}
+
+// Three folders are created for each id
+// mnt, layers, and diff
+func (a *Driver) Create(id, parent string) error {
+	if err := a.createDirsFor(id); err != nil {
+		return err
+	}
+	// Write the layers metadata
+	f, err := os.Create(path.Join(a.rootPath(), "layers", id))
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	if parent != "" {
+		ids, err := getParentIds(a.rootPath(), parent)
+		if err != nil {
+			return err
+		}
+
+		if _, err := fmt.Fprintln(f, parent); err != nil {
+			return err
+		}
+		for _, i := range ids {
+			if _, err := fmt.Fprintln(f, i); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+func (a *Driver) createDirsFor(id string) error {
+	paths := []string{
+		"mnt",
+		"diff",
+	}
+
+	for _, p := range paths {
+		if err := os.MkdirAll(path.Join(a.rootPath(), p, id), 0755); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Unmount and remove the dir information
+func (a *Driver) Remove(id string) error {
+	// Make sure the dir is umounted first
+	if err := a.unmount(id); err != nil {
+		return err
+	}
+	tmpDirs := []string{
+		"mnt",
+		"diff",
+	}
+
+	// Remove the dirs atomically
+	for _, p := range tmpDirs {
+		// We need to use a temp dir in the same dir as the driver so Rename
+		// does not fall back to the slow copy if /tmp and the driver dir
+		// are on different devices
+		tmp := path.Join(a.rootPath(), "tmp", p, id)
+		if err := os.MkdirAll(tmp, 0755); err != nil {
+			return err
+		}
+		realPath := path.Join(a.rootPath(), p, id)
+		if err := os.Rename(realPath, tmp); err != nil && !os.IsNotExist(err) {
+			return err
+		}
+		defer os.RemoveAll(tmp)
+	}
+
+	// Remove the layers file for the id
+	if err := os.Remove(path.Join(a.rootPath(), "layers", id)); err != nil && !os.IsNotExist(err) {
+		return err
+	}
+	return nil
+}
+
+// Return the rootfs path for the id
+// This will mount the dir at its given path
+func (a *Driver) Get(id string) (string, error) {
+	ids, err := getParentIds(a.rootPath(), id)
+	if err != nil {
+		if !os.IsNotExist(err) {
+			return "", err
+		}
+		ids = []string{}
+	}
+
+// If a dir does not have a parent (no layers), do not try to mount
+	// just return the diff path to the data
+	out := path.Join(a.rootPath(), "diff", id)
+	if len(ids) > 0 {
+		out = path.Join(a.rootPath(), "mnt", id)
+		if err := a.mount(id); err != nil {
+			return "", err
+		}
+	}
+	return out, nil
+}
+
+// Returns an archive of the contents for the id
+func (a *Driver) Diff(id string) (archive.Archive, error) {
+	return archive.TarFilter(path.Join(a.rootPath(), "diff", id), &archive.TarOptions{
+		Recursive:   true,
+		Compression: archive.Uncompressed,
+	})
+}
+
+func (a *Driver) ApplyDiff(id string, diff archive.Archive) error {
+	return archive.Untar(diff, path.Join(a.rootPath(), "diff", id), nil)
+}
+
+// Returns the size of the contents for the id
+func (a *Driver) DiffSize(id string) (int64, error) {
+	return utils.TreeSize(path.Join(a.rootPath(), "diff", id))
+}
+
+func (a *Driver) Changes(id string) ([]archive.Change, error) {
+	layers, err := a.getParentLayerPaths(id)
+	if err != nil {
+		return nil, err
+	}
+	return archive.Changes(layers, path.Join(a.rootPath(), "diff", id))
+}
+
+func (a *Driver) getParentLayerPaths(id string) ([]string, error) {
+	parentIds, err := getParentIds(a.rootPath(), id)
+	if err != nil {
+		return nil, err
+	}
+	if len(parentIds) == 0 {
+		return nil, fmt.Errorf("Dir %s does not have any parent layers", id)
+	}
+	layers := make([]string, len(parentIds))
+
+	// Get the diff paths for all the parent ids
+	for i, p := range parentIds {
+		layers[i] = path.Join(a.rootPath(), "diff", p)
+	}
+	return layers, nil
+}
+
+func (a *Driver) mount(id string) error {
+	// If the id is mounted or we get an error return
+	if mounted, err := a.mounted(id); err != nil || mounted {
+		return err
+	}
+
+	var (
+		target = path.Join(a.rootPath(), "mnt", id)
+		rw     = path.Join(a.rootPath(), "diff", id)
+	)
+
+	layers, err := a.getParentLayerPaths(id)
+	if err != nil {
+		return err
+	}
+
+	if err := a.aufsMount(layers, rw, target); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (a *Driver) unmount(id string) error {
+	if mounted, err := a.mounted(id); err != nil || !mounted {
+		return err
+	}
+	target := path.Join(a.rootPath(), "mnt", id)
+	return Unmount(target)
+}
+
+func (a *Driver) mounted(id string) (bool, error) {
+	target := path.Join(a.rootPath(), "mnt", id)
+	return Mounted(target)
+}
+
+// During cleanup aufs needs to unmount all mountpoints
+func (a *Driver) Cleanup() error {
+	ids, err := loadIds(path.Join(a.rootPath(), "layers"))
+	if err != nil {
+		return err
+	}
+	for _, id := range ids {
+		if err := a.unmount(id); err != nil {
+			utils.Errorf("Unmounting %s: %s", utils.TruncateID(id), err)
+		}
+	}
+	return nil
+}
+
+func (a *Driver) aufsMount(ro []string, rw, target string) error {
+	rwBranch := fmt.Sprintf("%v=rw", rw)
+	roBranches := ""
+	for _, layer := range ro {
+		roBranches += fmt.Sprintf("%v=ro+wh:", layer)
+	}
+	branches := fmt.Sprintf("br:%v:%v,xino=/dev/shm/aufs.xino", rwBranch, roBranches)
+
+	//if error, try to load aufs kernel module
+	if err := mount("none", target, "aufs", 0, branches); err != nil {
+		log.Printf("Kernel does not support AUFS, trying to load the AUFS module with modprobe...")
+		if err := exec.Command("modprobe", "aufs").Run(); err != nil {
+			return fmt.Errorf("Unable to load the AUFS module")
+		}
+		log.Printf("...module loaded.")
+		if err := mount("none", target, "aufs", 0, branches); err != nil {
+			return fmt.Errorf("Unable to mount using aufs %s", err)
+		}
+	}
+	return nil
+}

+ 623 - 0
graphdriver/aufs/aufs_test.go

@@ -0,0 +1,623 @@
+package aufs
+
+import (
+	"github.com/dotcloud/docker/archive"
+	"os"
+	"path"
+	"testing"
+)
+
+var (
+	tmp = path.Join(os.TempDir(), "aufs-tests", "aufs")
+)
+
+func newDriver(t *testing.T) *Driver {
+	if err := os.MkdirAll(tmp, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	d, err := Init(tmp)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return d.(*Driver)
+}
+
+func TestNewDriver(t *testing.T) {
+	if err := os.MkdirAll(tmp, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	d, err := Init(tmp)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(tmp)
+	if d == nil {
+		t.Fatalf("Driver should not be nil")
+	}
+}
+
+func TestAufsString(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if d.String() != "aufs" {
+		t.Fatalf("Expected aufs got %s", d.String())
+	}
+}
+
+func TestCreateDirStructure(t *testing.T) {
+	newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	paths := []string{
+		"mnt",
+		"layers",
+		"diff",
+	}
+
+	for _, p := range paths {
+		if _, err := os.Stat(path.Join(tmp, p)); err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
+// We should be able to create two drivers with the same dir structure
+func TestNewDriverFromExistingDir(t *testing.T) {
+	if err := os.MkdirAll(tmp, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	if _, err := Init(tmp); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := Init(tmp); err != nil {
+		t.Fatal(err)
+	}
+	os.RemoveAll(tmp)
+}
+
+func TestCreateNewDir(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestCreateNewDirStructure(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	paths := []string{
+		"mnt",
+		"diff",
+		"layers",
+	}
+
+	for _, p := range paths {
+		if _, err := os.Stat(path.Join(tmp, p, "1")); err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
+func TestRemoveImage(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := d.Remove("1"); err != nil {
+		t.Fatal(err)
+	}
+
+	paths := []string{
+		"mnt",
+		"diff",
+		"layers",
+	}
+
+	for _, p := range paths {
+		if _, err := os.Stat(path.Join(tmp, p, "1")); err == nil {
+			t.Fatalf("Error should not be nil because dirs with id 1 should be delted: %s", p)
+		}
+	}
+}
+
+func TestGetWithoutParent(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	diffPath, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	expected := path.Join(tmp, "diff", "1")
+	if diffPath != expected {
+		t.Fatalf("Expected path %s got %s", expected, diffPath)
+	}
+}
+
+func TestCleanupWithNoDirs(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Cleanup(); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestCleanupWithDir(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := d.Cleanup(); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestMountedFalseResponse(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	response, err := d.mounted("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if response != false {
+		t.Fatalf("Response if dir id 1 is mounted should be false")
+	}
+}
+
+func TestMountedTrueReponse(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+	defer d.Cleanup()
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+	if err := d.Create("2", "1"); err != nil {
+		t.Fatal(err)
+	}
+
+	_, err := d.Get("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	response, err := d.mounted("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if response != true {
+		t.Fatalf("Response if dir id 2 is mounted should be true")
+	}
+}
+
+func TestMountWithParent(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+	if err := d.Create("2", "1"); err != nil {
+		t.Fatal(err)
+	}
+
+	defer func() {
+		if err := d.Cleanup(); err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	mntPath, err := d.Get("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if mntPath == "" {
+		t.Fatal("mntPath should not be empty string")
+	}
+
+	expected := path.Join(tmp, "mnt", "2")
+	if mntPath != expected {
+		t.Fatalf("Expected %s got %s", expected, mntPath)
+	}
+}
+
+func TestRemoveMountedDir(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+	if err := d.Create("2", "1"); err != nil {
+		t.Fatal(err)
+	}
+
+	defer func() {
+		if err := d.Cleanup(); err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	mntPath, err := d.Get("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if mntPath == "" {
+		t.Fatal("mntPath should not be empty string")
+	}
+
+	mounted, err := d.mounted("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if !mounted {
+		t.Fatalf("Dir id 2 should be mounted")
+	}
+
+	if err := d.Remove("2"); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestCreateWithInvalidParent(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", "docker"); err == nil {
+		t.Fatalf("Error should not be nil with parent does not exist")
+	}
+}
+
+func TestGetDiff(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	diffPath, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Add a file to the diff path with a fixed size
+	size := int64(1024)
+
+	f, err := os.Create(path.Join(diffPath, "test_file"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Truncate(size); err != nil {
+		t.Fatal(err)
+	}
+	f.Close()
+
+	a, err := d.Diff("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if a == nil {
+		t.Fatalf("Archive should not be nil")
+	}
+}
+
+func TestChanges(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+	if err := d.Create("2", "1"); err != nil {
+		t.Fatal(err)
+	}
+
+	defer func() {
+		if err := d.Cleanup(); err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	mntPoint, err := d.Get("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Create a file to save in the mountpoint
+	f, err := os.Create(path.Join(mntPoint, "test.txt"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if _, err := f.WriteString("testline"); err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	changes, err := d.Changes("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(changes) != 1 {
+		t.Fatalf("Dir 2 should have one change from parent got %d", len(changes))
+	}
+	change := changes[0]
+
+	expectedPath := "/test.txt"
+	if change.Path != expectedPath {
+		t.Fatalf("Expected path %s got %s", expectedPath, change.Path)
+	}
+
+	if change.Kind != archive.ChangeAdd {
+		t.Fatalf("Change kind should be ChangeAdd got %s", change.Kind)
+	}
+
+	if err := d.Create("3", "2"); err != nil {
+		t.Fatal(err)
+	}
+	mntPoint, err = d.Get("3")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Create a file to save in the mountpoint
+	f, err = os.Create(path.Join(mntPoint, "test2.txt"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if _, err := f.WriteString("testline"); err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	changes, err = d.Changes("3")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(changes) != 1 {
+		t.Fatalf("Dir 2 should have one change from parent got %d", len(changes))
+	}
+	change = changes[0]
+
+	expectedPath = "/test2.txt"
+	if change.Path != expectedPath {
+		t.Fatalf("Expected path %s got %s", expectedPath, change.Path)
+	}
+
+	if change.Kind != archive.ChangeAdd {
+		t.Fatalf("Change kind should be ChangeAdd got %s", change.Kind)
+	}
+}
+
+func TestDiffSize(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	diffPath, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Add a file to the diff path with a fixed size
+	size := int64(1024)
+
+	f, err := os.Create(path.Join(diffPath, "test_file"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Truncate(size); err != nil {
+		t.Fatal(err)
+	}
+	s, err := f.Stat()
+	if err != nil {
+		t.Fatal(err)
+	}
+	size = s.Size()
+	if err := f.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	diffSize, err := d.DiffSize("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if diffSize != size {
+		t.Fatalf("Expected size to be %d got %d", size, diffSize)
+	}
+}
+
+func TestChildDiffSize(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+	defer d.Cleanup()
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	diffPath, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Add a file to the diff path with a fixed size
+	size := int64(1024)
+
+	f, err := os.Create(path.Join(diffPath, "test_file"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Truncate(size); err != nil {
+		t.Fatal(err)
+	}
+	s, err := f.Stat()
+	if err != nil {
+		t.Fatal(err)
+	}
+	size = s.Size()
+	if err := f.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	diffSize, err := d.DiffSize("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if diffSize != size {
+		t.Fatalf("Expected size to be %d got %d", size, diffSize)
+	}
+
+	if err := d.Create("2", "1"); err != nil {
+		t.Fatal(err)
+	}
+
+	diffSize, err = d.DiffSize("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+	// The diff size for the child should be zero
+	if diffSize != 0 {
+		t.Fatalf("Expected size to be %d got %d", 0, diffSize)
+	}
+}
+
+func TestExists(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+	defer d.Cleanup()
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	if d.Exists("none") {
+		t.Fatal("id name should not exist in the driver")
+	}
+
+	if !d.Exists("1") {
+		t.Fatal("id 1 should exist in the driver")
+	}
+}
+
+func TestStatus(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+	defer d.Cleanup()
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	status := d.Status()
+	if status == nil || len(status) == 0 {
+		t.Fatal("Status should not be nil or empty")
+	}
+	rootDir := status[0]
+	dirs := status[1]
+	if rootDir[0] != "Root Dir" {
+		t.Fatalf("Expected Root Dir got %s", rootDir[0])
+	}
+	if rootDir[1] != d.rootPath() {
+		t.Fatalf("Expected %s got %s", d.rootPath(), rootDir[1])
+	}
+	if dirs[0] != "Dirs" {
+		t.Fatalf("Expected Dirs got %s", dirs[0])
+	}
+	if dirs[1] != "1" {
+		t.Fatalf("Expected 1 got %s", dirs[1])
+	}
+}
+
+func TestApplyDiff(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+	defer d.Cleanup()
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	diffPath, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Add a file to the diff path with a fixed size
+	size := int64(1024)
+
+	f, err := os.Create(path.Join(diffPath, "test_file"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Truncate(size); err != nil {
+		t.Fatal(err)
+	}
+	f.Close()
+
+	diff, err := d.Diff("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if err := d.Create("2", ""); err != nil {
+		t.Fatal(err)
+	}
+	if err := d.Create("3", "2"); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := d.ApplyDiff("3", diff); err != nil {
+		t.Fatal(err)
+	}
+
+	// Ensure that the file is in the mount point for id 3
+
+	mountPoint, err := d.Get("3")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if _, err := os.Stat(path.Join(mountPoint, "test_file")); err != nil {
+		t.Fatal(err)
+	}
+}

+ 46 - 0
graphdriver/aufs/dirs.go

@@ -0,0 +1,46 @@
+package aufs
+
+import (
+	"bufio"
+	"io/ioutil"
+	"os"
+	"path"
+)
+
+// Return all the directories
+func loadIds(root string) ([]string, error) {
+	dirs, err := ioutil.ReadDir(root)
+	if err != nil {
+		return nil, err
+	}
+	out := []string{}
+	for _, d := range dirs {
+		if !d.IsDir() {
+			out = append(out, d.Name())
+		}
+	}
+	return out, nil
+}
+
+// Read the layers file for the current id and return all the
+// layers represented by new lines in the file
+//
+// If there are no lines in the file then the id has no parent
+// and an empty slice is returned.
+func getParentIds(root, id string) ([]string, error) {
+	f, err := os.Open(path.Join(root, "layers", id))
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	out := []string{}
+	s := bufio.NewScanner(f)
+
+	for s.Scan() {
+		if t := s.Text(); t != "" {
+			out = append(out, s.Text())
+		}
+	}
+	return out, s.Err()
+}

+ 194 - 0
graphdriver/aufs/migrate.go

@@ -0,0 +1,194 @@
+package aufs
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path"
+)
+
+type metadata struct {
+	ID       string `json:"id"`
+	ParentID string `json:"parent,omitempty"`
+	Image    string `json:"Image,omitempty"`
+
+	parent *metadata
+}
+
+func pathExists(pth string) bool {
+	if _, err := os.Stat(pth); err != nil {
+		return false
+	}
+	return true
+}
+
+// Migrate existing images and containers from docker < 0.7.x
+//
+// The format pre 0.7 is for docker to store the metadata and filesystem
+// content in the same directory.  For the migration to work we need to move Image layer
+// data from /var/lib/docker/graph/<id>/layers to the diff of the registered id.
+//
+// Next we need to migrate the container's rw layer to diff of the driver.  After the
+// contents are migrated we need to register the image and container ids with the
+// driver.
+//
+// For the migration we try to move the folder containing the layer files, if that
+// fails because the data is currently mounted we will fallback to creating a
+// symlink.
+func (a *Driver) Migrate(pth string, setupInit func(p string) error) error {
+	if pathExists(path.Join(pth, "graph")) {
+		if err := a.migrateRepositories(pth); err != nil {
+			return err
+		}
+		if err := a.migrateImages(path.Join(pth, "graph")); err != nil {
+			return err
+		}
+		return a.migrateContainers(path.Join(pth, "containers"), setupInit)
+	}
+	return nil
+}
+
+func (a *Driver) migrateRepositories(pth string) error {
+	name := path.Join(pth, "repositories")
+	if err := os.Rename(name, name+"-aufs"); err != nil && !os.IsNotExist(err) {
+		return err
+	}
+	return nil
+}
+
// migrateContainers relocates each container's "rw" layer into the
// driver's diff directory and, when the container id is not yet known
// to the driver, recreates its layer chain: an "<id>-init" layer on top
// of the container's image (populated via setupInit), then the container
// layer itself on top of the init layer.
func (a *Driver) migrateContainers(pth string, setupInit func(p string) error) error {
	fis, err := ioutil.ReadDir(pth)
	if err != nil {
		return err
	}

	for _, fi := range fis {
		// Only directories that still have a legacy "rw" layer need work.
		if id := fi.Name(); fi.IsDir() && pathExists(path.Join(pth, id, "rw")) {
			if err := tryRelocate(path.Join(pth, id, "rw"), path.Join(a.rootPath(), "diff", id)); err != nil {
				return err
			}

			if !a.Exists(id) {

				// The container's image id comes from its legacy config.
				metadata, err := loadMetadata(path.Join(pth, id, "config.json"))
				if err != nil {
					return err
				}

				initID := fmt.Sprintf("%s-init", id)
				if err := a.Create(initID, metadata.Image); err != nil {
					return err
				}

				initPath, err := a.Get(initID)
				if err != nil {
					return err
				}
				// setup init layer
				if err := setupInit(initPath); err != nil {
					return err
				}

				// Finally register the container layer on top of init.
				if err := a.Create(id, initID); err != nil {
					return err
				}
			}
		}
	}
	return nil
}
+
// migrateImages loads the metadata of every legacy image under pth,
// links each record to its parent, and migrates them all (parents are
// handled first inside migrateImage).
func (a *Driver) migrateImages(pth string) error {
	fis, err := ioutil.ReadDir(pth)
	if err != nil {
		return err
	}
	var (
		m       = make(map[string]*metadata)
		current *metadata
		exists  bool
	)

	// First pass: collect metadata for every directory that still has a
	// legacy "layer" subdirectory.
	for _, fi := range fis {
		if id := fi.Name(); fi.IsDir() && pathExists(path.Join(pth, id, "layer")) {
			if current, exists = m[id]; !exists {
				current, err = loadMetadata(path.Join(pth, id, "json"))
				if err != nil {
					return err
				}
				m[id] = current
			}
		}
	}

	// Second pass: resolve parent pointers. A parent not present in the
	// map (already migrated or missing) leaves the pointer nil.
	for _, v := range m {
		v.parent = m[v.ParentID]
	}

	migrated := make(map[string]bool)
	for _, v := range m {
		if err := a.migrateImage(v, pth, migrated); err != nil {
			return err
		}
	}
	return nil
}
+
+func (a *Driver) migrateImage(m *metadata, pth string, migrated map[string]bool) error {
+	if !migrated[m.ID] {
+		if m.parent != nil {
+			a.migrateImage(m.parent, pth, migrated)
+		}
+		if err := tryRelocate(path.Join(pth, m.ID, "layer"), path.Join(a.rootPath(), "diff", m.ID)); err != nil {
+			return err
+		}
+		if !a.Exists(m.ID) {
+			if err := a.Create(m.ID, m.ParentID); err != nil {
+				return err
+			}
+		}
+		migrated[m.ID] = true
+	}
+	return nil
+}
+
+// tryRelocate will try to rename the old path to the new pack and if
+// the operation fails, it will fallback to a symlink
+func tryRelocate(oldPath, newPath string) error {
+	s, err := os.Lstat(newPath)
+	if err != nil && !os.IsNotExist(err) {
+		return err
+	}
+	// If the destination is a symlink then we already tried to relocate once before
+	// and it failed so we delete it and try to remove
+	if s != nil && s.Mode()&os.ModeSymlink == os.ModeSymlink {
+		if err := os.RemoveAll(newPath); err != nil {
+			return err
+		}
+	}
+	if err := os.Rename(oldPath, newPath); err != nil {
+		if sErr := os.Symlink(oldPath, newPath); sErr != nil {
+			return fmt.Errorf("Unable to relocate %s to %s: Rename err %s Symlink err %s", oldPath, newPath, err, sErr)
+		}
+	}
+	return nil
+}
+
+func loadMetadata(pth string) (*metadata, error) {
+	f, err := os.Open(pth)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	var (
+		out = &metadata{}
+		dec = json.NewDecoder(f)
+	)
+
+	if err := dec.Decode(out); err != nil {
+		return nil, err
+	}
+	return out, nil
+}

+ 3 - 19
mount.go → graphdriver/aufs/mount.go

@@ -1,13 +1,11 @@
-package docker
+package aufs
 
 import (
-	"fmt"
 	"github.com/dotcloud/docker/utils"
 	"os"
 	"os/exec"
 	"path/filepath"
 	"syscall"
-	"time"
 )
 
 func Unmount(target string) error {
@@ -17,22 +15,7 @@ func Unmount(target string) error {
 	if err := syscall.Unmount(target, 0); err != nil {
 		return err
 	}
-	// Even though we just unmounted the filesystem, AUFS will prevent deleting the mntpoint
-	// for some time. We'll just keep retrying until it succeeds.
-	for retries := 0; retries < 1000; retries++ {
-		err := os.Remove(target)
-		if err == nil {
-			// rm mntpoint succeeded
-			return nil
-		}
-		if os.IsNotExist(err) {
-			// mntpoint doesn't exist anymore. Success.
-			return nil
-		}
-		// fmt.Printf("(%v) Remove %v returned: %v\n", retries, target, err)
-		time.Sleep(10 * time.Millisecond)
-	}
-	return fmt.Errorf("Umount: Failed to umount %v", target)
+	return nil
 }
 
 func Mounted(mountpoint string) (bool, error) {
@@ -49,5 +32,6 @@ func Mounted(mountpoint string) (bool, error) {
 	}
 	mntpointSt := mntpoint.Sys().(*syscall.Stat_t)
 	parentSt := parent.Sys().(*syscall.Stat_t)
+
 	return mntpointSt.Dev != parentSt.Dev, nil
 }

+ 1 - 1
mount_darwin.go → graphdriver/aufs/mount_darwin.go

@@ -1,4 +1,4 @@
-package docker
+package aufs
 
 import "errors"
 

+ 1 - 1
mount_linux.go → graphdriver/aufs/mount_linux.go

@@ -1,4 +1,4 @@
-package docker
+package aufs
 
 import "syscall"
 

+ 956 - 0
graphdriver/devmapper/deviceset.go

@@ -0,0 +1,956 @@
+package devmapper
+
+import (
+	"encoding/json"
+	"fmt"
+	"github.com/dotcloud/docker/utils"
+	"io"
+	"io/ioutil"
+	"path"
+	"path/filepath"
+	"strconv"
+	"sync"
+	"time"
+)
+
// Default sizes for the sparse loopback images backing the thin pool and
// for the base filesystem: 100 GB data, 2 GB metadata, 10 GB base fs.
var (
	DefaultDataLoopbackSize     int64  = 100 * 1024 * 1024 * 1024
	DefaultMetaDataLoopbackSize int64  = 2 * 1024 * 1024 * 1024
	DefaultBaseFsSize           uint64 = 10 * 1024 * 1024 * 1024
)
+
// DevInfo describes one thin-provisioned device in the set; it is
// persisted as JSON in the metadata file.
type DevInfo struct {
	Hash          string     `json:"-"`
	DeviceId      int        `json:"device_id"`
	Size          uint64     `json:"size"`
	TransactionId uint64     `json:"transaction_id"`
	Initialized   bool       `json:"initialized"`
	devices       *DeviceSet `json:"-"` // back-pointer to the owning set, not serialized
}

// MetaData is the serialized state of a DeviceSet, keyed by device hash.
// NOTE(review): the tag below is malformed (`json:devices`, missing
// quotes), so encoding/json ignores it and marshals the field under the
// key "Devices". Fixing it to `json:"devices"` would change the on-disk
// key of the metadata file — confirm compatibility before changing.
type MetaData struct {
	Devices map[string]*DevInfo `json:devices`
}
+
// DeviceSet tracks every thin device created on top of a single thin
// pool. The embedded Mutex serializes operations on the set (see
// AddDevice); the embedded MetaData holds the persisted device map.
type DeviceSet struct {
	MetaData
	sync.Mutex
	root             string // docker root directory the set lives under
	devicePrefix     string // name prefix for all devices in this set
	TransactionId    uint64 // transaction id last committed to the pool
	NewTransactionId uint64 // next transaction id to commit
	nextFreeDevice   int    // next device id to hand out
	activeMounts     map[string]int // presumably per-path mount refcounts — confirm with users of this field
}

// DiskUsage reports used vs total bytes for one pool component.
type DiskUsage struct {
	Used  uint64
	Total uint64
}

// Status summarizes the pool: its name, backing loopback files, data and
// metadata usage, and sector size.
type Status struct {
	PoolName         string
	DataLoopback     string
	MetadataLoopback string
	Data             DiskUsage
	Metadata         DiskUsage
	SectorSize       uint64
}

// DevStatus reports the device-mapper status of a single device.
type DevStatus struct {
	DeviceId            int
	Size                uint64
	TransactionId       uint64
	SizeInSectors       uint64
	MappedSectors       uint64
	HighestMappedSector uint64
}
+
// getDevName maps a device-mapper device name to its /dev path.
func getDevName(name string) string {
	return fmt.Sprintf("/dev/mapper/%s", name)
}
+
+func (info *DevInfo) Name() string {
+	hash := info.Hash
+	if hash == "" {
+		hash = "base"
+	}
+	return fmt.Sprintf("%s-%s", info.devices.devicePrefix, hash)
+}
+
// DevName returns the /dev/mapper path for this device.
func (info *DevInfo) DevName() string {
	return getDevName(info.Name())
}
+
// loopbackDir is the directory holding the sparse data/metadata images.
func (devices *DeviceSet) loopbackDir() string {
	return path.Join(devices.root, "devicemapper")
}

// jsonFile is the path of the persisted device metadata.
func (devices *DeviceSet) jsonFile() string {
	return path.Join(devices.loopbackDir(), "json")
}

// getPoolName returns the device-mapper name of the thin pool.
func (devices *DeviceSet) getPoolName() string {
	return devices.devicePrefix + "-pool"
}

// getPoolDevName returns the /dev/mapper path of the thin pool.
func (devices *DeviceSet) getPoolDevName() string {
	return getDevName(devices.getPoolName())
}
+
+func (devices *DeviceSet) hasImage(name string) bool {
+	dirname := devices.loopbackDir()
+	filename := path.Join(dirname, name)
+
+	_, err := osStat(filename)
+	return err == nil
+}
+
// ensureImage creates a sparse file of <size> bytes at the path
// <root>/devicemapper/<name>.
// If the file already exists, it does nothing.
// Either way it returns the full path.
func (devices *DeviceSet) ensureImage(name string, size int64) (string, error) {
	dirname := devices.loopbackDir()
	filename := path.Join(dirname, name)

	// An already-existing directory is fine; any other MkdirAll error is not.
	if err := osMkdirAll(dirname, 0700); err != nil && !osIsExist(err) {
		return "", err
	}

	if _, err := osStat(filename); err != nil {
		if !osIsNotExist(err) {
			return "", err
		}
		utils.Debugf("Creating loopback file %s for device-manage use", filename)
		file, err := osOpenFile(filename, osORdWr|osOCreate, 0600)
		if err != nil {
			return "", err
		}
		defer file.Close()

		// Truncate produces a sparse file: no data blocks are allocated
		// until they are actually written.
		if err = file.Truncate(size); err != nil {
			return "", err
		}
	}
	return filename, nil
}
+
+func (devices *DeviceSet) allocateDeviceId() int {
+	// TODO: Add smarter reuse of deleted devices
+	id := devices.nextFreeDevice
+	devices.nextFreeDevice = devices.nextFreeDevice + 1
+	return id
+}
+
+func (devices *DeviceSet) allocateTransactionId() uint64 {
+	devices.NewTransactionId = devices.NewTransactionId + 1
+	return devices.NewTransactionId
+}
+
+func (devices *DeviceSet) saveMetadata() error {
+	jsonData, err := json.Marshal(devices.MetaData)
+	if err != nil {
+		return fmt.Errorf("Error encoding metaadata to json: %s", err)
+	}
+	tmpFile, err := ioutil.TempFile(filepath.Dir(devices.jsonFile()), ".json")
+	if err != nil {
+		return fmt.Errorf("Error creating metadata file: %s", err)
+	}
+
+	n, err := tmpFile.Write(jsonData)
+	if err != nil {
+		return fmt.Errorf("Error writing metadata to %s: %s", tmpFile.Name(), err)
+	}
+	if n < len(jsonData) {
+		return io.ErrShortWrite
+	}
+	if err := tmpFile.Sync(); err != nil {
+		return fmt.Errorf("Error syncing metadata file %s: %s", tmpFile.Name(), err)
+	}
+	if err := tmpFile.Close(); err != nil {
+		return fmt.Errorf("Error closing metadata file %s: %s", tmpFile.Name(), err)
+	}
+	if err := osRename(tmpFile.Name(), devices.jsonFile()); err != nil {
+		return fmt.Errorf("Error committing metadata file", err)
+	}
+
+	if devices.NewTransactionId != devices.TransactionId {
+		if err = setTransactionId(devices.getPoolDevName(), devices.TransactionId, devices.NewTransactionId); err != nil {
+			return fmt.Errorf("Error setting devmapper transition ID: %s", err)
+		}
+		devices.TransactionId = devices.NewTransactionId
+	}
+	return nil
+}
+
// registerDevice records a new device in the in-memory map and persists
// the metadata. If the save fails the in-memory entry is rolled back so
// the map stays consistent with the file.
func (devices *DeviceSet) registerDevice(id int, hash string, size uint64) (*DevInfo, error) {
	utils.Debugf("registerDevice(%v, %v)", id, hash)
	info := &DevInfo{
		Hash:          hash,
		DeviceId:      id,
		Size:          size,
		TransactionId: devices.allocateTransactionId(),
		Initialized:   false,
		devices:       devices,
	}

	devices.Devices[hash] = info
	if err := devices.saveMetadata(); err != nil {
		// Try to remove unused device
		delete(devices.Devices, hash)
		return nil, err
	}

	return info, nil
}
+
+func (devices *DeviceSet) activateDeviceIfNeeded(hash string) error {
+	utils.Debugf("activateDeviceIfNeeded(%v)", hash)
+	info := devices.Devices[hash]
+	if info == nil {
+		return fmt.Errorf("Unknown device %s", hash)
+	}
+
+	if devinfo, _ := getInfo(info.Name()); devinfo != nil && devinfo.Exists != 0 {
+		return nil
+	}
+
+	return activateDevice(devices.getPoolDevName(), info.Name(), info.DeviceId, info.Size)
+}
+
+func (devices *DeviceSet) createFilesystem(info *DevInfo) error {
+	devname := info.DevName()
+
+	err := execRun("mkfs.ext4", "-E", "discard,lazy_itable_init=0,lazy_journal_init=0", devname)
+	if err != nil {
+		err = execRun("mkfs.ext4", "-E", "discard,lazy_itable_init=0", devname)
+	}
+	if err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+	return nil
+}
+
// loadMetaData restores the DeviceSet state: it reads the pool's current
// transaction id from device-mapper status, unmarshals the JSON metadata
// file (if present), wires each DevInfo back to the set, advances
// nextFreeDevice past all known ids, and drops devices whose transaction
// id is newer than the pool's (they were lost to a crash before commit).
func (devices *DeviceSet) loadMetaData() error {
	utils.Debugf("loadMetadata()")
	defer utils.Debugf("loadMetadata END")
	_, _, _, params, err := getStatus(devices.getPoolName())
	if err != nil {
		utils.Debugf("\n--->Err: %s\n", err)
		return err
	}

	// The leading integer of the pool status params is the transaction id.
	if _, err := fmt.Sscanf(params, "%d", &devices.TransactionId); err != nil {
		utils.Debugf("\n--->Err: %s\n", err)
		return err
	}
	devices.NewTransactionId = devices.TransactionId

	// A missing metadata file is not an error: start with an empty map.
	jsonData, err := ioutil.ReadFile(devices.jsonFile())
	if err != nil && !osIsNotExist(err) {
		utils.Debugf("\n--->Err: %s\n", err)
		return err
	}

	devices.MetaData.Devices = make(map[string]*DevInfo)
	if jsonData != nil {
		if err := json.Unmarshal(jsonData, &devices.MetaData); err != nil {
			utils.Debugf("\n--->Err: %s\n", err)
			return err
		}
	}

	for hash, d := range devices.Devices {
		// The hash and the back-pointer are not serialized; restore them.
		d.Hash = hash
		d.devices = devices

		if d.DeviceId >= devices.nextFreeDevice {
			devices.nextFreeDevice = d.DeviceId + 1
		}

		// If the transaction id is larger than the actual one we lost the device due to some crash
		if d.TransactionId > devices.TransactionId {
			utils.Debugf("Removing lost device %s with id %d", hash, d.TransactionId)
			delete(devices.Devices, hash)
		}
	}
	return nil
}
+
// setupBaseImage creates, formats and registers the base device (empty
// hash) that all image/container devices are snapshotted from. A
// leftover base device that never finished initializing (e.g. after an
// interrupted run) is removed and rebuilt.
func (devices *DeviceSet) setupBaseImage() error {
	oldInfo := devices.Devices[""]
	if oldInfo != nil && oldInfo.Initialized {
		return nil
	}

	// Exists but never finished initializing: discard and start over.
	if oldInfo != nil && !oldInfo.Initialized {
		utils.Debugf("Removing uninitialized base image")
		if err := devices.removeDevice(""); err != nil {
			utils.Debugf("\n--->Err: %s\n", err)
			return err
		}
	}

	utils.Debugf("Initializing base device-manager snapshot")

	id := devices.allocateDeviceId()

	// Create initial device
	if err := createDevice(devices.getPoolDevName(), id); err != nil {
		utils.Debugf("\n--->Err: %s\n", err)
		return err
	}

	utils.Debugf("Registering base device (id %v) with FS size %v", id, DefaultBaseFsSize)
	info, err := devices.registerDevice(id, "", DefaultBaseFsSize)
	if err != nil {
		// Registration failed: best-effort removal of the orphan device.
		_ = deleteDevice(devices.getPoolDevName(), id)
		utils.Debugf("\n--->Err: %s\n", err)
		return err
	}

	utils.Debugf("Creating filesystem on base device-manager snapshot")

	if err = devices.activateDeviceIfNeeded(""); err != nil {
		utils.Debugf("\n--->Err: %s\n", err)
		return err
	}

	if err := devices.createFilesystem(info); err != nil {
		utils.Debugf("\n--->Err: %s\n", err)
		return err
	}

	// Mark initialized only once the filesystem exists; roll back the
	// flag if persisting the metadata fails.
	info.Initialized = true
	if err = devices.saveMetadata(); err != nil {
		info.Initialized = false
		utils.Debugf("\n--->Err: %s\n", err)
		return err
	}

	return nil
}
+
+func setCloseOnExec(name string) {
+	if fileInfos, _ := ioutil.ReadDir("/proc/self/fd"); fileInfos != nil {
+		for _, i := range fileInfos {
+			link, _ := osReadlink(filepath.Join("/proc/self/fd", i.Name()))
+			if link == name {
+				fd, err := strconv.Atoi(i.Name())
+				if err == nil {
+					sysCloseOnExec(fd)
+				}
+			}
+		}
+	}
+}
+
+func (devices *DeviceSet) log(level int, file string, line int, dmError int, message string) {
+	if level >= 7 {
+		return // Ignore _LOG_DEBUG
+	}
+
+	utils.Debugf("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message)
+}
+
// major extracts the major component from a combined device number.
func major(device uint64) uint64 {
	maj := device >> 8
	return maj & 0xfff
}
+
// minor extracts the minor component from a combined device number
// (low byte plus the extended bits above the major field).
func minor(device uint64) uint64 {
	low := device & 0xff
	high := (device >> 12) & 0xfff00
	return low | high
}
+
// ResizePool grows the data loopback file to size bytes and makes the
// thin pool pick up the new capacity (suspend, reload, resume).
// Shrinking is refused.
//
// NOTE(review): several checks below test the returned value for nil
// instead of checking err — they work for these calls but can return a
// nil error on failure paths; confirm before changing.
func (devices *DeviceSet) ResizePool(size int64) error {
	dirname := devices.loopbackDir()
	datafilename := path.Join(dirname, "data")
	metadatafilename := path.Join(dirname, "metadata")

	datafile, err := osOpenFile(datafilename, osORdWr, 0)
	if datafile == nil {
		return err
	}
	defer datafile.Close()

	fi, err := datafile.Stat()
	if fi == nil {
		return err
	}

	// Only growing is supported.
	if fi.Size() > size {
		return fmt.Errorf("Can't shrink file")
	}

	dataloopback := FindLoopDeviceFor(&osFile{File: datafile})
	if dataloopback == nil {
		return fmt.Errorf("Unable to find loopback mount for: %s", datafilename)
	}
	defer dataloopback.Close()

	metadatafile, err := osOpenFile(metadatafilename, osORdWr, 0)
	if metadatafile == nil {
		return err
	}
	defer metadatafile.Close()

	metadataloopback := FindLoopDeviceFor(&osFile{File: metadatafile})
	if metadataloopback == nil {
		return fmt.Errorf("Unable to find loopback mount for: %s", metadatafilename)
	}
	defer metadataloopback.Close()

	// Grow loopback file
	if err := datafile.Truncate(size); err != nil {
		return fmt.Errorf("Unable to grow loopback file: %s", err)
	}

	// Reload size for loopback device
	if err := LoopbackSetCapacity(dataloopback); err != nil {
		return fmt.Errorf("Unable to update loopback capacity: %s", err)
	}

	// Suspend the pool
	if err := suspendDevice(devices.getPoolName()); err != nil {
		return fmt.Errorf("Unable to suspend pool: %s", err)
	}

	// Reload with the new block sizes
	if err := reloadPool(devices.getPoolName(), dataloopback, metadataloopback); err != nil {
		return fmt.Errorf("Unable to reload pool: %s", err)
	}

	// Resume the pool
	if err := resumeDevice(devices.getPoolName()); err != nil {
		return fmt.Errorf("Unable to resume pool: %s", err)
	}

	return nil
}
+
+// initDevmapper prepares the devicemapper thin-pool backing this DeviceSet:
+// it ensures the sparse data/metadata loopback images exist, derives the
+// device-name prefix from the docker root directory, creates the pool device
+// if it does not exist yet, loads previously saved metadata, and (when doInit
+// is set) provisions the base image.
+func (devices *DeviceSet) initDevmapper(doInit bool) error {
+	logInit(devices)
+
+	// Make sure the sparse images exist in <root>/devicemapper/data and
+	// <root>/devicemapper/metadata
+
+	hasData := devices.hasImage("data")
+	hasMetadata := devices.hasImage("metadata")
+
+	// Fixed: the original format strings had a dangling %s with no argument
+	// (and a "Looback" typo); report the directory that was searched.
+	if !doInit && !hasData {
+		return fmt.Errorf("Loopback data file not found in %s", devices.loopbackDir())
+	}
+
+	if !doInit && !hasMetadata {
+		return fmt.Errorf("Loopback metadata file not found in %s", devices.loopbackDir())
+	}
+
+	createdLoopback := !hasData || !hasMetadata
+	data, err := devices.ensureImage("data", DefaultDataLoopbackSize)
+	if err != nil {
+		utils.Debugf("Error device ensureImage (data): %s\n", err)
+		return err
+	}
+	metadata, err := devices.ensureImage("metadata", DefaultMetaDataLoopbackSize)
+	if err != nil {
+		utils.Debugf("Error device ensureImage (metadata): %s\n", err)
+		return err
+	}
+
+	// Set the device prefix from the device id and inode of the docker root dir
+
+	st, err := osStat(devices.root)
+	if err != nil {
+		return fmt.Errorf("Error looking up dir %s: %s", devices.root, err)
+	}
+	sysSt := toSysStatT(st.Sys())
+	// "reg-" stands for "regular file".
+	// In the future we might use "dev-" for "device file", etc.
+	// docker-maj,min[-inode] stands for:
+	//	- Managed by docker
+	//	- The target of this device is at major <maj> and minor <min>
+	//	- If <inode> is defined, use that file inside the device as a loopback image. Otherwise use the device itself.
+	devices.devicePrefix = fmt.Sprintf("docker-%d:%d-%d", major(sysSt.Dev), minor(sysSt.Dev), sysSt.Ino)
+	utils.Debugf("Generated prefix: %s", devices.devicePrefix)
+
+	// Check for the existence of the device <prefix>-pool
+	utils.Debugf("Checking for existence of the pool '%s'", devices.getPoolName())
+	info, err := getInfo(devices.getPoolName())
+	if info == nil {
+		utils.Debugf("Error device getInfo: %s", err)
+		return err
+	}
+
+	// It seems libdevmapper opens this without O_CLOEXEC, and go exec will not close files
+	// that are not Close-on-exec, and lxc-start will die if it inherits any unexpected files,
+	// so we add this badhack to make sure it closes itself
+	setCloseOnExec("/dev/mapper/control")
+
+	// If the pool doesn't exist, create it
+	if info.Exists == 0 {
+		utils.Debugf("Pool doesn't exist. Creating it.")
+
+		dataFile, err := AttachLoopDevice(data)
+		if err != nil {
+			utils.Debugf("\n--->Err: %s\n", err)
+			return err
+		}
+		defer dataFile.Close()
+
+		metadataFile, err := AttachLoopDevice(metadata)
+		if err != nil {
+			utils.Debugf("\n--->Err: %s\n", err)
+			return err
+		}
+		defer metadataFile.Close()
+
+		if err := createPool(devices.getPoolName(), dataFile, metadataFile); err != nil {
+			utils.Debugf("\n--->Err: %s\n", err)
+			return err
+		}
+	}
+
+	// If we didn't just create the data or metadata image, we need to
+	// load the metadata from the existing file.
+	if !createdLoopback {
+		if err = devices.loadMetaData(); err != nil {
+			utils.Debugf("\n--->Err: %s\n", err)
+			return err
+		}
+	}
+
+	// Setup the base image
+	if doInit {
+		if err := devices.setupBaseImage(); err != nil {
+			utils.Debugf("Error device setupBaseImage: %s\n", err)
+			return err
+		}
+	}
+
+	return nil
+}
+
+// AddDevice creates a new thin snapshot device registered under hash,
+// based on the existing device registered under baseHash.
+// It fails if hash is already registered or baseHash is unknown.
+func (devices *DeviceSet) AddDevice(hash, baseHash string) error {
+	devices.Lock()
+	defer devices.Unlock()
+
+	if devices.Devices[hash] != nil {
+		return fmt.Errorf("hash %s already exists", hash)
+	}
+
+	baseInfo := devices.Devices[baseHash]
+	if baseInfo == nil {
+		return fmt.Errorf("Error adding device for '%s': can't find device for parent '%s'", hash, baseHash)
+	}
+
+	deviceId := devices.allocateDeviceId()
+
+	if err := devices.createSnapDevice(devices.getPoolDevName(), deviceId, baseInfo.Name(), baseInfo.DeviceId); err != nil {
+		utils.Debugf("Error creating snap device: %s\n", err)
+		return err
+	}
+
+	// If we fail to persist the new device's metadata, roll back the
+	// snapshot we just created so the pool is not left with an orphan.
+	if _, err := devices.registerDevice(deviceId, hash, baseInfo.Size); err != nil {
+		deleteDevice(devices.getPoolDevName(), deviceId)
+		utils.Debugf("Error registering device: %s\n", err)
+		return err
+	}
+	return nil
+}
+
+// removeDevice deletes the thin device registered under hash from the pool
+// and drops it from the in-memory and on-disk metadata.
+// Caller must hold devices.Lock (see the exported RemoveDevice wrapper).
+func (devices *DeviceSet) removeDevice(hash string) error {
+	info := devices.Devices[hash]
+	if info == nil {
+		return fmt.Errorf("hash %s doesn't exists", hash)
+	}
+
+	// Deactivate the device first if it is currently active; errors from
+	// getInfo are treated as "not active".
+	devinfo, _ := getInfo(info.Name())
+	if devinfo != nil && devinfo.Exists != 0 {
+		if err := removeDevice(info.Name()); err != nil {
+			utils.Debugf("Error removing device: %s\n", err)
+			return err
+		}
+	}
+
+	// Persist the de-initialization before deleting, so a crash mid-way
+	// never leaves metadata claiming an initialized device that is gone.
+	if info.Initialized {
+		info.Initialized = false
+		if err := devices.saveMetadata(); err != nil {
+			utils.Debugf("Error saving meta data: %s\n", err)
+			return err
+		}
+	}
+
+	if err := deleteDevice(devices.getPoolDevName(), info.DeviceId); err != nil {
+		utils.Debugf("Error deleting device: %s\n", err)
+		return err
+	}
+
+	devices.allocateTransactionId()
+	delete(devices.Devices, info.Hash)
+
+	// If saving fails, restore the in-memory entry so state stays
+	// consistent with what is on disk.
+	if err := devices.saveMetadata(); err != nil {
+		devices.Devices[info.Hash] = info
+		utils.Debugf("Error saving meta data: %s\n", err)
+		return err
+	}
+
+	return nil
+}
+
+func (devices *DeviceSet) RemoveDevice(hash string) error {
+	devices.Lock()
+	defer devices.Unlock()
+
+	return devices.removeDevice(hash)
+}
+
+// deactivateDevice removes the devicemapper mapping for the device
+// registered under hash (or the pool itself for the magic hash "pool"),
+// then waits up to one second for the removal to become effective.
+// It is a no-op if the device is not currently active.
+func (devices *DeviceSet) deactivateDevice(hash string) error {
+	utils.Debugf("[devmapper] deactivateDevice(%s)", hash)
+	defer utils.Debugf("[devmapper] deactivateDevice END")
+	var devname string
+	// FIXME: shouldn't we just register the pool into devices?
+	devname, err := devices.byHash(hash)
+	if err != nil {
+		return err
+	}
+	devinfo, err := getInfo(devname)
+	if err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+	if devinfo.Exists != 0 {
+		if err := removeDevice(devname); err != nil {
+			utils.Debugf("\n--->Err: %s\n", err)
+			return err
+		}
+		// Removal is asynchronous; block until the kernel reports the
+		// device gone (or the waitRemove timeout fires).
+		if err := devices.waitRemove(hash); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// waitRemove blocks until either:
+// a) the device registered at <device_set_prefix>-<hash> is removed,
+// or b) the 1 second timeout expires.
+func (devices *DeviceSet) waitRemove(hash string) error {
+	utils.Debugf("[deviceset %s] waitRemove(%s)", devices.devicePrefix, hash)
+	// Fixed: the format string previously had one %s verb but was given
+	// two arguments (devicePrefix and hash), a go vet printf error.
+	defer utils.Debugf("[deviceset %s] waitRemove(%s) END", devices.devicePrefix, hash)
+	devname, err := devices.byHash(hash)
+	if err != nil {
+		return err
+	}
+	// Poll at 1ms intervals, for at most 1000 iterations (~1 second).
+	i := 0
+	for ; i < 1000; i += 1 {
+		devinfo, err := getInfo(devname)
+		if err != nil {
+			// If there is an error we assume the device doesn't exist.
+			// The error might actually be something else, but we can't differentiate.
+			return nil
+		}
+		if i%100 == 0 {
+			utils.Debugf("Waiting for removal of %s: exists=%d", devname, devinfo.Exists)
+		}
+		if devinfo.Exists == 0 {
+			break
+		}
+
+		time.Sleep(1 * time.Millisecond)
+	}
+	if i == 1000 {
+		return fmt.Errorf("Timeout while waiting for device %s to be removed", devname)
+	}
+	return nil
+}
+
+// waitClose blocks until either:
+// a) the device registered at <device_set_prefix>-<hash> is closed,
+// or b) the 1 second timeout expires.
+func (devices *DeviceSet) waitClose(hash string) error {
+	devname, err := devices.byHash(hash)
+	if err != nil {
+		return err
+	}
+	// Poll the open count at 1ms intervals for at most ~1 second.
+	for attempt := 0; attempt < 1000; attempt++ {
+		devinfo, err := getInfo(devname)
+		if err != nil {
+			return err
+		}
+		if attempt%100 == 0 {
+			utils.Debugf("Waiting for unmount of %s: opencount=%d", devname, devinfo.OpenCount)
+		}
+		if devinfo.OpenCount == 0 {
+			return nil
+		}
+		time.Sleep(1 * time.Millisecond)
+	}
+	return fmt.Errorf("Timeout while waiting for device %s to close", devname)
+}
+
+// byHash resolves a device hash to its devicemapper name. The magic hash
+// "pool" resolves to the deviceset's thin-pool device itself.
+// FIXME: it seems probably cleaner to register the pool in devices.Devices,
+// but I am afraid of arcane implications deep in the devicemapper code,
+// so this will do.
+func (devices *DeviceSet) byHash(hash string) (devname string, err error) {
+	if hash == "pool" {
+		return devices.getPoolDevName(), nil
+	}
+	if info := devices.Devices[hash]; info != nil {
+		return info.Name(), nil
+	}
+	return "", fmt.Errorf("hash %s doesn't exists", hash)
+}
+
+// Shutdown tears the DeviceSet down: it unmounts every tracked active
+// mount, waits for each device to be released, deactivates all devices,
+// and finally deactivates the thin-pool itself. All failures along the
+// way are logged but deliberately ignored (best-effort cleanup), so
+// Shutdown always returns nil.
+func (devices *DeviceSet) Shutdown() error {
+	devices.Lock()
+	defer devices.Unlock()
+
+	utils.Debugf("[deviceset %s] shutdown()", devices.devicePrefix)
+	utils.Debugf("[devmapper] Shutting down DeviceSet: %s", devices.root)
+	defer utils.Debugf("[deviceset %s] shutdown END", devices.devicePrefix)
+
+	// Unmount each path as many times as it was mounted.
+	for path, count := range devices.activeMounts {
+		for i := count; i > 0; i-- {
+			if err := sysUnmount(path, 0); err != nil {
+				utils.Debugf("Shutdown unmounting %s, error: %s\n", path, err)
+			}
+		}
+		delete(devices.activeMounts, path)
+	}
+
+	for _, d := range devices.Devices {
+		if err := devices.waitClose(d.Hash); err != nil {
+			utils.Errorf("Warning: error waiting for device %s to unmount: %s\n", d.Hash, err)
+		}
+		if err := devices.deactivateDevice(d.Hash); err != nil {
+			utils.Debugf("Shutdown deactivate %s , error: %s\n", d.Hash, err)
+		}
+	}
+
+	// Deactivate the pool last, after all devices backed by it are gone.
+	pool := devices.getPoolDevName()
+	if devinfo, err := getInfo(pool); err == nil && devinfo.Exists != 0 {
+		if err := devices.deactivateDevice("pool"); err != nil {
+			utils.Debugf("Shutdown deactivate %s , error: %s\n", pool, err)
+		}
+	}
+
+	return nil
+}
+
+// MountDevice activates (if needed) the device registered under hash and
+// mounts its ext4 filesystem at path, bumping the reference count in
+// activeMounts and marking the device as initialized.
+func (devices *DeviceSet) MountDevice(hash, path string, readOnly bool) error {
+	devices.Lock()
+	defer devices.Unlock()
+
+	if err := devices.activateDeviceIfNeeded(hash); err != nil {
+		return fmt.Errorf("Error activating devmapper device for '%s': %s", hash, err)
+	}
+
+	info := devices.Devices[hash]
+
+	var flags uintptr = sysMsMgcVal
+
+	if readOnly {
+		flags = flags | sysMsRdOnly
+	}
+
+	// Try with "discard" first; if the kernel rejects it (EINVAL, e.g.
+	// the option is unsupported), retry the mount without it.
+	err := sysMount(info.DevName(), path, "ext4", flags, "discard")
+	if err != nil && err == sysEInval {
+		err = sysMount(info.DevName(), path, "ext4", flags, "")
+	}
+	if err != nil {
+		return fmt.Errorf("Error mounting '%s' on '%s': %s", info.DevName(), path, err)
+	}
+
+	// Track how many times this path has been mounted, so Shutdown can
+	// unmount it the matching number of times.
+	count := devices.activeMounts[path]
+	devices.activeMounts[path] = count + 1
+
+	return devices.setInitialized(hash)
+}
+
+// UnmountDevice unmounts the filesystem at path, waits for the device
+// registered under hash to be fully released, decrements the mount
+// reference count, and optionally deactivates the device.
+func (devices *DeviceSet) UnmountDevice(hash, path string, deactivate bool) error {
+	utils.Debugf("[devmapper] UnmountDevice(hash=%s path=%s)", hash, path)
+	defer utils.Debugf("[devmapper] UnmountDevice END")
+	devices.Lock()
+	defer devices.Unlock()
+
+	utils.Debugf("[devmapper] Unmount(%s)", path)
+	if err := sysUnmount(path, 0); err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+	utils.Debugf("[devmapper] Unmount done")
+	// Wait for the unmount to be effective,
+	// by watching the value of Info.OpenCount for the device
+	if err := devices.waitClose(hash); err != nil {
+		return err
+	}
+
+	if count := devices.activeMounts[path]; count > 1 {
+		devices.activeMounts[path] = count - 1
+	} else {
+		delete(devices.activeMounts, path)
+	}
+
+	if deactivate {
+		// Best-effort: deactivation failure is deliberately ignored here,
+		// the unmount itself has already succeeded.
+		devices.deactivateDevice(hash)
+	}
+
+	return nil
+}
+
+func (devices *DeviceSet) HasDevice(hash string) bool {
+	devices.Lock()
+	defer devices.Unlock()
+
+	return devices.Devices[hash] != nil
+}
+
+func (devices *DeviceSet) HasInitializedDevice(hash string) bool {
+	devices.Lock()
+	defer devices.Unlock()
+
+	info := devices.Devices[hash]
+	return info != nil && info.Initialized
+}
+
+func (devices *DeviceSet) HasActivatedDevice(hash string) bool {
+	devices.Lock()
+	defer devices.Unlock()
+
+	info := devices.Devices[hash]
+	if info == nil {
+		return false
+	}
+	devinfo, _ := getInfo(info.Name())
+	return devinfo != nil && devinfo.Exists != 0
+}
+
+func (devices *DeviceSet) setInitialized(hash string) error {
+	info := devices.Devices[hash]
+	if info == nil {
+		return fmt.Errorf("Unknown device %s", hash)
+	}
+
+	info.Initialized = true
+	if err := devices.saveMetadata(); err != nil {
+		info.Initialized = false
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+
+	return nil
+}
+
+func (devices *DeviceSet) List() []string {
+	devices.Lock()
+	defer devices.Unlock()
+
+	ids := make([]string, len(devices.Devices))
+	i := 0
+	for k := range devices.Devices {
+		ids[i] = k
+		i++
+	}
+	return ids
+}
+
+// deviceStatus queries the devicemapper status line for devName and
+// returns the device size plus the thin target's mapped-sector counters
+// (all values in 512-byte sectors).
+func (devices *DeviceSet) deviceStatus(devName string) (sizeInSectors, mappedSectors, highestMappedSector uint64, err error) {
+	var params string
+	if _, sizeInSectors, _, params, err = getStatus(devName); err != nil {
+		return
+	}
+	_, err = fmt.Sscanf(params, "%d %d", &mappedSectors, &highestMappedSector)
+	return
+}
+
+func (devices *DeviceSet) GetDeviceStatus(hash string) (*DevStatus, error) {
+	devices.Lock()
+	defer devices.Unlock()
+
+	info := devices.Devices[hash]
+	if info == nil {
+		return nil, fmt.Errorf("No device %s", hash)
+	}
+
+	status := &DevStatus{
+		DeviceId:      info.DeviceId,
+		Size:          info.Size,
+		TransactionId: info.TransactionId,
+	}
+
+	if err := devices.activateDeviceIfNeeded(hash); err != nil {
+		return nil, fmt.Errorf("Error activating devmapper device for '%s': %s", hash, err)
+	}
+
+	if sizeInSectors, mappedSectors, highestMappedSector, err := devices.deviceStatus(info.DevName()); err != nil {
+		return nil, err
+	} else {
+		status.SizeInSectors = sizeInSectors
+		status.MappedSectors = mappedSectors
+		status.HighestMappedSector = highestMappedSector
+	}
+
+	return status, nil
+}
+
+func (devices *DeviceSet) poolStatus() (totalSizeInSectors, transactionId, dataUsed, dataTotal, metadataUsed, metadataTotal uint64, err error) {
+	var params string
+	if _, totalSizeInSectors, _, params, err = getStatus(devices.getPoolName()); err == nil {
+		_, err = fmt.Sscanf(params, "%d %d/%d %d/%d", &transactionId, &metadataUsed, &metadataTotal, &dataUsed, &dataTotal)
+	}
+	return
+}
+
+// Status reports pool-level usage information (data/metadata used and
+// total, sector size) for this DeviceSet. If the pool status cannot be
+// read, the usage fields are left at their zero values.
+func (devices *DeviceSet) Status() *Status {
+	devices.Lock()
+	defer devices.Unlock()
+
+	status := &Status{}
+
+	status.PoolName = devices.getPoolName()
+	status.DataLoopback = path.Join(devices.loopbackDir(), "data")
+	status.MetadataLoopback = path.Join(devices.loopbackDir(), "metadata")
+
+	totalSizeInSectors, _, dataUsed, dataTotal, metadataUsed, metadataTotal, err := devices.poolStatus()
+	// Guard against dataTotal == 0 (e.g. a broken or not-yet-provisioned
+	// pool), which would otherwise cause an integer divide-by-zero panic.
+	if err == nil && dataTotal > 0 {
+		// Convert from blocks to bytes
+		blockSizeInSectors := totalSizeInSectors / dataTotal
+
+		status.Data.Used = dataUsed * blockSizeInSectors * 512
+		status.Data.Total = dataTotal * blockSizeInSectors * 512
+
+		// metadata blocks are always 4k
+		status.Metadata.Used = metadataUsed * 4096
+		status.Metadata.Total = metadataTotal * 4096
+
+		status.SectorSize = blockSizeInSectors * 512
+	}
+
+	return status
+}
+
+// NewDeviceSet constructs a DeviceSet rooted at root and initializes its
+// devicemapper thin-pool. When doInit is true the base image is created
+// as part of initialization.
+func NewDeviceSet(root string, doInit bool) (*DeviceSet, error) {
+	SetDevDir("/dev")
+
+	devices := &DeviceSet{
+		root:         root,
+		MetaData:     MetaData{Devices: make(map[string]*DevInfo)},
+		activeMounts: make(map[string]int),
+	}
+
+	if err := devices.initDevmapper(doInit); err != nil {
+		return nil, err
+	}
+
+	return devices, nil
+}

+ 576 - 0
graphdriver/devmapper/devmapper.go

@@ -0,0 +1,576 @@
+package devmapper
+
+import (
+	"errors"
+	"fmt"
+	"github.com/dotcloud/docker/utils"
+	"runtime"
+)
+
+type DevmapperLogger interface {
+	log(level int, file string, line int, dmError int, message string)
+}
+
+const (
+	DeviceCreate TaskType = iota
+	DeviceReload
+	DeviceRemove
+	DeviceRemoveAll
+	DeviceSuspend
+	DeviceResume
+	DeviceInfo
+	DeviceDeps
+	DeviceRename
+	DeviceVersion
+	DeviceStatus
+	DeviceTable
+	DeviceWaitevent
+	DeviceList
+	DeviceClear
+	DeviceMknodes
+	DeviceListVersions
+	DeviceTargetMsg
+	DeviceSetGeometry
+)
+
+const (
+	AddNodeOnResume AddNodeType = iota
+	AddNodeOnCreate
+)
+
+// Sentinel errors returned by the devicemapper wrappers below.
+// (Fixed typos: "Invalide AddNoce" -> "Invalid AddNode",
+// "Unable set" -> "Unable to set".)
+var (
+	ErrTaskRun                = errors.New("dm_task_run failed")
+	ErrTaskSetName            = errors.New("dm_task_set_name failed")
+	ErrTaskSetMessage         = errors.New("dm_task_set_message failed")
+	ErrTaskSetAddNode         = errors.New("dm_task_set_add_node failed")
+	ErrTaskSetRo              = errors.New("dm_task_set_ro failed")
+	ErrTaskAddTarget          = errors.New("dm_task_add_target failed")
+	ErrTaskSetSector          = errors.New("dm_task_set_sector failed")
+	ErrTaskGetInfo            = errors.New("dm_task_get_info failed")
+	ErrTaskSetCookie          = errors.New("dm_task_set_cookie failed")
+	ErrNilCookie              = errors.New("cookie ptr can't be nil")
+	ErrAttachLoopbackDevice   = errors.New("loopback mounting failed")
+	ErrGetBlockSize           = errors.New("Can't get block size")
+	ErrUdevWait               = errors.New("wait on udev cookie failed")
+	ErrSetDevDir              = errors.New("dm_set_dev_dir failed")
+	ErrGetLibraryVersion      = errors.New("dm_get_library_version failed")
+	ErrCreateRemoveTask       = errors.New("Can't create task of type DeviceRemove")
+	ErrRunRemoveDevice        = errors.New("running removeDevice failed")
+	ErrInvalidAddNode         = errors.New("Invalid AddNode type")
+	ErrGetLoopbackBackingFile = errors.New("Unable to get loopback backing file")
+	ErrLoopbackSetCapacity    = errors.New("Unable to set loopback capacity")
+)
+
+type (
+	Task struct {
+		unmanaged *CDmTask
+	}
+	Info struct {
+		Exists        int
+		Suspended     int
+		LiveTable     int
+		InactiveTable int
+		OpenCount     int32
+		EventNr       uint32
+		Major         uint32
+		Minor         uint32
+		ReadOnly      int
+		TargetCount   int32
+	}
+	TaskType    int
+	AddNodeType int
+)
+
+func (t *Task) destroy() {
+	if t != nil {
+		DmTaskDestroy(t.unmanaged)
+		runtime.SetFinalizer(t, nil)
+	}
+}
+
+func TaskCreate(tasktype TaskType) *Task {
+	Ctask := DmTaskCreate(int(tasktype))
+	if Ctask == nil {
+		return nil
+	}
+	task := &Task{unmanaged: Ctask}
+	runtime.SetFinalizer(task, (*Task).destroy)
+	return task
+}
+
+func (t *Task) Run() error {
+	if res := DmTaskRun(t.unmanaged); res != 1 {
+		return ErrTaskRun
+	}
+	return nil
+}
+
+func (t *Task) SetName(name string) error {
+	if res := DmTaskSetName(t.unmanaged, name); res != 1 {
+		return ErrTaskSetName
+	}
+	return nil
+}
+
+func (t *Task) SetMessage(message string) error {
+	if res := DmTaskSetMessage(t.unmanaged, message); res != 1 {
+		return ErrTaskSetMessage
+	}
+	return nil
+}
+
+func (t *Task) SetSector(sector uint64) error {
+	if res := DmTaskSetSector(t.unmanaged, sector); res != 1 {
+		return ErrTaskSetSector
+	}
+	return nil
+}
+
+func (t *Task) SetCookie(cookie *uint, flags uint16) error {
+	if cookie == nil {
+		return ErrNilCookie
+	}
+	if res := DmTaskSetCookie(t.unmanaged, cookie, flags); res != 1 {
+		return ErrTaskSetCookie
+	}
+	return nil
+}
+
+func (t *Task) SetAddNode(addNode AddNodeType) error {
+	if addNode != AddNodeOnResume && addNode != AddNodeOnCreate {
+		return ErrInvalidAddNode
+	}
+	if res := DmTaskSetAddNode(t.unmanaged, addNode); res != 1 {
+		return ErrTaskSetAddNode
+	}
+	return nil
+}
+
+func (t *Task) SetRo() error {
+	if res := DmTaskSetRo(t.unmanaged); res != 1 {
+		return ErrTaskSetRo
+	}
+	return nil
+}
+
+func (t *Task) AddTarget(start, size uint64, ttype, params string) error {
+	if res := DmTaskAddTarget(t.unmanaged, start, size,
+		ttype, params); res != 1 {
+		return ErrTaskAddTarget
+	}
+	return nil
+}
+
+func (t *Task) GetInfo() (*Info, error) {
+	info := &Info{}
+	if res := DmTaskGetInfo(t.unmanaged, info); res != 1 {
+		return nil, ErrTaskGetInfo
+	}
+	return info, nil
+}
+
+func (t *Task) GetNextTarget(next uintptr) (nextPtr uintptr, start uint64,
+	length uint64, targetType string, params string) {
+
+	return DmGetNextTarget(t.unmanaged, next, &start, &length,
+			&targetType, &params),
+		start, length, targetType, params
+}
+
+func AttachLoopDevice(filename string) (*osFile, error) {
+	var fd int
+	res := DmAttachLoopDevice(filename, &fd)
+	if res == "" {
+		return nil, ErrAttachLoopbackDevice
+	}
+	return &osFile{File: osNewFile(uintptr(fd), res)}, nil
+}
+
+func getLoopbackBackingFile(file *osFile) (uint64, uint64, error) {
+	dev, inode, err := DmGetLoopbackBackingFile(file.Fd())
+	if err != 0 {
+		return 0, 0, ErrGetLoopbackBackingFile
+	}
+	return dev, inode, nil
+}
+
+func LoopbackSetCapacity(file *osFile) error {
+	if err := DmLoopbackSetCapacity(file.Fd()); err != 0 {
+		return ErrLoopbackSetCapacity
+	}
+	return nil
+}
+
+func FindLoopDeviceFor(file *osFile) *osFile {
+	stat, err := file.Stat()
+	if err != nil {
+		return nil
+	}
+	targetInode := stat.Sys().(*sysStatT).Ino
+	targetDevice := stat.Sys().(*sysStatT).Dev
+
+	for i := 0; true; i++ {
+		path := fmt.Sprintf("/dev/loop%d", i)
+
+		file, err := osOpenFile(path, osORdWr, 0)
+		if err != nil {
+			if osIsNotExist(err) {
+				return nil
+			}
+
+			// Ignore all errors until the first not-exist
+			// we want to continue looking for the file
+			continue
+		}
+
+		dev, inode, err := getLoopbackBackingFile(&osFile{File: file})
+		if err == nil && dev == targetDevice && inode == targetInode {
+			return &osFile{File: file}
+		}
+
+		file.Close()
+	}
+
+	return nil
+}
+
+func UdevWait(cookie uint) error {
+	if res := DmUdevWait(cookie); res != 1 {
+		utils.Debugf("Failed to wait on udev cookie %d", cookie)
+		return ErrUdevWait
+	}
+	return nil
+}
+
+func LogInitVerbose(level int) {
+	DmLogInitVerbose(level)
+}
+
+var dmLogger DevmapperLogger = nil
+
+func logInit(logger DevmapperLogger) {
+	dmLogger = logger
+	LogWithErrnoInit()
+}
+
+func SetDevDir(dir string) error {
+	if res := DmSetDevDir(dir); res != 1 {
+		utils.Debugf("Error dm_set_dev_dir")
+		return ErrSetDevDir
+	}
+	return nil
+}
+
+func GetLibraryVersion() (string, error) {
+	var version string
+	if res := DmGetLibraryVersion(&version); res != 1 {
+		return "", ErrGetLibraryVersion
+	}
+	return version, nil
+}
+
+// Useful helper for cleanup
+func RemoveDevice(name string) error {
+	task := TaskCreate(DeviceRemove)
+	if task == nil {
+		return ErrCreateRemoveTask
+	}
+	if err := task.SetName(name); err != nil {
+		utils.Debugf("Can't set task name %s", name)
+		return err
+	}
+	if err := task.Run(); err != nil {
+		return ErrRunRemoveDevice
+	}
+	return nil
+}
+
+func GetBlockDeviceSize(file *osFile) (uint64, error) {
+	size, errno := DmGetBlockSize(file.Fd())
+	if size == -1 || errno != 0 {
+		return 0, ErrGetBlockSize
+	}
+	return uint64(size), nil
+}
+
+// This is the programmatic example of "dmsetup create"
+func createPool(poolName string, dataFile, metadataFile *osFile) error {
+	task, err := createTask(DeviceCreate, poolName)
+	if task == nil {
+		return err
+	}
+
+	size, err := GetBlockDeviceSize(dataFile)
+	if err != nil {
+		return fmt.Errorf("Can't get data size")
+	}
+
+	params := metadataFile.Name() + " " + dataFile.Name() + " 128 32768"
+	if err := task.AddTarget(0, size/512, "thin-pool", params); err != nil {
+		return fmt.Errorf("Can't add target")
+	}
+
+	var cookie uint = 0
+	if err := task.SetCookie(&cookie, 0); err != nil {
+		return fmt.Errorf("Can't set cookie")
+	}
+
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running DeviceCreate (createPool)")
+	}
+
+	UdevWait(cookie)
+
+	return nil
+}
+
+// reloadPool replaces the table of an existing (suspended) thin-pool
+// device with a new thin-pool target sized to dataFile. Used when the
+// backing loopback files have been grown.
+func reloadPool(poolName string, dataFile, metadataFile *osFile) error {
+	task, err := createTask(DeviceReload, poolName)
+	if task == nil {
+		return err
+	}
+
+	size, err := GetBlockDeviceSize(dataFile)
+	if err != nil {
+		return fmt.Errorf("Can't get data size")
+	}
+
+	// thin-pool params: <metadata dev> <data dev> <block size> <low water mark>
+	params := metadataFile.Name() + " " + dataFile.Name() + " 128 32768"
+	if err := task.AddTarget(0, size/512, "thin-pool", params); err != nil {
+		return fmt.Errorf("Can't add target")
+	}
+
+	// Fixed: the error message previously said DeviceCreate, but this
+	// task is a DeviceReload (copy-paste from createPool).
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running DeviceReload")
+	}
+
+	return nil
+}
+
+func createTask(t TaskType, name string) (*Task, error) {
+	task := TaskCreate(t)
+	if task == nil {
+		return nil, fmt.Errorf("Can't create task of type %d", int(t))
+	}
+	if err := task.SetName(name); err != nil {
+		return nil, fmt.Errorf("Can't set task name %s", name)
+	}
+	return task, nil
+}
+
+func getInfo(name string) (*Info, error) {
+	task, err := createTask(DeviceInfo, name)
+	if task == nil {
+		return nil, err
+	}
+	if err := task.Run(); err != nil {
+		return nil, err
+	}
+	return task.GetInfo()
+}
+
+func getStatus(name string) (uint64, uint64, string, string, error) {
+	task, err := createTask(DeviceStatus, name)
+	if task == nil {
+		utils.Debugf("getStatus: Error createTask: %s", err)
+		return 0, 0, "", "", err
+	}
+	if err := task.Run(); err != nil {
+		utils.Debugf("getStatus: Error Run: %s", err)
+		return 0, 0, "", "", err
+	}
+
+	devinfo, err := task.GetInfo()
+	if err != nil {
+		utils.Debugf("getStatus: Error GetInfo: %s", err)
+		return 0, 0, "", "", err
+	}
+	if devinfo.Exists == 0 {
+		utils.Debugf("getStatus: Non existing device %s", name)
+		return 0, 0, "", "", fmt.Errorf("Non existing device %s", name)
+	}
+
+	_, start, length, targetType, params := task.GetNextTarget(0)
+	return start, length, targetType, params, nil
+}
+
+func setTransactionId(poolName string, oldId uint64, newId uint64) error {
+	task, err := createTask(DeviceTargetMsg, poolName)
+	if task == nil {
+		return err
+	}
+
+	if err := task.SetSector(0); err != nil {
+		return fmt.Errorf("Can't set sector")
+	}
+
+	if err := task.SetMessage(fmt.Sprintf("set_transaction_id %d %d", oldId, newId)); err != nil {
+		return fmt.Errorf("Can't set message")
+	}
+
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running setTransactionId")
+	}
+	return nil
+}
+
+func suspendDevice(name string) error {
+	task, err := createTask(DeviceSuspend, name)
+	if task == nil {
+		return err
+	}
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running DeviceSuspend")
+	}
+	return nil
+}
+
+// resumeDevice resumes a suspended devicemapper device and waits for the
+// corresponding udev event to complete.
+func resumeDevice(name string) error {
+	task, err := createTask(DeviceResume, name)
+	if task == nil {
+		return err
+	}
+
+	var cookie uint = 0
+	if err := task.SetCookie(&cookie, 0); err != nil {
+		return fmt.Errorf("Can't set cookie")
+	}
+
+	// Fixed: the error message previously said DeviceSuspend, but this
+	// task is a DeviceResume (copy-paste from suspendDevice).
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running DeviceResume")
+	}
+
+	UdevWait(cookie)
+
+	return nil
+}
+
+func createDevice(poolName string, deviceId int) error {
+	utils.Debugf("[devmapper] createDevice(poolName=%v, deviceId=%v)", poolName, deviceId)
+	task, err := createTask(DeviceTargetMsg, poolName)
+	if task == nil {
+		return err
+	}
+
+	if err := task.SetSector(0); err != nil {
+		return fmt.Errorf("Can't set sector")
+	}
+
+	if err := task.SetMessage(fmt.Sprintf("create_thin %d", deviceId)); err != nil {
+		return fmt.Errorf("Can't set message")
+	}
+
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running createDevice")
+	}
+	return nil
+}
+
+func deleteDevice(poolName string, deviceId int) error {
+	task, err := createTask(DeviceTargetMsg, poolName)
+	if task == nil {
+		return err
+	}
+
+	if err := task.SetSector(0); err != nil {
+		return fmt.Errorf("Can't set sector")
+	}
+
+	if err := task.SetMessage(fmt.Sprintf("delete %d", deviceId)); err != nil {
+		return fmt.Errorf("Can't set message")
+	}
+
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running deleteDevice")
+	}
+	return nil
+}
+
+func removeDevice(name string) error {
+	utils.Debugf("[devmapper] removeDevice START")
+	defer utils.Debugf("[devmapper] removeDevice END")
+	task, err := createTask(DeviceRemove, name)
+	if task == nil {
+		return err
+	}
+	if err = task.Run(); err != nil {
+		return fmt.Errorf("Error running removeDevice")
+	}
+	return nil
+}
+
+func activateDevice(poolName string, name string, deviceId int, size uint64) error {
+	task, err := createTask(DeviceCreate, name)
+	if task == nil {
+		return err
+	}
+
+	params := fmt.Sprintf("%s %d", poolName, deviceId)
+	if err := task.AddTarget(0, size/512, "thin", params); err != nil {
+		return fmt.Errorf("Can't add target")
+	}
+	if err := task.SetAddNode(AddNodeOnCreate); err != nil {
+		return fmt.Errorf("Can't add node")
+	}
+
+	var cookie uint = 0
+	if err := task.SetCookie(&cookie, 0); err != nil {
+		return fmt.Errorf("Can't set cookie")
+	}
+
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running DeviceCreate (activateDevice)")
+	}
+
+	UdevWait(cookie)
+
+	return nil
+}
+
+// createSnapDevice sends a create_snap message to the thin-pool to create
+// deviceId as a snapshot of baseDeviceId. If the base device is currently
+// active it must be suspended for the duration of the snapshot, and every
+// error path below resumes it before returning so it is never left
+// suspended.
+func (devices *DeviceSet) createSnapDevice(poolName string, deviceId int, baseName string, baseDeviceId int) error {
+	// Errors from getInfo are treated as "device not active".
+	devinfo, _ := getInfo(baseName)
+	doSuspend := devinfo != nil && devinfo.Exists != 0
+
+	if doSuspend {
+		if err := suspendDevice(baseName); err != nil {
+			return err
+		}
+	}
+
+	task, err := createTask(DeviceTargetMsg, poolName)
+	if task == nil {
+		if doSuspend {
+			resumeDevice(baseName)
+		}
+		return err
+	}
+
+	if err := task.SetSector(0); err != nil {
+		if doSuspend {
+			resumeDevice(baseName)
+		}
+		return fmt.Errorf("Can't set sector")
+	}
+
+	if err := task.SetMessage(fmt.Sprintf("create_snap %d %d", deviceId, baseDeviceId)); err != nil {
+		if doSuspend {
+			resumeDevice(baseName)
+		}
+		return fmt.Errorf("Can't set message")
+	}
+
+	if err := task.Run(); err != nil {
+		if doSuspend {
+			resumeDevice(baseName)
+		}
+		return fmt.Errorf("Error running DeviceCreate (createSnapDevice)")
+	}
+
+	if doSuspend {
+		if err := resumeDevice(baseName); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}

+ 106 - 0
graphdriver/devmapper/devmapper_doc.go

@@ -0,0 +1,106 @@
+package devmapper
+
+// Definition of struct dm_task and sub structures (from lvm2)
+//
+// struct dm_ioctl {
+// 	/*
+// 	 * The version number is made up of three parts:
+// 	 * major - no backward or forward compatibility,
+// 	 * minor - only backwards compatible,
+// 	 * patch - both backwards and forwards compatible.
+// 	 *
+// 	 * All clients of the ioctl interface should fill in the
+// 	 * version number of the interface that they were
+// 	 * compiled with.
+// 	 *
+// 	 * All recognised ioctl commands (ie. those that don't
+// 	 * return -ENOTTY) fill out this field, even if the
+// 	 * command failed.
+// 	 */
+// 	uint32_t version[3];	/* in/out */
+// 	uint32_t data_size;	/* total size of data passed in
+// 				 * including this struct */
+
+// 	uint32_t data_start;	/* offset to start of data
+// 				 * relative to start of this struct */
+
+// 	uint32_t target_count;	/* in/out */
+// 	int32_t open_count;	/* out */
+// 	uint32_t flags;		/* in/out */
+
+// 	/*
+// 	 * event_nr holds either the event number (input and output) or the
+// 	 * udev cookie value (input only).
+// 	 * The DM_DEV_WAIT ioctl takes an event number as input.
+// 	 * The DM_SUSPEND, DM_DEV_REMOVE and DM_DEV_RENAME ioctls
+// 	 * use the field as a cookie to return in the DM_COOKIE
+// 	 * variable with the uevents they issue.
+// 	 * For output, the ioctls return the event number, not the cookie.
+// 	 */
+// 	uint32_t event_nr;      	/* in/out */
+// 	uint32_t padding;
+
+// 	uint64_t dev;		/* in/out */
+
+// 	char name[DM_NAME_LEN];	/* device name */
+// 	char uuid[DM_UUID_LEN];	/* unique identifier for
+// 				 * the block device */
+// 	char data[7];		/* padding or data */
+// };
+
+// struct target {
+// 	uint64_t start;
+// 	uint64_t length;
+// 	char *type;
+// 	char *params;
+
+// 	struct target *next;
+// };
+
+// typedef enum {
+// 	DM_ADD_NODE_ON_RESUME, /* add /dev/mapper node with dmsetup resume */
+// 	DM_ADD_NODE_ON_CREATE  /* add /dev/mapper node with dmsetup create */
+// } dm_add_node_t;
+
+// struct dm_task {
+// 	int type;
+// 	char *dev_name;
+// 	char *mangled_dev_name;
+
+// 	struct target *head, *tail;
+
+// 	int read_only;
+// 	uint32_t event_nr;
+// 	int major;
+// 	int minor;
+// 	int allow_default_major_fallback;
+// 	uid_t uid;
+// 	gid_t gid;
+// 	mode_t mode;
+// 	uint32_t read_ahead;
+// 	uint32_t read_ahead_flags;
+// 	union {
+// 		struct dm_ioctl *v4;
+// 	} dmi;
+// 	char *newname;
+// 	char *message;
+// 	char *geometry;
+// 	uint64_t sector;
+// 	int no_flush;
+// 	int no_open_count;
+// 	int skip_lockfs;
+// 	int query_inactive_table;
+// 	int suppress_identical_reload;
+// 	dm_add_node_t add_node;
+// 	uint64_t existing_table_size;
+// 	int cookie_set;
+// 	int new_uuid;
+// 	int secure_data;
+// 	int retry_remove;
+// 	int enable_checks;
+// 	int expected_errno;
+
+// 	char *uuid;
+// 	char *mangled_uuid;
+// };
+//

+ 13 - 0
graphdriver/devmapper/devmapper_log.go

@@ -0,0 +1,13 @@
+package devmapper
+
+import "C"
+
+// Due to the way cgo works this has to be in a separate file, as devmapper.go has
+// definitions in the cgo block, which is incompatible with using "//export"
+
+// DevmapperLogCallback is exported to C as libdevmapper's log callback;
+// it forwards each log record to the registered dmLogger (if any).
+//export DevmapperLogCallback
+func DevmapperLogCallback(level C.int, file *C.char, line C.int, dm_errno_or_class C.int, message *C.char) {
+	if dmLogger != nil {
+		dmLogger.log(int(level), C.GoString(file), int(line), int(dm_errno_or_class), C.GoString(message))
+	}
+}

+ 285 - 0
graphdriver/devmapper/devmapper_test.go

@@ -0,0 +1,285 @@
+package devmapper
+
+import (
+	"testing"
+)
+
+func TestTaskCreate(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	// Test success
+	taskCreate(t, DeviceInfo)
+
+	// Test Failure
+	DmTaskCreate = dmTaskCreateFail
+	defer func() { DmTaskCreate = dmTaskCreateFct }()
+	if task := TaskCreate(-1); task != nil {
+		t.Fatalf("An error should have occurred while creating an invalid task.")
+	}
+}
+
+func TestTaskRun(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	// Perform the RUN
+	if err := task.Run(); err != nil {
+		t.Fatal(err)
+	}
+	// Make sure we don't have error with GetInfo
+	if _, err := task.GetInfo(); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	DmTaskRun = dmTaskRunFail
+	defer func() { DmTaskRun = dmTaskRunFct }()
+
+	task = taskCreate(t, DeviceInfo)
+	// Perform the RUN
+	if err := task.Run(); err != ErrTaskRun {
+		t.Fatalf("An error should have occurred while running task.")
+	}
+	// Make sure GetInfo also fails
+	if _, err := task.GetInfo(); err != ErrTaskGetInfo {
+		t.Fatalf("GetInfo should fail if task.Run() failed.")
+	}
+}
+
+func TestTaskSetName(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	if err := task.SetName("test"); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	DmTaskSetName = dmTaskSetNameFail
+	defer func() { DmTaskSetName = dmTaskSetNameFct }()
+
+	if err := task.SetName("test"); err != ErrTaskSetName {
+		t.Fatalf("An error should have occurred while running SetName.")
+	}
+}
+
+func TestTaskSetMessage(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	if err := task.SetMessage("test"); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	DmTaskSetMessage = dmTaskSetMessageFail
+	defer func() { DmTaskSetMessage = dmTaskSetMessageFct }()
+
+	if err := task.SetMessage("test"); err != ErrTaskSetMessage {
+		t.Fatalf("An error should have occurred while running SetMessage.")
+	}
+}
+
+func TestTaskSetSector(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	if err := task.SetSector(128); err != nil {
+		t.Fatal(err)
+	}
+
+	DmTaskSetSector = dmTaskSetSectorFail
+	defer func() { DmTaskSetSector = dmTaskSetSectorFct }()
+
+	// Test failure
+	if err := task.SetSector(0); err != ErrTaskSetSector {
+		t.Fatalf("An error should have occurred while running SetSector.")
+	}
+}
+
+func TestTaskSetCookie(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	var (
+		cookie uint = 0
+		task        = taskCreate(t, DeviceInfo)
+	)
+
+	// Test success
+	if err := task.SetCookie(&cookie, 0); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	if err := task.SetCookie(nil, 0); err != ErrNilCookie {
+		t.Fatalf("An error should have occurred while running SetCookie with nil cookie.")
+	}
+
+	DmTaskSetCookie = dmTaskSetCookieFail
+	defer func() { DmTaskSetCookie = dmTaskSetCookieFct }()
+
+	if err := task.SetCookie(&cookie, 0); err != ErrTaskSetCookie {
+		t.Fatalf("An error should have occurred while running SetCookie.")
+	}
+}
+
+func TestTaskSetAddNode(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	if err := task.SetAddNode(0); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	if err := task.SetAddNode(-1); err != ErrInvalidAddNode {
+		t.Fatalf("An error should have occurred running SetAddNode with wrong node.")
+	}
+
+	DmTaskSetAddNode = dmTaskSetAddNodeFail
+	defer func() { DmTaskSetAddNode = dmTaskSetAddNodeFct }()
+
+	if err := task.SetAddNode(0); err != ErrTaskSetAddNode {
+		t.Fatalf("An error should have occurred running SetAddNode.")
+	}
+}
+
+func TestTaskSetRo(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	if err := task.SetRo(); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	DmTaskSetRo = dmTaskSetRoFail
+	defer func() { DmTaskSetRo = dmTaskSetRoFct }()
+
+	if err := task.SetRo(); err != ErrTaskSetRo {
+		t.Fatalf("An error should have occurred running SetRo.")
+	}
+}
+
+func TestTaskAddTarget(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	if err := task.AddTarget(0, 128, "thinp", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	DmTaskAddTarget = dmTaskAddTargetFail
+	defer func() { DmTaskAddTarget = dmTaskAddTargetFct }()
+
+	if err := task.AddTarget(0, 128, "thinp", ""); err != ErrTaskAddTarget {
+		t.Fatalf("An error should have occurred running AddTarget.")
+	}
+}
+
+// func TestTaskGetInfo(t *testing.T) {
+// 	task := taskCreate(t, DeviceInfo)
+
+// 	// Test success
+// 	if _, err := task.GetInfo(); err != nil {
+// 		t.Fatal(err)
+// 	}
+
+// 	// Test failure
+// 	DmTaskGetInfo = dmTaskGetInfoFail
+// 	defer func() { DmTaskGetInfo = dmTaskGetInfoFct }()
+
+// 	if _, err := task.GetInfo(); err != ErrTaskGetInfo {
+// 		t.Fatalf("An error should have occurred running GetInfo.")
+// 	}
+// }
+
+// func TestTaskGetNextTarget(t *testing.T) {
+// 	task := taskCreate(t, DeviceInfo)
+
+// 	if next, _, _, _, _ := task.GetNextTarget(0); next == 0 {
+// 		t.Fatalf("The next target should not be 0.")
+// 	}
+// }
+
+/// Utils
+func taskCreate(t *testing.T, taskType TaskType) *Task {
+	task := TaskCreate(taskType)
+	if task == nil {
+		t.Fatalf("Error creating task")
+	}
+	return task
+}
+
+/// Failure function replacement
+func dmTaskCreateFail(t int) *CDmTask {
+	return nil
+}
+
+func dmTaskRunFail(task *CDmTask) int {
+	return -1
+}
+
+func dmTaskSetNameFail(task *CDmTask, name string) int {
+	return -1
+}
+
+func dmTaskSetMessageFail(task *CDmTask, message string) int {
+	return -1
+}
+
+func dmTaskSetSectorFail(task *CDmTask, sector uint64) int {
+	return -1
+}
+
+func dmTaskSetCookieFail(task *CDmTask, cookie *uint, flags uint16) int {
+	return -1
+}
+
+func dmTaskSetAddNodeFail(task *CDmTask, addNode AddNodeType) int {
+	return -1
+}
+
+func dmTaskSetRoFail(task *CDmTask) int {
+	return -1
+}
+
+func dmTaskAddTargetFail(task *CDmTask,
+	start, size uint64, ttype, params string) int {
+	return -1
+}
+
+func dmTaskGetInfoFail(task *CDmTask, info *Info) int {
+	return -1
+}
+
+func dmGetNextTargetFail(task *CDmTask, next uintptr, start, length *uint64,
+	target, params *string) uintptr {
+	return 0
+}
+
+func dmAttachLoopDeviceFail(filename string, fd *int) string {
+	return ""
+}
+
+func sysGetBlockSizeFail(fd uintptr, size *uint64) sysErrno {
+	return 1
+}
+
+func dmUdevWaitFail(cookie uint) int {
+	return -1
+}
+
+func dmSetDevDirFail(dir string) int {
+	return -1
+}
+
+func dmGetLibraryVersionFail(version *string) int {
+	return -1
+}

+ 340 - 0
graphdriver/devmapper/devmapper_wrapper.go

@@ -0,0 +1,340 @@
+package devmapper
+
+/*
+#cgo LDFLAGS: -L. -ldevmapper
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <libdevmapper.h>
+#include <linux/loop.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <errno.h>
+
+#ifndef LOOP_CTL_GET_FREE
+#define LOOP_CTL_GET_FREE       0x4C82
+#endif
+
+// FIXME: this could easily be rewritten in go
+char*			attach_loop_device(const char *filename, int *loop_fd_out)
+{
+  struct loop_info64	loopinfo = {0};
+  struct stat		st;
+  char			buf[64];
+  int			i, loop_fd, fd, start_index;
+  char*			loopname;
+
+
+  *loop_fd_out = -1;
+
+  start_index = 0;
+  fd = open("/dev/loop-control", O_RDONLY);
+  if (fd >= 0) {
+    start_index = ioctl(fd, LOOP_CTL_GET_FREE);
+    close(fd);
+
+    if (start_index < 0)
+      start_index = 0;
+  }
+
+  fd = open(filename, O_RDWR);
+  if (fd < 0) {
+    perror("open");
+    return NULL;
+  }
+
+  loop_fd = -1;
+  for (i = start_index ; loop_fd < 0 ; i++ ) {
+    if (sprintf(buf, "/dev/loop%d", i) < 0) {
+	close(fd);
+	return NULL;
+    }
+
+    if (stat(buf, &st)) {
+      if (!S_ISBLK(st.st_mode)) {
+	 fprintf(stderr, "[error] Loopback device %s is not a block device.\n", buf);
+      } else if (errno == ENOENT) {
+      fprintf(stderr, "[error] There are no more loopback devices available.\n");
+      } else {
+	fprintf(stderr, "[error] Unknown error trying to stat the loopback device %s (errno: %d).\n", buf, errno);
+      }
+      close(fd);
+      return NULL;
+    }
+
+    loop_fd = open(buf, O_RDWR);
+    if (loop_fd < 0 && errno == ENOENT) {
+      fprintf(stderr, "[error] The loopback device %s does not exist.\n", buf);
+      close(fd);
+      return NULL;
+    } else if (loop_fd < 0) {
+	fprintf(stderr, "[error] Unknown error opening the loopback device %s. (errno: %d)\n", buf, errno);
+	continue;
+    }
+
+    if (ioctl(loop_fd, LOOP_SET_FD, (void *)(size_t)fd) < 0) {
+      int errsv = errno;
+      close(loop_fd);
+      loop_fd = -1;
+      if (errsv != EBUSY) {
+        close(fd);
+        fprintf(stderr, "cannot set up loopback device %s: %s", buf, strerror(errsv));
+        return NULL;
+      }
+      continue;
+    }
+
+    close(fd);
+
+    strncpy((char*)loopinfo.lo_file_name, buf, LO_NAME_SIZE);
+    loopinfo.lo_offset = 0;
+    loopinfo.lo_flags = LO_FLAGS_AUTOCLEAR;
+
+    if (ioctl(loop_fd, LOOP_SET_STATUS64, &loopinfo) < 0) {
+      perror("ioctl LOOP_SET_STATUS64");
+      if (ioctl(loop_fd, LOOP_CLR_FD, 0) < 0) {
+        perror("ioctl LOOP_CLR_FD");
+      }
+      close(loop_fd);
+      fprintf (stderr, "cannot set up loopback device info");
+      return (NULL);
+    }
+
+    loopname = strdup(buf);
+    if (loopname == NULL) {
+      close(loop_fd);
+      return (NULL);
+    }
+
+    *loop_fd_out = loop_fd;
+    return (loopname);
+  }
+
+  return (NULL);
+}
+
+extern void DevmapperLogCallback(int level, char *file, int line, int dm_errno_or_class, char *str);
+
+static void	log_cb(int level, const char *file, int line,
+		       int dm_errno_or_class, const char *f, ...)
+{
+  char buffer[256];
+  va_list ap;
+
+  va_start(ap, f);
+  vsnprintf(buffer, 256, f, ap);
+  va_end(ap);
+
+  DevmapperLogCallback(level, (char *)file, line, dm_errno_or_class, buffer);
+}
+
+static void	log_with_errno_init()
+{
+  dm_log_with_errno_init(log_cb);
+}
+
+*/
+import "C"
+
+import (
+	"unsafe"
+)
+
+type (
+	CDmTask C.struct_dm_task
+)
+
+var (
+	DmAttachLoopDevice       = dmAttachLoopDeviceFct
+	DmGetBlockSize           = dmGetBlockSizeFct
+	DmGetLibraryVersion      = dmGetLibraryVersionFct
+	DmGetNextTarget          = dmGetNextTargetFct
+	DmLogInitVerbose         = dmLogInitVerboseFct
+	DmSetDevDir              = dmSetDevDirFct
+	DmTaskAddTarget          = dmTaskAddTargetFct
+	DmTaskCreate             = dmTaskCreateFct
+	DmTaskDestroy            = dmTaskDestroyFct
+	DmTaskGetInfo            = dmTaskGetInfoFct
+	DmTaskRun                = dmTaskRunFct
+	DmTaskSetAddNode         = dmTaskSetAddNodeFct
+	DmTaskSetCookie          = dmTaskSetCookieFct
+	DmTaskSetMessage         = dmTaskSetMessageFct
+	DmTaskSetName            = dmTaskSetNameFct
+	DmTaskSetRo              = dmTaskSetRoFct
+	DmTaskSetSector          = dmTaskSetSectorFct
+	DmUdevWait               = dmUdevWaitFct
+	GetBlockSize             = getBlockSizeFct
+	LogWithErrnoInit         = logWithErrnoInitFct
+	DmGetLoopbackBackingFile = dmGetLoopbackBackingFileFct
+	DmLoopbackSetCapacity    = dmLoopbackSetCapacityFct
+)
+
+func free(p *C.char) {
+	C.free(unsafe.Pointer(p))
+}
+
+func dmTaskDestroyFct(task *CDmTask) {
+	C.dm_task_destroy((*C.struct_dm_task)(task))
+}
+
+func dmTaskCreateFct(taskType int) *CDmTask {
+	return (*CDmTask)(C.dm_task_create(C.int(taskType)))
+}
+
+func dmTaskRunFct(task *CDmTask) int {
+	return int(C.dm_task_run((*C.struct_dm_task)(task)))
+}
+
+func dmTaskSetNameFct(task *CDmTask, name string) int {
+	Cname := C.CString(name)
+	defer free(Cname)
+
+	return int(C.dm_task_set_name((*C.struct_dm_task)(task),
+		Cname))
+}
+
+func dmTaskSetMessageFct(task *CDmTask, message string) int {
+	Cmessage := C.CString(message)
+	defer free(Cmessage)
+
+	return int(C.dm_task_set_message((*C.struct_dm_task)(task),
+		Cmessage))
+}
+
+func dmTaskSetSectorFct(task *CDmTask, sector uint64) int {
+	return int(C.dm_task_set_sector((*C.struct_dm_task)(task),
+		C.uint64_t(sector)))
+}
+
+func dmTaskSetCookieFct(task *CDmTask, cookie *uint, flags uint16) int {
+	cCookie := C.uint32_t(*cookie)
+	defer func() {
+		*cookie = uint(cCookie)
+	}()
+	return int(C.dm_task_set_cookie((*C.struct_dm_task)(task), &cCookie,
+		C.uint16_t(flags)))
+}
+
+func dmTaskSetAddNodeFct(task *CDmTask, addNode AddNodeType) int {
+	return int(C.dm_task_set_add_node((*C.struct_dm_task)(task),
+		C.dm_add_node_t(addNode)))
+}
+
+func dmTaskSetRoFct(task *CDmTask) int {
+	return int(C.dm_task_set_ro((*C.struct_dm_task)(task)))
+}
+
+func dmTaskAddTargetFct(task *CDmTask,
+	start, size uint64, ttype, params string) int {
+
+	Cttype := C.CString(ttype)
+	defer free(Cttype)
+
+	Cparams := C.CString(params)
+	defer free(Cparams)
+
+	return int(C.dm_task_add_target((*C.struct_dm_task)(task),
+		C.uint64_t(start), C.uint64_t(size), Cttype, Cparams))
+}
+
+func dmGetLoopbackBackingFileFct(fd uintptr) (uint64, uint64, sysErrno) {
+	var lo64 C.struct_loop_info64
+	_, _, err := sysSyscall(sysSysIoctl, fd, C.LOOP_GET_STATUS64,
+		uintptr(unsafe.Pointer(&lo64)))
+	return uint64(lo64.lo_device), uint64(lo64.lo_inode), sysErrno(err)
+}
+
+func dmLoopbackSetCapacityFct(fd uintptr) sysErrno {
+	_, _, err := sysSyscall(sysSysIoctl, fd, C.LOOP_SET_CAPACITY, 0)
+	return sysErrno(err)
+}
+
+func dmGetBlockSizeFct(fd uintptr) (int64, sysErrno) {
+	var size int64
+	_, _, err := sysSyscall(sysSysIoctl, fd, C.BLKGETSIZE64, uintptr(unsafe.Pointer(&size)))
+	return size, sysErrno(err)
+}
+
+func dmTaskGetInfoFct(task *CDmTask, info *Info) int {
+	Cinfo := C.struct_dm_info{}
+	defer func() {
+		info.Exists = int(Cinfo.exists)
+		info.Suspended = int(Cinfo.suspended)
+		info.LiveTable = int(Cinfo.live_table)
+		info.InactiveTable = int(Cinfo.inactive_table)
+		info.OpenCount = int32(Cinfo.open_count)
+		info.EventNr = uint32(Cinfo.event_nr)
+		info.Major = uint32(Cinfo.major)
+		info.Minor = uint32(Cinfo.minor)
+		info.ReadOnly = int(Cinfo.read_only)
+		info.TargetCount = int32(Cinfo.target_count)
+	}()
+	return int(C.dm_task_get_info((*C.struct_dm_task)(task), &Cinfo))
+}
+
+func dmGetNextTargetFct(task *CDmTask, next uintptr, start, length *uint64, target, params *string) uintptr {
+	var (
+		Cstart, Clength      C.uint64_t
+		CtargetType, Cparams *C.char
+	)
+	defer func() {
+		*start = uint64(Cstart)
+		*length = uint64(Clength)
+		*target = C.GoString(CtargetType)
+		*params = C.GoString(Cparams)
+	}()
+
+	nextp := C.dm_get_next_target((*C.struct_dm_task)(task),
+		unsafe.Pointer(next), &Cstart, &Clength, &CtargetType, &Cparams)
+	return uintptr(nextp)
+}
+
+func dmAttachLoopDeviceFct(filename string, fd *int) string {
+	cFilename := C.CString(filename)
+	defer free(cFilename)
+
+	var cFd C.int
+	defer func() {
+		*fd = int(cFd)
+	}()
+
+	ret := C.attach_loop_device(cFilename, &cFd)
+	defer free(ret)
+	return C.GoString(ret)
+}
+
+func getBlockSizeFct(fd uintptr, size *uint64) sysErrno {
+	_, _, err := sysSyscall(sysSysIoctl, fd, C.BLKGETSIZE64, uintptr(unsafe.Pointer(&size)))
+	return sysErrno(err)
+}
+
+func dmUdevWaitFct(cookie uint) int {
+	return int(C.dm_udev_wait(C.uint32_t(cookie)))
+}
+
+func dmLogInitVerboseFct(level int) {
+	C.dm_log_init_verbose(C.int(level))
+}
+
+func logWithErrnoInitFct() {
+	C.log_with_errno_init()
+}
+
+func dmSetDevDirFct(dir string) int {
+	Cdir := C.CString(dir)
+	defer free(Cdir)
+
+	return int(C.dm_set_dev_dir(Cdir))
+}
+
+func dmGetLibraryVersionFct(version *string) int {
+	buffer := C.CString(string(make([]byte, 128)))
+	defer free(buffer)
+	defer func() {
+		*version = C.GoString(buffer)
+	}()
+	return int(C.dm_get_library_version(buffer, 128))
+}

+ 126 - 0
graphdriver/devmapper/driver.go

@@ -0,0 +1,126 @@
+package devmapper
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/graphdriver"
+	"io/ioutil"
+	"path"
+)
+
+func init() {
+	graphdriver.Register("devicemapper", Init)
+}
+
+// Placeholder interfaces, to be replaced
+// at integration.
+
+// End of placeholder interfaces.
+
+type Driver struct {
+	*DeviceSet
+	home string
+}
+
+var Init = func(home string) (graphdriver.Driver, error) {
+	deviceSet, err := NewDeviceSet(home, true)
+	if err != nil {
+		return nil, err
+	}
+	d := &Driver{
+		DeviceSet: deviceSet,
+		home:      home,
+	}
+	return d, nil
+}
+
+func (d *Driver) String() string {
+	return "devicemapper"
+}
+
+func (d *Driver) Status() [][2]string {
+	s := d.DeviceSet.Status()
+
+	status := [][2]string{
+		{"Pool Name", s.PoolName},
+		{"Data file", s.DataLoopback},
+		{"Metadata file", s.MetadataLoopback},
+		{"Data Space Used", fmt.Sprintf("%.1f Mb", float64(s.Data.Used)/(1024*1024))},
+		{"Data Space Total", fmt.Sprintf("%.1f Mb", float64(s.Data.Total)/(1024*1024))},
+		{"Metadata Space Used", fmt.Sprintf("%.1f Mb", float64(s.Metadata.Used)/(1024*1024))},
+		{"Metadata Space Total", fmt.Sprintf("%.1f Mb", float64(s.Metadata.Total)/(1024*1024))},
+	}
+	return status
+}
+
+func (d *Driver) Cleanup() error {
+	return d.DeviceSet.Shutdown()
+}
+
+func (d *Driver) Create(id, parent string) error {
+	if err := d.DeviceSet.AddDevice(id, parent); err != nil {
+		return err
+	}
+
+	mp := path.Join(d.home, "mnt", id)
+	if err := d.mount(id, mp); err != nil {
+		return err
+	}
+
+	if err := osMkdirAll(path.Join(mp, "rootfs"), 0755); err != nil && !osIsExist(err) {
+		return err
+	}
+
+	// Create an "id" file with the container/image id in it to help reconstruct this in case
+	// of later problems
+	if err := ioutil.WriteFile(path.Join(mp, "id"), []byte(id), 0600); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func (d *Driver) Remove(id string) error {
+	mp := path.Join(d.home, "mnt", id)
+	if err := d.unmount(id, mp); err != nil {
+		return err
+	}
+	return d.DeviceSet.RemoveDevice(id)
+}
+
+func (d *Driver) Get(id string) (string, error) {
+	mp := path.Join(d.home, "mnt", id)
+	if err := d.mount(id, mp); err != nil {
+		return "", err
+	}
+	return path.Join(mp, "rootfs"), nil
+}
+
+func (d *Driver) mount(id, mountPoint string) error {
+	// Create the target directories if they don't exist
+	if err := osMkdirAll(mountPoint, 0755); err != nil && !osIsExist(err) {
+		return err
+	}
+	// If mountpoint is already mounted, do nothing
+	if mounted, err := Mounted(mountPoint); err != nil {
+		return fmt.Errorf("Error checking mountpoint: %s", err)
+	} else if mounted {
+		return nil
+	}
+	// Mount the device
+	return d.DeviceSet.MountDevice(id, mountPoint, false)
+}
+
+func (d *Driver) unmount(id, mountPoint string) error {
+	// If mountpoint is not mounted, do nothing
+	if mounted, err := Mounted(mountPoint); err != nil {
+		return fmt.Errorf("Error checking mountpoint: %s", err)
+	} else if !mounted {
+		return nil
+	}
+	// Unmount the device
+	return d.DeviceSet.UnmountDevice(id, mountPoint, true)
+}
+
+func (d *Driver) Exists(id string) bool {
+	return d.Devices[id] != nil
+}

+ 872 - 0
graphdriver/devmapper/driver_test.go

@@ -0,0 +1,872 @@
+package devmapper
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/graphdriver"
+	"io/ioutil"
+	"path"
+	"runtime"
+	"strings"
+	"syscall"
+	"testing"
+)
+
+func init() {
+	// Reduce the size of the base fs and loopback for the tests
+	DefaultDataLoopbackSize = 300 * 1024 * 1024
+	DefaultMetaDataLoopbackSize = 200 * 1024 * 1024
+	DefaultBaseFsSize = 300 * 1024 * 1024
+}
+
+// denyAllDevmapper mocks all calls to libdevmapper in the unit tests, and denies them by default
+func denyAllDevmapper() {
+	// Hijack all calls to libdevmapper with default panics.
+	// Authorized calls are selectively hijacked in each tests.
+	DmTaskCreate = func(t int) *CDmTask {
+		panic("DmTaskCreate: this method should not be called here")
+	}
+	DmTaskRun = func(task *CDmTask) int {
+		panic("DmTaskRun: this method should not be called here")
+	}
+	DmTaskSetName = func(task *CDmTask, name string) int {
+		panic("DmTaskSetName: this method should not be called here")
+	}
+	DmTaskSetMessage = func(task *CDmTask, message string) int {
+		panic("DmTaskSetMessage: this method should not be called here")
+	}
+	DmTaskSetSector = func(task *CDmTask, sector uint64) int {
+		panic("DmTaskSetSector: this method should not be called here")
+	}
+	DmTaskSetCookie = func(task *CDmTask, cookie *uint, flags uint16) int {
+		panic("DmTaskSetCookie: this method should not be called here")
+	}
+	DmTaskSetAddNode = func(task *CDmTask, addNode AddNodeType) int {
+		panic("DmTaskSetAddNode: this method should not be called here")
+	}
+	DmTaskSetRo = func(task *CDmTask) int {
+		panic("DmTaskSetRo: this method should not be called here")
+	}
+	DmTaskAddTarget = func(task *CDmTask, start, size uint64, ttype, params string) int {
+		panic("DmTaskAddTarget: this method should not be called here")
+	}
+	DmTaskGetInfo = func(task *CDmTask, info *Info) int {
+		panic("DmTaskGetInfo: this method should not be called here")
+	}
+	DmGetNextTarget = func(task *CDmTask, next uintptr, start, length *uint64, target, params *string) uintptr {
+		panic("DmGetNextTarget: this method should not be called here")
+	}
+	DmAttachLoopDevice = func(filename string, fd *int) string {
+		panic("DmAttachLoopDevice: this method should not be called here")
+	}
+	DmGetBlockSize = func(fd uintptr) (int64, sysErrno) {
+		panic("DmGetBlockSize: this method should not be called here")
+	}
+	DmUdevWait = func(cookie uint) int {
+		panic("DmUdevWait: this method should not be called here")
+	}
+	DmSetDevDir = func(dir string) int {
+		panic("DmSetDevDir: this method should not be called here")
+	}
+	DmGetLibraryVersion = func(version *string) int {
+		panic("DmGetLibraryVersion: this method should not be called here")
+	}
+	DmLogInitVerbose = func(level int) {
+		panic("DmLogInitVerbose: this method should not be called here")
+	}
+	DmTaskDestroy = func(task *CDmTask) {
+		panic("DmTaskDestroy: this method should not be called here")
+	}
+	GetBlockSize = func(fd uintptr, size *uint64) sysErrno {
+		panic("GetBlockSize: this method should not be called here")
+	}
+	LogWithErrnoInit = func() {
+		panic("LogWithErrnoInit: this method should not be called here")
+	}
+}
+
+func denyAllSyscall() {
+	sysMount = func(source, target, fstype string, flags uintptr, data string) (err error) {
+		panic("sysMount: this method should not be called here")
+	}
+	sysUnmount = func(target string, flags int) (err error) {
+		panic("sysUnmount: this method should not be called here")
+	}
+	sysCloseOnExec = func(fd int) {
+		panic("sysCloseOnExec: this method should not be called here")
+	}
+	sysSyscall = func(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
+		panic("sysSyscall: this method should not be called here")
+	}
+	// Not a syscall, but forbidding it here anyway
+	Mounted = func(mnt string) (bool, error) {
+		panic("devmapper.Mounted: this method should not be called here")
+	}
+	// osOpenFile = os.OpenFile
+	// osNewFile = os.NewFile
+	// osCreate = os.Create
+	// osStat = os.Stat
+	// osIsNotExist = os.IsNotExist
+	// osIsExist = os.IsExist
+	// osMkdirAll = os.MkdirAll
+	// osRemoveAll = os.RemoveAll
+	// osRename = os.Rename
+	// osReadlink = os.Readlink
+
+	// execRun = func(name string, args ...string) error {
+	// 	return exec.Command(name, args...).Run()
+	// }
+}
+
+func mkTestDirectory(t *testing.T) string {
+	dir, err := ioutil.TempDir("", "docker-test-devmapper-")
+	if err != nil {
+		t.Fatal(err)
+	}
+	return dir
+}
+
+func newDriver(t *testing.T) *Driver {
+	home := mkTestDirectory(t)
+	d, err := Init(home)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return d.(*Driver)
+}
+
+func cleanup(d *Driver) {
+	d.Cleanup()
+	osRemoveAll(d.home)
+}
+
+type Set map[string]bool
+
+func (r Set) Assert(t *testing.T, names ...string) {
+	for _, key := range names {
+		if _, exists := r[key]; !exists {
+			t.Fatalf("Key not set: %s", key)
+		}
+		delete(r, key)
+	}
+	if len(r) != 0 {
+		t.Fatalf("Unexpected keys: %v", r)
+	}
+}
+
+func TestInit(t *testing.T) {
+	var (
+		calls           = make(Set)
+		devicesAttached = make(Set)
+		taskMessages    = make(Set)
+		taskTypes       = make(Set)
+		home            = mkTestDirectory(t)
+	)
+	defer osRemoveAll(home)
+
+	func() {
+		denyAllDevmapper()
+		DmSetDevDir = func(dir string) int {
+			calls["DmSetDevDir"] = true
+			expectedDir := "/dev"
+			if dir != expectedDir {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmSetDevDir(%v)\nReceived: DmSetDevDir(%v)\n", expectedDir, dir)
+			}
+			return 0
+		}
+		LogWithErrnoInit = func() {
+			calls["DmLogWithErrnoInit"] = true
+		}
+		var task1 CDmTask
+		DmTaskCreate = func(taskType int) *CDmTask {
+			calls["DmTaskCreate"] = true
+			taskTypes[fmt.Sprintf("%d", taskType)] = true
+			return &task1
+		}
+		DmTaskSetName = func(task *CDmTask, name string) int {
+			calls["DmTaskSetName"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskSetName(%v)\nReceived: DmTaskSetName(%v)\n", expectedTask, task)
+			}
+			// FIXME: use Set.AssertRegexp()
+			if !strings.HasPrefix(name, "docker-") && !strings.HasPrefix(name, "/dev/mapper/docker-") ||
+				!strings.HasSuffix(name, "-pool") && !strings.HasSuffix(name, "-base") {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskSetName(%v)\nReceived: DmTaskSetName(%v)\n", "docker-...-pool", name)
+			}
+			return 1
+		}
+		DmTaskRun = func(task *CDmTask) int {
+			calls["DmTaskRun"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskRun(%v)\nReceived: DmTaskRun(%v)\n", expectedTask, task)
+			}
+			return 1
+		}
+		DmTaskGetInfo = func(task *CDmTask, info *Info) int {
+			calls["DmTaskGetInfo"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskGetInfo(%v)\nReceived: DmTaskGetInfo(%v)\n", expectedTask, task)
+			}
+			// This will crash if info is not dereferenceable
+			info.Exists = 0
+			return 1
+		}
+		DmTaskSetSector = func(task *CDmTask, sector uint64) int {
+			calls["DmTaskSetSector"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskSetSector(%v)\nReceived: DmTaskSetSector(%v)\n", expectedTask, task)
+			}
+			if expectedSector := uint64(0); sector != expectedSector {
+				t.Fatalf("Wrong libdevmapper call to DmTaskSetSector\nExpected: %v\nReceived: %v\n", expectedSector, sector)
+			}
+			return 1
+		}
+		DmTaskSetMessage = func(task *CDmTask, message string) int {
+			calls["DmTaskSetMessage"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskSetSector(%v)\nReceived: DmTaskSetSector(%v)\n", expectedTask, task)
+			}
+			taskMessages[message] = true
+			return 1
+		}
+		var (
+			fakeDataLoop       = "/dev/loop42"
+			fakeMetadataLoop   = "/dev/loop43"
+			fakeDataLoopFd     = 42
+			fakeMetadataLoopFd = 43
+		)
+		var attachCount int
+		DmAttachLoopDevice = func(filename string, fd *int) string {
+			calls["DmAttachLoopDevice"] = true
+			if _, exists := devicesAttached[filename]; exists {
+				t.Fatalf("Already attached %s", filename)
+			}
+			devicesAttached[filename] = true
+			// This will crash if fd is not dereferenceable
+			if attachCount == 0 {
+				attachCount++
+				*fd = fakeDataLoopFd
+				return fakeDataLoop
+			} else {
+				*fd = fakeMetadataLoopFd
+				return fakeMetadataLoop
+			}
+		}
+		DmTaskDestroy = func(task *CDmTask) {
+			calls["DmTaskDestroy"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskDestroy(%v)\nReceived: DmTaskDestroy(%v)\n", expectedTask, task)
+			}
+		}
+		fakeBlockSize := int64(4242 * 512)
+		DmGetBlockSize = func(fd uintptr) (int64, sysErrno) {
+			calls["DmGetBlockSize"] = true
+			if expectedFd := uintptr(42); fd != expectedFd {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmGetBlockSize(%v)\nReceived: DmGetBlockSize(%v)\n", expectedFd, fd)
+			}
+			return fakeBlockSize, 0
+		}
+		DmTaskAddTarget = func(task *CDmTask, start, size uint64, ttype, params string) int {
+			calls["DmTaskSetTarget"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskDestroy(%v)\nReceived: DmTaskDestroy(%v)\n", expectedTask, task)
+			}
+			if start != 0 {
+				t.Fatalf("Wrong start: %d != %d", start, 0)
+			}
+			if ttype != "thin" && ttype != "thin-pool" {
+				t.Fatalf("Wrong ttype: %s", ttype)
+			}
+			// Quick smoke test
+			if params == "" {
+				t.Fatalf("Params should not be empty")
+			}
+			return 1
+		}
+		fakeCookie := uint(4321)
+		DmTaskSetCookie = func(task *CDmTask, cookie *uint, flags uint16) int {
+			calls["DmTaskSetCookie"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskDestroy(%v)\nReceived: DmTaskDestroy(%v)\n", expectedTask, task)
+			}
+			if flags != 0 {
+				t.Fatalf("Cookie flags should be 0 (not %x)", flags)
+			}
+			*cookie = fakeCookie
+			return 1
+		}
+		DmUdevWait = func(cookie uint) int {
+			calls["DmUdevWait"] = true
+			if cookie != fakeCookie {
+				t.Fatalf("Wrong cookie: %d != %d", cookie, fakeCookie)
+			}
+			return 1
+		}
+		DmTaskSetAddNode = func(task *CDmTask, addNode AddNodeType) int {
+			if addNode != AddNodeOnCreate {
+				t.Fatalf("Wrong AddNodeType: %v (expected %v)", addNode, AddNodeOnCreate)
+			}
+			calls["DmTaskSetAddNode"] = true
+			return 1
+		}
+		execRun = func(name string, args ...string) error {
+			calls["execRun"] = true
+			if name != "mkfs.ext4" {
+				t.Fatalf("Expected %s to be executed, not %s", "mkfs.ext4", name)
+			}
+			return nil
+		}
+		driver, err := Init(home)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer func() {
+			if err := driver.Cleanup(); err != nil {
+				t.Fatal(err)
+			}
+		}()
+	}()
+	// Put all tests in a function to make sure the garbage collection will
+	// occur.
+
+	// Call GC to cleanup runtime.Finalizers
+	runtime.GC()
+
+	calls.Assert(t,
+		"DmSetDevDir",
+		"DmLogWithErrnoInit",
+		"DmTaskSetName",
+		"DmTaskRun",
+		"DmTaskGetInfo",
+		"DmAttachLoopDevice",
+		"DmTaskDestroy",
+		"execRun",
+		"DmTaskCreate",
+		"DmGetBlockSize",
+		"DmTaskSetTarget",
+		"DmTaskSetCookie",
+		"DmUdevWait",
+		"DmTaskSetSector",
+		"DmTaskSetMessage",
+		"DmTaskSetAddNode",
+	)
+	devicesAttached.Assert(t, path.Join(home, "devicemapper", "data"), path.Join(home, "devicemapper", "metadata"))
+	taskTypes.Assert(t, "0", "6", "17")
+	taskMessages.Assert(t, "create_thin 0", "set_transaction_id 0 1")
+}
+
+func fakeInit() func(home string) (graphdriver.Driver, error) {
+	oldInit := Init
+	Init = func(home string) (graphdriver.Driver, error) {
+		return &Driver{
+			home: home,
+		}, nil
+	}
+	return oldInit
+}
+
+func restoreInit(init func(home string) (graphdriver.Driver, error)) {
+	Init = init
+}
+
+func mockAllDevmapper(calls Set) {
+	DmSetDevDir = func(dir string) int {
+		calls["DmSetDevDir"] = true
+		return 0
+	}
+	LogWithErrnoInit = func() {
+		calls["DmLogWithErrnoInit"] = true
+	}
+	DmTaskCreate = func(taskType int) *CDmTask {
+		calls["DmTaskCreate"] = true
+		return &CDmTask{}
+	}
+	DmTaskSetName = func(task *CDmTask, name string) int {
+		calls["DmTaskSetName"] = true
+		return 1
+	}
+	DmTaskRun = func(task *CDmTask) int {
+		calls["DmTaskRun"] = true
+		return 1
+	}
+	DmTaskGetInfo = func(task *CDmTask, info *Info) int {
+		calls["DmTaskGetInfo"] = true
+		return 1
+	}
+	DmTaskSetSector = func(task *CDmTask, sector uint64) int {
+		calls["DmTaskSetSector"] = true
+		return 1
+	}
+	DmTaskSetMessage = func(task *CDmTask, message string) int {
+		calls["DmTaskSetMessage"] = true
+		return 1
+	}
+	DmAttachLoopDevice = func(filename string, fd *int) string {
+		calls["DmAttachLoopDevice"] = true
+		return "/dev/loop42"
+	}
+	DmTaskDestroy = func(task *CDmTask) {
+		calls["DmTaskDestroy"] = true
+	}
+	DmGetBlockSize = func(fd uintptr) (int64, sysErrno) {
+		calls["DmGetBlockSize"] = true
+		return int64(4242 * 512), 0
+	}
+	DmTaskAddTarget = func(task *CDmTask, start, size uint64, ttype, params string) int {
+		calls["DmTaskSetTarget"] = true
+		return 1
+	}
+	DmTaskSetCookie = func(task *CDmTask, cookie *uint, flags uint16) int {
+		calls["DmTaskSetCookie"] = true
+		return 1
+	}
+	DmUdevWait = func(cookie uint) int {
+		calls["DmUdevWait"] = true
+		return 1
+	}
+	DmTaskSetAddNode = func(task *CDmTask, addNode AddNodeType) int {
+		calls["DmTaskSetAddNode"] = true
+		return 1
+	}
+	execRun = func(name string, args ...string) error {
+		calls["execRun"] = true
+		return nil
+	}
+}
+
+func TestDriverName(t *testing.T) {
+	denyAllDevmapper()
+	defer denyAllDevmapper()
+
+	oldInit := fakeInit()
+	defer restoreInit(oldInit)
+
+	d := newDriver(t)
+	if d.String() != "devicemapper" {
+		t.Fatalf("Expected driver name to be devicemapper got %s", d.String())
+	}
+}
+
+func TestDriverCreate(t *testing.T) {
+	denyAllDevmapper()
+	denyAllSyscall()
+	defer denyAllSyscall()
+	defer denyAllDevmapper()
+
+	calls := make(Set)
+	mockAllDevmapper(calls)
+
+	sysMount = func(source, target, fstype string, flags uintptr, data string) (err error) {
+		calls["sysMount"] = true
+		// FIXME: compare the exact source and target strings (inodes + devname)
+		if expectedSource := "/dev/mapper/docker-"; !strings.HasPrefix(source, expectedSource) {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedSource, source)
+		}
+		if expectedTarget := "/tmp/docker-test-devmapper-"; !strings.HasPrefix(target, expectedTarget) {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedTarget, target)
+		}
+		if expectedFstype := "ext4"; fstype != expectedFstype {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedFstype, fstype)
+		}
+		if expectedFlags := uintptr(3236757504); flags != expectedFlags {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedFlags, flags)
+		}
+		return nil
+	}
+
+	Mounted = func(mnt string) (bool, error) {
+		calls["Mounted"] = true
+		if !strings.HasPrefix(mnt, "/tmp/docker-test-devmapper-") || !strings.HasSuffix(mnt, "/mnt/1") {
+			t.Fatalf("Wrong mounted call\nExpected: Mounted(%v)\nReceived: Mounted(%v)\n", "/tmp/docker-test-devmapper-.../mnt/1", mnt)
+		}
+		return false, nil
+	}
+
+	func() {
+		d := newDriver(t)
+
+		calls.Assert(t,
+			"DmSetDevDir",
+			"DmLogWithErrnoInit",
+			"DmTaskSetName",
+			"DmTaskRun",
+			"DmTaskGetInfo",
+			"DmAttachLoopDevice",
+			"execRun",
+			"DmTaskCreate",
+			"DmGetBlockSize",
+			"DmTaskSetTarget",
+			"DmTaskSetCookie",
+			"DmUdevWait",
+			"DmTaskSetSector",
+			"DmTaskSetMessage",
+			"DmTaskSetAddNode",
+		)
+
+		if err := d.Create("1", ""); err != nil {
+			t.Fatal(err)
+		}
+		calls.Assert(t,
+			"DmTaskCreate",
+			"DmTaskGetInfo",
+			"sysMount",
+			"Mounted",
+			"DmTaskRun",
+			"DmTaskSetTarget",
+			"DmTaskSetSector",
+			"DmTaskSetCookie",
+			"DmUdevWait",
+			"DmTaskSetName",
+			"DmTaskSetMessage",
+			"DmTaskSetAddNode",
+		)
+
+	}()
+
+	runtime.GC()
+
+	calls.Assert(t,
+		"DmTaskDestroy",
+	)
+}
+
+func TestDriverRemove(t *testing.T) {
+	denyAllDevmapper()
+	denyAllSyscall()
+	defer denyAllSyscall()
+	defer denyAllDevmapper()
+
+	calls := make(Set)
+	mockAllDevmapper(calls)
+
+	sysMount = func(source, target, fstype string, flags uintptr, data string) (err error) {
+		calls["sysMount"] = true
+		// FIXME: compare the exact source and target strings (inodes + devname)
+		if expectedSource := "/dev/mapper/docker-"; !strings.HasPrefix(source, expectedSource) {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedSource, source)
+		}
+		if expectedTarget := "/tmp/docker-test-devmapper-"; !strings.HasPrefix(target, expectedTarget) {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedTarget, target)
+		}
+		if expectedFstype := "ext4"; fstype != expectedFstype {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedFstype, fstype)
+		}
+		if expectedFlags := uintptr(3236757504); flags != expectedFlags {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedFlags, flags)
+		}
+		return nil
+	}
+	sysUnmount = func(target string, flags int) (err error) {
+		calls["sysUnmount"] = true
+		// FIXME: compare the exact source and target strings (inodes + devname)
+		if expectedTarget := "/tmp/docker-test-devmapper-"; !strings.HasPrefix(target, expectedTarget) {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedTarget, target)
+		}
+		if expectedFlags := 0; flags != expectedFlags {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedFlags, flags)
+		}
+		return nil
+	}
+	Mounted = func(mnt string) (bool, error) {
+		calls["Mounted"] = true
+		return false, nil
+	}
+
+	func() {
+		d := newDriver(t)
+
+		calls.Assert(t,
+			"DmSetDevDir",
+			"DmLogWithErrnoInit",
+			"DmTaskSetName",
+			"DmTaskRun",
+			"DmTaskGetInfo",
+			"DmAttachLoopDevice",
+			"execRun",
+			"DmTaskCreate",
+			"DmGetBlockSize",
+			"DmTaskSetTarget",
+			"DmTaskSetCookie",
+			"DmUdevWait",
+			"DmTaskSetSector",
+			"DmTaskSetMessage",
+			"DmTaskSetAddNode",
+		)
+
+		if err := d.Create("1", ""); err != nil {
+			t.Fatal(err)
+		}
+
+		calls.Assert(t,
+			"DmTaskCreate",
+			"DmTaskGetInfo",
+			"sysMount",
+			"Mounted",
+			"DmTaskRun",
+			"DmTaskSetTarget",
+			"DmTaskSetSector",
+			"DmTaskSetCookie",
+			"DmUdevWait",
+			"DmTaskSetName",
+			"DmTaskSetMessage",
+			"DmTaskSetAddNode",
+		)
+
+		Mounted = func(mnt string) (bool, error) {
+			calls["Mounted"] = true
+			return true, nil
+		}
+
+		if err := d.Remove("1"); err != nil {
+			t.Fatal(err)
+		}
+
+		calls.Assert(t,
+			"DmTaskRun",
+			"DmTaskSetSector",
+			"DmTaskSetName",
+			"DmTaskSetMessage",
+			"DmTaskCreate",
+			"DmTaskGetInfo",
+			"Mounted",
+			"sysUnmount",
+		)
+	}()
+	runtime.GC()
+
+	calls.Assert(t,
+		"DmTaskDestroy",
+	)
+}
+
+func TestCleanup(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	t.Skip("Unimplemented")
+	d := newDriver(t)
+	defer osRemoveAll(d.home)
+
+	mountPoints := make([]string, 2)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+	// Mount the id
+	p, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	mountPoints[0] = p
+
+	if err := d.Create("2", "1"); err != nil {
+		t.Fatal(err)
+	}
+
+	p, err = d.Get("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+	mountPoints[1] = p
+
+	// Ensure that all the mount points are currently mounted
+	for _, p := range mountPoints {
+		if mounted, err := Mounted(p); err != nil {
+			t.Fatal(err)
+		} else if !mounted {
+			t.Fatalf("Expected %s to be mounted", p)
+		}
+	}
+
+	// Ensure that devices are active
+	for _, p := range []string{"1", "2"} {
+		if !d.HasActivatedDevice(p) {
+			t.Fatalf("Expected %s to have an active device", p)
+		}
+	}
+
+	if err := d.Cleanup(); err != nil {
+		t.Fatal(err)
+	}
+
+	// Ensure that all the mount points are no longer mounted
+	for _, p := range mountPoints {
+		if mounted, err := Mounted(p); err != nil {
+			t.Fatal(err)
+		} else if mounted {
+			t.Fatalf("Expected %s to not be mounted", p)
+		}
+	}
+
+	// Ensure that devices are no longer activated
+	for _, p := range []string{"1", "2"} {
+		if d.HasActivatedDevice(p) {
+			t.Fatalf("Expected %s to not be an active device", p)
+		}
+	}
+}
+
+func TestNotMounted(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	t.Skip("Not implemented")
+	d := newDriver(t)
+	defer cleanup(d)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	mounted, err := Mounted(path.Join(d.home, "mnt", "1"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if mounted {
+		t.Fatal("Id 1 should not be mounted")
+	}
+}
+
+func TestMounted(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	d := newDriver(t)
+	defer cleanup(d)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := d.Get("1"); err != nil {
+		t.Fatal(err)
+	}
+
+	mounted, err := Mounted(path.Join(d.home, "mnt", "1"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !mounted {
+		t.Fatal("Id 1 should be mounted")
+	}
+}
+
+func TestInitCleanedDriver(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	d := newDriver(t)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := d.Get("1"); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := d.Cleanup(); err != nil {
+		t.Fatal(err)
+	}
+
+	driver, err := Init(d.home)
+	if err != nil {
+		t.Fatal(err)
+	}
+	d = driver.(*Driver)
+	defer cleanup(d)
+
+	if _, err := d.Get("1"); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestMountMountedDriver(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	d := newDriver(t)
+	defer cleanup(d)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	// Perform get on same id to ensure that it will
+	// not be mounted twice
+	if _, err := d.Get("1"); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := d.Get("1"); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestGetReturnsValidDevice(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	d := newDriver(t)
+	defer cleanup(d)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	if !d.HasDevice("1") {
+		t.Fatalf("Expected id 1 to be in device set")
+	}
+
+	if _, err := d.Get("1"); err != nil {
+		t.Fatal(err)
+	}
+
+	if !d.HasActivatedDevice("1") {
+		t.Fatalf("Expected id 1 to be activated")
+	}
+
+	if !d.HasInitializedDevice("1") {
+		t.Fatalf("Expected id 1 to be initialized")
+	}
+}
+
+func TestDriverGetSize(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	t.Skipf("Size is currently not implemented")
+
+	d := newDriver(t)
+	defer cleanup(d)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	mountPoint, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	size := int64(1024)
+
+	f, err := osCreate(path.Join(mountPoint, "test_file"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Truncate(size); err != nil {
+		t.Fatal(err)
+	}
+	f.Close()
+
+	// diffSize, err := d.DiffSize("1")
+	// if err != nil {
+	// 	t.Fatal(err)
+	// }
+	// if diffSize != size {
+	// 	t.Fatalf("Expected size %d got %d", size, diffSize)
+	// }
+}
+
+func assertMap(t *testing.T, m map[string]bool, keys ...string) {
+	for _, key := range keys {
+		if _, exists := m[key]; !exists {
+			t.Fatalf("Key not set: %s", key)
+		}
+		delete(m, key)
+	}
+	if len(m) != 0 {
+		t.Fatalf("Unexpected keys: %v", m)
+	}
+}

+ 25 - 0
graphdriver/devmapper/mount.go

@@ -0,0 +1,25 @@
+package devmapper
+
+import (
+	"path/filepath"
+)
+
+// FIXME: this is copy-pasted from the aufs driver.
+// It should be moved into the core.
+
+var Mounted = func(mountpoint string) (bool, error) {
+	mntpoint, err := osStat(mountpoint)
+	if err != nil {
+		if osIsNotExist(err) {
+			return false, nil
+		}
+		return false, err
+	}
+	parent, err := osStat(filepath.Join(mountpoint, ".."))
+	if err != nil {
+		return false, err
+	}
+	mntpointSt := toSysStatT(mntpoint.Sys())
+	parentSt := toSysStatT(parent.Sys())
+	return mntpointSt.Dev != parentSt.Dev, nil
+}

+ 50 - 0
graphdriver/devmapper/sys.go

@@ -0,0 +1,50 @@
+package devmapper
+
+import (
+	"os"
+	"os/exec"
+	"syscall"
+)
+
+type (
+	sysStatT syscall.Stat_t
+	sysErrno syscall.Errno
+
+	osFile struct{ *os.File }
+)
+
+var (
+	sysMount       = syscall.Mount
+	sysUnmount     = syscall.Unmount
+	sysCloseOnExec = syscall.CloseOnExec
+	sysSyscall     = syscall.Syscall
+
+	osOpenFile   = os.OpenFile
+	osNewFile    = os.NewFile
+	osCreate     = os.Create
+	osStat       = os.Stat
+	osIsNotExist = os.IsNotExist
+	osIsExist    = os.IsExist
+	osMkdirAll   = os.MkdirAll
+	osRemoveAll  = os.RemoveAll
+	osRename     = os.Rename
+	osReadlink   = os.Readlink
+
+	execRun = func(name string, args ...string) error {
+		return exec.Command(name, args...).Run()
+	}
+)
+
+const (
+	sysMsMgcVal = syscall.MS_MGC_VAL
+	sysMsRdOnly = syscall.MS_RDONLY
+	sysEInval   = syscall.EINVAL
+	sysSysIoctl = syscall.SYS_IOCTL
+
+	osORdWr   = os.O_RDWR
+	osOCreate = os.O_CREATE
+)
+
+func toSysStatT(i interface{}) *sysStatT {
+	return (*sysStatT)(i.(*syscall.Stat_t))
+}

+ 90 - 0
graphdriver/driver.go

@@ -0,0 +1,90 @@
+package graphdriver
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/utils"
+	"os"
+	"path"
+)
+
+type InitFunc func(root string) (Driver, error)
+
+type Driver interface {
+	String() string
+
+	Create(id, parent string) error
+	Remove(id string) error
+
+	Get(id string) (dir string, err error)
+	Exists(id string) bool
+
+	Status() [][2]string
+
+	Cleanup() error
+}
+
+type Differ interface {
+	Diff(id string) (archive.Archive, error)
+	Changes(id string) ([]archive.Change, error)
+	ApplyDiff(id string, diff archive.Archive) error
+	DiffSize(id string) (bytes int64, err error)
+}
+
+var (
+	DefaultDriver string
+	// All registered drivers
+	drivers map[string]InitFunc
+	// Slice of drivers that should be used in an order
+	priority = []string{
+		"aufs",
+		"devicemapper",
+		"vfs",
+	}
+)
+
+func init() {
+	drivers = make(map[string]InitFunc)
+}
+
+func Register(name string, initFunc InitFunc) error {
+	if _, exists := drivers[name]; exists {
+		return fmt.Errorf("Name already registered %s", name)
+	}
+	drivers[name] = initFunc
+
+	return nil
+}
+
+func GetDriver(name, home string) (Driver, error) {
+	if initFunc, exists := drivers[name]; exists {
+		return initFunc(path.Join(home, name))
+	}
+	return nil, fmt.Errorf("No such driver: %s", name)
+}
+
+func New(root string) (driver Driver, err error) {
+	for _, name := range []string{os.Getenv("DOCKER_DRIVER"), DefaultDriver} {
+		if name != "" {
+			return GetDriver(name, root)
+		}
+	}
+
+	// Check for priority drivers first
+	for _, name := range priority {
+		if driver, err = GetDriver(name, root); err != nil {
+			utils.Debugf("Error loading driver %s: %s", name, err)
+			continue
+		}
+		return driver, nil
+	}
+
+	// Check all registered drivers if no priority driver is found
+	for _, initFunc := range drivers {
+		if driver, err = initFunc(root); err != nil {
+			continue
+		}
+		return driver, nil
+	}
+	return nil, err
+}

+ 91 - 0
graphdriver/vfs/driver.go

@@ -0,0 +1,91 @@
+package vfs
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/graphdriver"
+	"os"
+	"os/exec"
+	"path"
+)
+
+func init() {
+	graphdriver.Register("vfs", Init)
+}
+
+func Init(home string) (graphdriver.Driver, error) {
+	d := &Driver{
+		home: home,
+	}
+	return d, nil
+}
+
+type Driver struct {
+	home string
+}
+
+func (d *Driver) String() string {
+	return "vfs"
+}
+
+func (d *Driver) Status() [][2]string {
+	return nil
+}
+
+func (d *Driver) Cleanup() error {
+	return nil
+}
+
+func copyDir(src, dst string) error {
+	cmd := exec.Command("cp", "-aT", "--reflink=auto", src, dst)
+	if err := cmd.Run(); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (d *Driver) Create(id string, parent string) error {
+	dir := d.dir(id)
+	if err := os.MkdirAll(path.Dir(dir), 0700); err != nil {
+		return err
+	}
+	if err := os.Mkdir(dir, 0700); err != nil {
+		return err
+	}
+	if parent == "" {
+		return nil
+	}
+	parentDir, err := d.Get(parent)
+	if err != nil {
+		return fmt.Errorf("%s: %s", parent, err)
+	}
+	if err := copyDir(parentDir, dir); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (d *Driver) dir(id string) string {
+	return path.Join(d.home, "dir", path.Base(id))
+}
+
+func (d *Driver) Remove(id string) error {
+	if _, err := os.Stat(d.dir(id)); err != nil {
+		return err
+	}
+	return os.RemoveAll(d.dir(id))
+}
+
+func (d *Driver) Get(id string) (string, error) {
+	dir := d.dir(id)
+	if st, err := os.Stat(dir); err != nil {
+		return "", err
+	} else if !st.IsDir() {
+		return "", fmt.Errorf("%s: not a directory", dir)
+	}
+	return dir, nil
+}
+
+func (d *Driver) Exists(id string) bool {
+	_, err := os.Stat(d.dir(id))
+	return err == nil
+}

+ 14 - 3
hack/make.sh

@@ -37,13 +37,24 @@ DEFAULT_BUNDLES=(
 	test
 	dynbinary
 	dyntest
+	tgz
 	ubuntu
 )
 
 VERSION=$(cat ./VERSION)
-GITCOMMIT=$(git rev-parse --short HEAD)
-if [ -n "$(git status --porcelain)" ]; then
-	GITCOMMIT="$GITCOMMIT-dirty"
+if [ -d .git ] && command -v git &> /dev/null; then
+	GITCOMMIT=$(git rev-parse --short HEAD)
+	if [ -n "$(git status --porcelain)" ]; then
+		GITCOMMIT="$GITCOMMIT-dirty"
+	fi
+elif [ "$DOCKER_GITCOMMIT" ]; then
+	GITCOMMIT="$DOCKER_GITCOMMIT"
+else
+	echo >&2 'error: .git directory missing and DOCKER_GITCOMMIT not specified'
+	echo >&2 '  Please either build with the .git directory accessible, or specify the'
+	echo >&2 '  exact (--short) commit hash you are building using DOCKER_GITCOMMIT for'
+	echo >&2 '  future accountability in diagnosing build issues.  Thanks!'
+	exit 1
 fi
 
 # Use these flags when compiling the tests and final binary

+ 23 - 0
hack/make/tgz

@@ -0,0 +1,23 @@
+#!/bin/sh
+
+DEST="$1"
+BINARY="$DEST/../binary/docker-$VERSION"
+TGZ="$DEST/docker-$VERSION.tgz"
+
+set -e
+
+if [ ! -x "$BINARY" ]; then
+	echo >&2 'error: binary must be run before tgz'
+	false
+fi
+
+mkdir -p "$DEST/build"
+
+mkdir -p "$DEST/build/usr/local/bin"
+cp -L "$BINARY" "$DEST/build/usr/local/bin/docker"
+
+tar --numeric-owner --owner 0 -C "$DEST/build" -czf "$TGZ" usr
+
+rm -rf "$DEST/build"
+
+echo "Created tgz: $TGZ"

+ 18 - 0
hack/release.sh

@@ -47,6 +47,7 @@ cd /go/src/github.com/dotcloud/docker
 
 RELEASE_BUNDLES=(
 	binary
+	tgz
 	ubuntu
 )
 
@@ -188,6 +189,22 @@ EOF
 	echo "APT repository uploaded. Instructions available at $(s3_url)/ubuntu"
 }
 
+# Upload a tgz to S3
+release_tgz() {
+	[ -e bundles/$VERSION/tgz/docker-$VERSION.tgz ] || {
+		echo >&2 './hack/make.sh must be run before release_tgz'
+		exit 1
+	}
+
+	S3DIR=s3://$BUCKET/builds/Linux/x86_64
+	s3cmd --acl-public put bundles/$VERSION/tgz/docker-$VERSION.tgz $S3DIR/docker-$VERSION.tgz
+
+	if [ -z "$NOLATEST" ]; then
+		echo "Copying docker-$VERSION.tgz to docker-latest.tgz"
+		s3cmd --acl-public cp $S3DIR/docker-$VERSION.tgz $S3DIR/docker-latest.tgz
+	fi
+}
+
 # Upload a static binary to S3
 release_binary() {
 	[ -e bundles/$VERSION/binary/docker-$VERSION ] || {
@@ -230,6 +247,7 @@ release_test() {
 main() {
 	setup_s3
 	release_binary
+	release_tgz
 	release_ubuntu
 	release_index
 	release_test

+ 89 - 163
image.go

@@ -6,17 +6,14 @@ import (
 	"encoding/json"
 	"fmt"
 	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/graphdriver"
 	"github.com/dotcloud/docker/utils"
 	"io"
 	"io/ioutil"
-	"log"
 	"os"
-	"os/exec"
 	"path"
-	"path/filepath"
 	"strconv"
 	"strings"
-	"syscall"
 	"time"
 )
 
@@ -62,39 +59,56 @@ func LoadImage(root string) (*Image, error) {
 		img.Size = int64(size)
 	}
 
-	// Check that the filesystem layer exists
-	if stat, err := os.Stat(layerPath(root)); err != nil {
-		if os.IsNotExist(err) {
-			return nil, fmt.Errorf("Couldn't load image %s: no filesystem layer", img.ID)
-		}
-		return nil, err
-	} else if !stat.IsDir() {
-		return nil, fmt.Errorf("Couldn't load image %s: %s is not a directory", img.ID, layerPath(root))
-	}
 	return img, nil
 }
 
-func StoreImage(img *Image, jsonData []byte, layerData archive.Archive, root string) error {
-	// Check that root doesn't already exist
-	if _, err := os.Stat(root); err == nil {
-		return fmt.Errorf("Image %s already exists", img.ID)
-	} else if !os.IsNotExist(err) {
-		return err
-	}
+func StoreImage(img *Image, jsonData []byte, layerData archive.Archive, root, layer string) error {
 	// Store the layer
-	layer := layerPath(root)
+	var (
+		size   int64
+		err    error
+		driver = img.graph.driver
+	)
 	if err := os.MkdirAll(layer, 0755); err != nil {
 		return err
 	}
 
 	// If layerData is not nil, unpack it into the new layer
 	if layerData != nil {
-		start := time.Now()
-		utils.Debugf("Start untar layer")
-		if err := archive.Untar(layerData, layer); err != nil {
-			return err
+		if differ, ok := driver.(graphdriver.Differ); ok {
+			if err := differ.ApplyDiff(img.ID, layerData); err != nil {
+				return err
+			}
+
+			if size, err = differ.DiffSize(img.ID); err != nil {
+				return err
+			}
+		} else {
+			start := time.Now().UTC()
+			utils.Debugf("Start untar layer")
+			if err := archive.ApplyLayer(layer, layerData); err != nil {
+				return err
+			}
+			utils.Debugf("Untar time: %vs", time.Now().UTC().Sub(start).Seconds())
+
+			if img.Parent == "" {
+				if size, err = utils.TreeSize(layer); err != nil {
+					return err
+				}
+			} else {
+				parent, err := driver.Get(img.Parent)
+				if err != nil {
+					return err
+				}
+				changes, err := archive.ChangesDirs(layer, parent)
+				if err != nil {
+					return err
+				}
+				if size = archive.ChangesSize(layer, changes); err != nil {
+					return err
+				}
+			}
 		}
-		utils.Debugf("Untar time: %vs", time.Now().Sub(start).Seconds())
 	}
 
 	// If raw json is provided, then use it
@@ -102,117 +116,60 @@ func StoreImage(img *Image, jsonData []byte, layerData archive.Archive, root str
 		return ioutil.WriteFile(jsonPath(root), jsonData, 0600)
 	}
 	// Otherwise, unmarshal the image
-	jsonData, err := json.Marshal(img)
-	if err != nil {
+	if jsonData, err = json.Marshal(img); err != nil {
 		return err
 	}
 	if err := ioutil.WriteFile(jsonPath(root), jsonData, 0600); err != nil {
 		return err
 	}
 
-	return StoreSize(img, root)
-}
-
-func StoreSize(img *Image, root string) error {
-	layer := layerPath(root)
-	data := make(map[uint64]bool)
-
-	var totalSize int64
-	filepath.Walk(layer, func(path string, fileInfo os.FileInfo, err error) error {
-		size := fileInfo.Size()
-		if size == 0 {
-			return nil
-		}
-
-		inode := fileInfo.Sys().(*syscall.Stat_t).Ino
-		if _, entryExists := data[inode]; entryExists {
-			return nil
-		}
-		data[inode] = false
-
-		totalSize += size
-		return nil
-	})
-	img.Size = totalSize
-
-	if err := ioutil.WriteFile(path.Join(root, "layersize"), []byte(strconv.Itoa(int(totalSize))), 0600); err != nil {
-		return nil
+	img.Size = size
+	if err := img.SaveSize(root); err != nil {
+		return err
 	}
 
 	return nil
 }
 
-func layerPath(root string) string {
-	return path.Join(root, "layer")
+// SaveSize stores the current `size` value of `img` in the directory `root`.
+func (img *Image) SaveSize(root string) error {
+	if err := ioutil.WriteFile(path.Join(root, "layersize"), []byte(strconv.Itoa(int(img.Size))), 0600); err != nil {
+		return fmt.Errorf("Error storing image size in %s/layersize: %s", root, err)
+	}
+	return nil
 }
 
 func jsonPath(root string) string {
 	return path.Join(root, "json")
 }
 
-func MountAUFS(ro []string, rw string, target string) error {
-	// FIXME: Now mount the layers
-	rwBranch := fmt.Sprintf("%v=rw", rw)
-	roBranches := ""
-	for _, layer := range ro {
-		roBranches += fmt.Sprintf("%v=ro+wh:", layer)
-	}
-	branches := fmt.Sprintf("br:%v:%v", rwBranch, roBranches)
-
-	branches += ",xino=/dev/shm/aufs.xino"
-
-	//if error, try to load aufs kernel module
-	if err := mount("none", target, "aufs", 0, branches); err != nil {
-		log.Printf("Kernel does not support AUFS, trying to load the AUFS module with modprobe...")
-		if err := exec.Command("modprobe", "aufs").Run(); err != nil {
-			return fmt.Errorf("Unable to load the AUFS module")
-		}
-		log.Printf("...module loaded.")
-		if err := mount("none", target, "aufs", 0, branches); err != nil {
-			return fmt.Errorf("Unable to mount using aufs")
-		}
-	}
-	return nil
-}
-
 // TarLayer returns a tar archive of the image's filesystem layer.
-func (img *Image) TarLayer(compression archive.Compression) (archive.Archive, error) {
-	layerPath, err := img.layer()
-	if err != nil {
-		return nil, err
-	}
-	return archive.Tar(layerPath, compression)
-}
-
-func (img *Image) Mount(root, rw string) error {
-	if mounted, err := Mounted(root); err != nil {
-		return err
-	} else if mounted {
-		return fmt.Errorf("%s is already mounted", root)
-	}
-	layers, err := img.layers()
-	if err != nil {
-		return err
-	}
-	// Create the target directories if they don't exist
-	if err := os.Mkdir(root, 0755); err != nil && !os.IsExist(err) {
-		return err
-	}
-	if err := os.Mkdir(rw, 0755); err != nil && !os.IsExist(err) {
-		return err
+func (img *Image) TarLayer() (archive.Archive, error) {
+	if img.graph == nil {
+		return nil, fmt.Errorf("Can't load storage driver for unregistered image %s", img.ID)
 	}
-	if err := MountAUFS(layers, rw, root); err != nil {
-		return err
+	driver := img.graph.driver
+	if differ, ok := driver.(graphdriver.Differ); ok {
+		return differ.Diff(img.ID)
 	}
-	return nil
-}
 
-func (img *Image) Changes(rw string) ([]Change, error) {
-	layers, err := img.layers()
+	imgFs, err := driver.Get(img.ID)
 	if err != nil {
 		return nil, err
 	}
-	return Changes(layers, rw)
+	if img.Parent == "" {
+		return archive.Tar(imgFs, archive.Uncompressed)
+	} else {
+		parentFs, err := driver.Get(img.Parent)
+		if err != nil {
+			return nil, err
+		}
+		changes, err := archive.ChangesDirs(imgFs, parentFs)
+		if err != nil {
+			return nil, err
+		}
+		return archive.ExportChanges(imgFs, changes)
+	}
 }
 
 func ValidateID(id string) error {
@@ -250,40 +207,6 @@ func (img *Image) History() ([]*Image, error) {
 	return parents, nil
 }
 
-// layers returns all the filesystem layers needed to mount an image
-// FIXME: @shykes refactor this function with the new error handling
-//        (I'll do it if I have time tonight, I focus on the rest)
-func (img *Image) layers() ([]string, error) {
-	var (
-		list []string
-		e    error
-	)
-	if err := img.WalkHistory(
-		func(img *Image) (err error) {
-			if layer, err := img.layer(); err != nil {
-				e = err
-			} else if layer != "" {
-				list = append(list, layer)
-			}
-			return err
-		},
-	); err != nil {
-		return nil, err
-	} else if e != nil { // Did an error occur inside the handler?
-		return nil, e
-	}
-	if len(list) == 0 {
-		return nil, fmt.Errorf("No layer found for image %s\n", img.ID)
-	}
-
-	// Inject the dockerinit layer (empty place-holder for mount-binding dockerinit)
-	dockerinitLayer, err := img.getDockerInitLayer()
-	if err != nil {
-		return nil, err
-	}
-	return append([]string{dockerinitLayer}, list...), nil
-}
-
 func (img *Image) WalkHistory(handler func(*Image) error) (err error) {
 	currentImg := img
 	for currentImg != nil {
@@ -310,13 +233,6 @@ func (img *Image) GetParent() (*Image, error) {
 	return img.graph.Get(img.Parent)
 }
 
-func (img *Image) getDockerInitLayer() (string, error) {
-	if img.graph == nil {
-		return "", fmt.Errorf("Can't lookup dockerinit layer of unregistered image")
-	}
-	return img.graph.getDockerInitLayer()
-}
-
 func (img *Image) root() (string, error) {
 	if img.graph == nil {
 		return "", fmt.Errorf("Can't lookup root of unregistered image")
@@ -324,15 +240,6 @@ func (img *Image) root() (string, error) {
 	return img.graph.imageRoot(img.ID), nil
 }
 
-// Return the path of an image's layer
-func (img *Image) layer() (string, error) {
-	root, err := img.root()
-	if err != nil {
-		return "", err
-	}
-	return layerPath(root), nil
-}
-
 func (img *Image) getParentsSize(size int64) int64 {
 	parentImage, err := img.GetParent()
 	if err != nil || parentImage == nil {
@@ -342,6 +249,25 @@ func (img *Image) getParentsSize(size int64) int64 {
 	return parentImage.getParentsSize(size)
 }
 
+// Depth returns the number of parents for a
+// current image
+func (img *Image) Depth() (int, error) {
+	var (
+		count  = 0
+		parent = img
+		err    error
+	)
+
+	for parent != nil {
+		count++
+		parent, err = parent.GetParent()
+		if err != nil {
+			return -1, err
+		}
+	}
+	return count, nil
+}
+
 // Build an Image object from raw json data
 func NewImgJSON(src []byte) (*Image, error) {
 	ret := &Image{}

+ 4 - 5
integration/commands_test.go

@@ -840,13 +840,12 @@ func TestImagesTree(t *testing.T) {
 			t.Fatal(err)
 		}
 		cmdOutput := string(cmdOutputBytes)
-
 		regexpStrings := []string{
 			fmt.Sprintf("└─%s Size: (\\d+.\\d+ MB) \\(virtual \\d+.\\d+ MB\\) Tags: %s:latest", unitTestImageIDShort, unitTestImageName),
-			"(?m)^  └─[0-9a-f]+",
-			"(?m)^    └─[0-9a-f]+",
-			"(?m)^      └─[0-9a-f]+",
-			fmt.Sprintf("        └─%s Size: \\d+ B \\(virtual \\d+.\\d+ MB\\) Tags: test:latest", utils.TruncateID(image.ID)),
+			"(?m)   └─[0-9a-f]+.*",
+			"(?m)    └─[0-9a-f]+.*",
+			"(?m)      └─[0-9a-f]+.*",
+			fmt.Sprintf("(?m)^        └─%s Size: \\d+.\\d+ MB \\(virtual \\d+.\\d+ MB\\) Tags: test:latest", utils.TruncateID(image.ID)),
 		}
 
 		compiledRegexps := []*regexp.Regexp{}

+ 1 - 1
integration/container_test.go

@@ -172,7 +172,7 @@ func TestDiff(t *testing.T) {
 	// Commit the container
 	img, err := runtime.Commit(container1, "", "", "unit test commited image - diff", "", nil)
 	if err != nil {
-		t.Error(err)
+		t.Fatal(err)
 	}
 
 	// Create a new container from the commited image

+ 271 - 12
integration/graph_test.go

@@ -1,16 +1,24 @@
 package docker
 
 import (
+	"errors"
 	"github.com/dotcloud/docker"
+	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/graphdriver"
+	"github.com/dotcloud/docker/utils"
+	"io"
 	"io/ioutil"
 	"os"
 	"path"
 	"testing"
+	"time"
 )
 
 func TestMount(t *testing.T) {
-	graph := tempGraph(t)
+	graph, driver := tempGraph(t)
 	defer os.RemoveAll(graph.Root)
+	defer driver.Cleanup()
+
 	archive, err := fakeTar()
 	if err != nil {
 		t.Fatal(err)
@@ -32,26 +40,277 @@ func TestMount(t *testing.T) {
 	if err := os.MkdirAll(rw, 0700); err != nil {
 		t.Fatal(err)
 	}
-	if err := image.Mount(rootfs, rw); err != nil {
+
+	if _, err := driver.Get(image.ID); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestInit(t *testing.T) {
+	graph, _ := tempGraph(t)
+	defer nukeGraph(graph)
+	// Root should exist
+	if _, err := os.Stat(graph.Root); err != nil {
+		t.Fatal(err)
+	}
+	// Map() should be empty
+	if l, err := graph.Map(); err != nil {
+		t.Fatal(err)
+	} else if len(l) != 0 {
+		t.Fatalf("len(Map()) should return %d, not %d", 0, len(l))
+	}
+}
+
+// Test that Register can be interrupted cleanly without side effects
+func TestInterruptedRegister(t *testing.T) {
+	graph, _ := tempGraph(t)
+	defer nukeGraph(graph)
+	badArchive, w := io.Pipe() // Use a pipe reader as a fake archive which never yields data
+	image := &docker.Image{
+		ID:      docker.GenerateID(),
+		Comment: "testing",
+		Created: time.Now(),
+	}
+	w.CloseWithError(errors.New("But I'm not a tarball!")) // (Nobody's perfect, darling)
+	graph.Register(nil, badArchive, image)
+	if _, err := graph.Get(image.ID); err == nil {
+		t.Fatal("Image should not exist after Register is interrupted")
+	}
+	// Registering the same image again should succeed if the first register was interrupted
+	goodArchive, err := fakeTar()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := graph.Register(nil, goodArchive, image); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// FIXME: Do more extensive tests (ex: create multiple, delete, recreate;
+//       create multiple, check the amount of images and paths, etc..)
+func TestGraphCreate(t *testing.T) {
+	graph, _ := tempGraph(t)
+	defer nukeGraph(graph)
+	archive, err := fakeTar()
+	if err != nil {
+		t.Fatal(err)
+	}
+	image, err := graph.Create(archive, nil, "Testing", "", nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := docker.ValidateID(image.ID); err != nil {
 		t.Fatal(err)
 	}
-	// FIXME: test for mount contents
-	defer func() {
-		if err := docker.Unmount(rootfs); err != nil {
-			t.Error(err)
+	if image.Comment != "Testing" {
+		t.Fatalf("Wrong comment: should be '%s', not '%s'", "Testing", image.Comment)
+	}
+	if image.DockerVersion != docker.VERSION {
+		t.Fatalf("Wrong docker_version: should be '%s', not '%s'", docker.VERSION, image.DockerVersion)
+	}
+	images, err := graph.Map()
+	if err != nil {
+		t.Fatal(err)
+	} else if l := len(images); l != 1 {
+		t.Fatalf("Wrong number of images. Should be %d, not %d", 1, l)
+	}
+	if images[image.ID] == nil {
+		t.Fatalf("Could not find image with id %s", image.ID)
+	}
+}
+
+func TestRegister(t *testing.T) {
+	graph, _ := tempGraph(t)
+	defer nukeGraph(graph)
+	archive, err := fakeTar()
+	if err != nil {
+		t.Fatal(err)
+	}
+	image := &docker.Image{
+		ID:      docker.GenerateID(),
+		Comment: "testing",
+		Created: time.Now(),
+	}
+	err = graph.Register(nil, archive, image)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if images, err := graph.Map(); err != nil {
+		t.Fatal(err)
+	} else if l := len(images); l != 1 {
+		t.Fatalf("Wrong number of images. Should be %d, not %d", 1, l)
+	}
+	if resultImg, err := graph.Get(image.ID); err != nil {
+		t.Fatal(err)
+	} else {
+		if resultImg.ID != image.ID {
+			t.Fatalf("Wrong image ID. Should be '%s', not '%s'", image.ID, resultImg.ID)
 		}
-	}()
+		if resultImg.Comment != image.Comment {
+			t.Fatalf("Wrong image comment. Should be '%s', not '%s'", image.Comment, resultImg.Comment)
+		}
+	}
+}
+
+// Test that an image can be deleted by its shorthand prefix
+func TestDeletePrefix(t *testing.T) {
+	graph, _ := tempGraph(t)
+	defer nukeGraph(graph)
+	img := createTestImage(graph, t)
+	if err := graph.Delete(utils.TruncateID(img.ID)); err != nil {
+		t.Fatal(err)
+	}
+	assertNImages(graph, t, 0)
+}
+
+func createTestImage(graph *docker.Graph, t *testing.T) *docker.Image {
+	archive, err := fakeTar()
+	if err != nil {
+		t.Fatal(err)
+	}
+	img, err := graph.Create(archive, nil, "Test image", "", nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return img
+}
+
+func TestDelete(t *testing.T) {
+	graph, _ := tempGraph(t)
+	defer nukeGraph(graph)
+	archive, err := fakeTar()
+	if err != nil {
+		t.Fatal(err)
+	}
+	assertNImages(graph, t, 0)
+	img, err := graph.Create(archive, nil, "Bla bla", "", nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	assertNImages(graph, t, 1)
+	if err := graph.Delete(img.ID); err != nil {
+		t.Fatal(err)
+	}
+	assertNImages(graph, t, 0)
+
+	archive, err = fakeTar()
+	if err != nil {
+		t.Fatal(err)
+	}
+	// Test 2 create (same name) / 1 delete
+	img1, err := graph.Create(archive, nil, "Testing", "", nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	archive, err = fakeTar()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if _, err = graph.Create(archive, nil, "Testing", "", nil); err != nil {
+		t.Fatal(err)
+	}
+	assertNImages(graph, t, 2)
+	if err := graph.Delete(img1.ID); err != nil {
+		t.Fatal(err)
+	}
+	assertNImages(graph, t, 1)
+
+	// Test delete wrong name
+	if err := graph.Delete("Not_foo"); err == nil {
+		t.Fatalf("Deleting wrong ID should return an error")
+	}
+	assertNImages(graph, t, 1)
+
+	archive, err = fakeTar()
+	if err != nil {
+		t.Fatal(err)
+	}
+	// Test delete twice (pull -> rm -> pull -> rm)
+	if err := graph.Register(nil, archive, img1); err != nil {
+		t.Fatal(err)
+	}
+	if err := graph.Delete(img1.ID); err != nil {
+		t.Fatal(err)
+	}
+	assertNImages(graph, t, 1)
 }
 
-//FIXME: duplicate
-func tempGraph(t *testing.T) *docker.Graph {
-	tmp, err := ioutil.TempDir("", "docker-graph-")
+func TestByParent(t *testing.T) {
+	archive1, _ := fakeTar()
+	archive2, _ := fakeTar()
+	archive3, _ := fakeTar()
+
+	graph, _ := tempGraph(t)
+	defer nukeGraph(graph)
+	parentImage := &docker.Image{
+		ID:      docker.GenerateID(),
+		Comment: "parent",
+		Created: time.Now(),
+		Parent:  "",
+	}
+	childImage1 := &docker.Image{
+		ID:      docker.GenerateID(),
+		Comment: "child1",
+		Created: time.Now(),
+		Parent:  parentImage.ID,
+	}
+	childImage2 := &docker.Image{
+		ID:      docker.GenerateID(),
+		Comment: "child2",
+		Created: time.Now(),
+		Parent:  parentImage.ID,
+	}
+	_ = graph.Register(nil, archive1, parentImage)
+	_ = graph.Register(nil, archive2, childImage1)
+	_ = graph.Register(nil, archive3, childImage2)
+
+	byParent, err := graph.ByParent()
 	if err != nil {
 		t.Fatal(err)
 	}
-	graph, err := docker.NewGraph(tmp)
+	numChildren := len(byParent[parentImage.ID])
+	if numChildren != 2 {
+		t.Fatalf("Expected 2 children, found %d", numChildren)
+	}
+}
+
+/*
+ * HELPER FUNCTIONS
+ */
+
+func assertNImages(graph *docker.Graph, t *testing.T, n int) {
+	if images, err := graph.Map(); err != nil {
+		t.Fatal(err)
+	} else if actualN := len(images); actualN != n {
+		t.Fatalf("Expected %d images, found %d", n, actualN)
+	}
+}
+
+func tempGraph(t *testing.T) (*docker.Graph, graphdriver.Driver) {
+        tmp, err := ioutil.TempDir("", "docker-graph-")
+        if err != nil {
+                t.Fatal(err)
+        }
+        driver, err := graphdriver.New(tmp)
+        if err != nil {
+                t.Fatal(err)
+        }
+        graph, err := docker.NewGraph(tmp, driver)
+        if err != nil {
+                t.Fatal(err)
+        }
+        return graph, driver
+}
+
+func nukeGraph(graph *docker.Graph) {
+	graph.Driver().Cleanup()
+	os.RemoveAll(graph.Root)
+}
+
+func testArchive(t *testing.T) archive.Archive {
+	archive, err := fakeTar()
 	if err != nil {
 		t.Fatal(err)
 	}
-	return graph
+	return archive
 }

+ 3 - 0
integration/runtime_test.go

@@ -74,6 +74,9 @@ func layerArchive(tarfile string) (io.Reader, error) {
 }
 
 func init() {
+	// Always use the same driver (vfs) for all integration tests.
+	// To test other drivers, we need a dedicated driver validation suite.
+	os.Setenv("DOCKER_DRIVER", "vfs")
 	os.Setenv("TEST", "1")
 
 	// Hack to run sys init during unit testing

+ 40 - 3
namesgenerator/names-generator.go

@@ -11,14 +11,51 @@ type NameChecker interface {
 }
 
 var (
-	colors  = [...]string{"white", "silver", "gray", "black", "blue", "green", "cyan", "yellow", "gold", "orange", "brown", "red", "violet", "pink", "magenta", "purple", "maroon", "crimson", "plum", "fuchsia", "lavender", "slate", "navy", "azure", "aqua", "olive", "teal", "lime", "beige", "tan", "sienna"}
-	animals = [...]string{"ant", "bear", "bird", "cat", "chicken", "cow", "deer", "dog", "donkey", "duck", "fish", "fox", "frog", "horse", "kangaroo", "koala", "lemur", "lion", "lizard", "monkey", "octopus", "pig", "shark", "sheep", "sloth", "spider", "squirrel", "tiger", "toad", "weasel", "whale", "wolf"}
+	left = [...]string{"happy", "jolly", "dreamy", "sad", "angry", "pensive", "focused", "sleepy", "grave", "distracted", "determined", "stoic", "stupefied", "sharp", "agitated", "cocky", "tender", "goofy", "furious", "desperate", "hopeful", "compassionate", "silly", "lonely", "condescending", "naughty", "kickass", "drunk", "boring", "nostalgic", "ecstatic", "insane", "cranky", "mad", "jovial", "sick", "hungry", "thirsty", "elegant", "backstabbing", "clever", "trusting", "loving", "suspicious", "berserk", "high", "romantic", "prickly", "evil"}
+	// Docker 0.7.x generates names from notable scientists and hackers.
+	//
+	// Ada Lovelace invented the first algorithm. http://en.wikipedia.org/wiki/Ada_Lovelace (thanks James Turnbull)
+	// Alan Turing was a founding father of computer science. http://en.wikipedia.org/wiki/Alan_Turing.
+	// Albert Einstein invented the general theory of relativity. http://en.wikipedia.org/wiki/Albert_Einstein
+	// Ambroise Pare invented modern surgery. http://en.wikipedia.org/wiki/Ambroise_Par%C3%A9
+	// Archimedes was a physicist, engineer and mathematician who invented too many things to list them here. http://en.wikipedia.org/wiki/Archimedes
+	// Benjamin Franklin is famous for his experiments in electricity and the invention of the lightning rod.
+	// Charles Babbage invented the concept of a programmable computer. http://en.wikipedia.org/wiki/Charles_Babbage.
+	// Charles Darwin established the principles of natural evolution. http://en.wikipedia.org/wiki/Charles_Darwin.
+	// Dennis Ritchie and Ken Thompson created UNIX and the C programming language. http://en.wikipedia.org/wiki/Dennis_Ritchie http://en.wikipedia.org/wiki/Ken_Thompson
+	// Douglas Engelbart gave the mother of all demos: http://en.wikipedia.org/wiki/Douglas_Engelbart
+	// Emmett Brown invented time travel. http://en.wikipedia.org/wiki/Emmett_Brown (thanks Brian Goff)
+	// Enrico Fermi invented the first nuclear reactor. http://en.wikipedia.org/wiki/Enrico_Fermi.
+	// Euclid invented geometry. http://en.wikipedia.org/wiki/Euclid
+	// Galileo was a founding father of modern astronomy, and faced politics and obscurantism to establish scientific truth.  http://en.wikipedia.org/wiki/Galileo_Galilei
+	// Henri Poincare made fundamental contributions in several fields of mathematics. http://en.wikipedia.org/wiki/Henri_Poincar%C3%A9
+	// Isaac Newton invented classic mechanics and modern optics. http://en.wikipedia.org/wiki/Isaac_Newton
+	// John McCarthy invented LISP: http://en.wikipedia.org/wiki/John_McCarthy_(computer_scientist)
+	// Leonardo Da Vinci invented too many things to list here. http://en.wikipedia.org/wiki/Leonardo_da_Vinci.
+	// Linus Torvalds invented Linux and Git. http://en.wikipedia.org/wiki/Linus_Torvalds
+	// Louis Pasteur discovered vaccination, fermentation and pasteurization. http://en.wikipedia.org/wiki/Louis_Pasteur.
+	// Malcolm McLean invented the modern shipping container: http://en.wikipedia.org/wiki/Malcom_McLean
+	// Marie Curie discovered radioactivity. http://en.wikipedia.org/wiki/Marie_Curie.
+	// Muhammad ibn Jābir al-Ḥarrānī al-Battānī was a founding father of astronomy. http://en.wikipedia.org/wiki/Mu%E1%B8%A5ammad_ibn_J%C4%81bir_al-%E1%B8%A4arr%C4%81n%C4%AB_al-Batt%C4%81n%C4%AB
+	// Niels Bohr is the father of quantum theory. http://en.wikipedia.org/wiki/Niels_Bohr.
+	// Nikola Tesla invented the AC electric system and every gadget ever used by a James Bond villain. http://en.wikipedia.org/wiki/Nikola_Tesla
+	// Pierre de Fermat pioneered several aspects of modern mathematics. http://en.wikipedia.org/wiki/Pierre_de_Fermat
+	// Richard Feynman was a key contributor to quantum mechanics and particle physics. http://en.wikipedia.org/wiki/Richard_Feynman
+	// Rob Pike was a key contributor to Unix, Plan 9, the X graphic system, utf-8, and the Go programming language. http://en.wikipedia.org/wiki/Rob_Pike
+	// Stephen Hawking pioneered the field of cosmology by combining general relativity and quantum mechanics. http://en.wikipedia.org/wiki/Stephen_Hawking
+	// Steve Wozniak invented the Apple I and Apple II. http://en.wikipedia.org/wiki/Steve_Wozniak
+	// Werner Heisenberg was a founding father of quantum mechanics. http://en.wikipedia.org/wiki/Werner_Heisenberg
+	// William Shockley, Walter Houser Brattain and John Bardeen co-invented the transistor (thanks Brian Goff).
+	//	http://en.wikipedia.org/wiki/John_Bardeen
+	//	http://en.wikipedia.org/wiki/Walter_Houser_Brattain
+	//	http://en.wikipedia.org/wiki/William_Shockley
+	right = [...]string{"lovelace", "franklin", "tesla", "einstein", "bohr", "davinci", "pasteur", "nobel", "curie", "darwin", "turing", "ritchie", "torvalds", "pike", "thompson", "wozniak", "galileo", "euclide", "newton", "fermat", "archimede", "poincare", "heisenberg", "feynmann", "hawkings", "fermi", "pare", "mccarthy", "engelbart", "babbage", "albattani", "ptolemy", "bell", "wright", "lumiere", "morse", "mclean", "brown", "bardeen", "brattain", "shockley"}
 )
 
 func GenerateRandomName(checker NameChecker) (string, error) {
 	retry := 5
 	rand.Seed(time.Now().UnixNano())
-	name := fmt.Sprintf("%s_%s", colors[rand.Intn(len(colors))], animals[rand.Intn(len(animals))])
+	name := fmt.Sprintf("%s_%s", left[rand.Intn(len(left))], right[rand.Intn(len(right))])
 	for checker != nil && checker.Exists(name) && retry > 0 {
 		name = fmt.Sprintf("%s%d", name, rand.Intn(10))
 		retry = retry - 1

+ 21 - 0
namesgenerator/names-generator_test.go

@@ -26,3 +26,24 @@ func TestGenerateRandomName(t *testing.T) {
 	}
 
 }
+
+// Make sure the generated names are awesome
+func TestGenerateAwesomeNames(t *testing.T) {
+	name, err := GenerateRandomName(&FalseChecker{})
+	if err != nil {
+		t.Error(err)
+	}
+	if !isAwesome(name) {
+		t.Fatalf("Generated name '%s' is not awesome.", name)
+	}
+}
+
+// To be awesome, a container name must involve cool inventors, be easy to remember,
+// be at least mildly funny, and always be politically correct for enterprise adoption.
+func isAwesome(name string) bool {
+	coolInventorNames := true
+	easyToRemember := true
+	mildlyFunnyOnOccasion := true
+	politicallyCorrect := true
+	return coolInventorNames && easyToRemember && mildlyFunnyOnOccasion && politicallyCorrect
+}

+ 3 - 0
network.go

@@ -661,6 +661,9 @@ func (manager *NetworkManager) Allocate() (*NetworkInterface, error) {
 }
 
 func (manager *NetworkManager) Close() error {
+	if manager.disabled {
+		return nil
+	}
 	err1 := manager.tcpPortAllocator.Close()
 	err2 := manager.udpPortAllocator.Close()
 	err3 := manager.ipAllocator.Close()

+ 183 - 27
runtime.go

@@ -5,7 +5,12 @@ import (
 	"container/list"
 	"database/sql"
 	"fmt"
-	"github.com/dotcloud/docker/gograph"
+	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/graphdb"
+	"github.com/dotcloud/docker/graphdriver"
+	"github.com/dotcloud/docker/graphdriver/aufs"
+	_ "github.com/dotcloud/docker/graphdriver/devmapper"
+	_ "github.com/dotcloud/docker/graphdriver/vfs"
 	"github.com/dotcloud/docker/utils"
 	"io"
 	"io/ioutil"
@@ -19,6 +24,9 @@ import (
 	"time"
 )
 
+// Set the max depth to the aufs restriction
+const MaxImageDepth = 42
+
 var defaultDns = []string{"8.8.8.8", "8.8.4.4"}
 
 type Capabilities struct {
@@ -30,6 +38,7 @@ type Capabilities struct {
 
 type Runtime struct {
 	repository     string
+	sysInitPath    string
 	containers     *list.List
 	networkManager *NetworkManager
 	graph          *Graph
@@ -39,7 +48,8 @@ type Runtime struct {
 	volumes        *Graph
 	srv            *Server
 	config         *DaemonConfig
-	containerGraph *gograph.Database
+	containerGraph *graphdb.Database
+	driver         graphdriver.Driver
 }
 
 // List returns an array of all containers registered in the runtime.
@@ -118,6 +128,13 @@ func (runtime *Runtime) Register(container *Container) error {
 		return err
 	}
 
+	// Get the root filesystem from the driver
+	rootfs, err := runtime.driver.Get(container.ID)
+	if err != nil {
+		return fmt.Errorf("Error getting container filesystem %s from driver %s: %s", container.ID, runtime.driver, err)
+	}
+	container.rootfs = rootfs
+
 	container.runtime = runtime
 
 	// Attach to stdout and stderr
@@ -216,12 +233,8 @@ func (runtime *Runtime) Destroy(container *Container) error {
 		return err
 	}
 
-	if mounted, err := container.Mounted(); err != nil {
-		return err
-	} else if mounted {
-		if err := container.Unmount(); err != nil {
-			return fmt.Errorf("Unable to unmount container %v: %v", container.ID, err)
-		}
+	if err := runtime.driver.Remove(container.ID); err != nil {
+		return fmt.Errorf("Driver %s failed to remove root filesystem %s: %s", runtime.driver, container.ID, err)
 	}
 
 	if _, err := runtime.containerGraph.Purge(container.ID); err != nil {
@@ -247,6 +260,7 @@ func (runtime *Runtime) restore() error {
 		return err
 	}
 	containers := make(map[string]*Container)
+	currentDriver := runtime.driver.String()
 
 	for i, v := range dir {
 		id := v.Name()
@@ -258,8 +272,14 @@ func (runtime *Runtime) restore() error {
 			utils.Errorf("Failed to load container %v: %v", id, err)
 			continue
 		}
-		utils.Debugf("Loaded container %v", container.ID)
-		containers[container.ID] = container
+
+		// Ignore the container if it does not support the current driver being used by the graph
+		if container.Driver == "" && currentDriver == "aufs" || container.Driver == currentDriver {
+			utils.Debugf("Loaded container %v", container.ID)
+			containers[container.ID] = container
+		} else {
+			utils.Debugf("Cannot load container %s because it was created with another graph driver.", container.ID)
+		}
 	}
 
 	register := func(container *Container) {
@@ -344,6 +364,17 @@ func (runtime *Runtime) Create(config *Config, name string) (*Container, []strin
 		return nil, nil, err
 	}
 
+	// We add 2 layers to the depth because the container's rw and
+	// init layer add to the restriction
+	depth, err := img.Depth()
+	if err != nil {
+		return nil, nil, err
+	}
+
+	if depth+2 >= MaxImageDepth {
+		return nil, nil, fmt.Errorf("Cannot create container with more than %d parents", MaxImageDepth)
+	}
+
 	checkDeprecatedExpose := func(config *Config) bool {
 		if config != nil {
 			if config.PortSpecs != nil {
@@ -374,11 +405,6 @@ func (runtime *Runtime) Create(config *Config, name string) (*Container, []strin
 		return nil, nil, fmt.Errorf("No command specified")
 	}
 
-	sysInitPath := utils.DockerInitPath()
-	if sysInitPath == "" {
-		return nil, nil, fmt.Errorf("Could not locate dockerinit: This usually means docker was built incorrectly. See http://docs.docker.io/en/latest/contributing/devenvironment for official build instructions.")
-	}
-
 	// Generate id
 	id := GenerateID()
 
@@ -421,7 +447,7 @@ func (runtime *Runtime) Create(config *Config, name string) (*Container, []strin
 	container := &Container{
 		// FIXME: we should generate the ID here instead of receiving it as an argument
 		ID:              id,
-		Created:         time.Now(),
+		Created:         time.Now().UTC(),
 		Path:            entrypoint,
 		Args:            args, //FIXME: de-duplicate from config
 		Config:          config,
@@ -429,8 +455,9 @@ func (runtime *Runtime) Create(config *Config, name string) (*Container, []strin
 		Image:           img.ID, // Always use the resolved image id
 		NetworkSettings: &NetworkSettings{},
 		// FIXME: do we need to store this in the container?
-		SysInitPath: sysInitPath,
+		SysInitPath: runtime.sysInitPath,
 		Name:        name,
+		Driver:      runtime.driver.String(),
 	}
 	container.root = runtime.containerRoot(container.ID)
 	// Step 1: create the container directory.
@@ -439,6 +466,21 @@ func (runtime *Runtime) Create(config *Config, name string) (*Container, []strin
 		return nil, nil, err
 	}
 
+	initID := fmt.Sprintf("%s-init", container.ID)
+	if err := runtime.driver.Create(initID, img.ID); err != nil {
+		return nil, nil, err
+	}
+	initPath, err := runtime.driver.Get(initID)
+	if err != nil {
+		return nil, nil, err
+	}
+	if err := setupInitLayer(initPath); err != nil {
+		return nil, nil, err
+	}
+
+	if err := runtime.driver.Create(container.ID, initID); err != nil {
+		return nil, nil, err
+	}
 	resolvConf, err := utils.GetResolvConf()
 	if err != nil {
 		return nil, nil, err
@@ -549,7 +591,7 @@ func (runtime *Runtime) Children(name string) (map[string]*Container, error) {
 	}
 	children := make(map[string]*Container)
 
-	err = runtime.containerGraph.Walk(name, func(p string, e *gograph.Entity) error {
+	err = runtime.containerGraph.Walk(name, func(p string, e *graphdb.Entity) error {
 		c := runtime.Get(e.ID())
 		if c == nil {
 			return fmt.Errorf("Could not get container for name %s and id %s", e.ID(), p)
@@ -584,24 +626,48 @@ func NewRuntime(config *DaemonConfig) (*Runtime, error) {
 }
 
 func NewRuntimeFromDirectory(config *DaemonConfig) (*Runtime, error) {
+
+	// Set the default driver
+	graphdriver.DefaultDriver = config.GraphDriver
+
+	// Load storage driver
+	driver, err := graphdriver.New(config.Root)
+	if err != nil {
+		return nil, err
+	}
+	utils.Debugf("Using graph driver %s", driver)
+
 	runtimeRepo := path.Join(config.Root, "containers")
 
 	if err := os.MkdirAll(runtimeRepo, 0700); err != nil && !os.IsExist(err) {
 		return nil, err
 	}
 
+	if ad, ok := driver.(*aufs.Driver); ok {
+		if err := ad.Migrate(config.Root, setupInitLayer); err != nil {
+			return nil, err
+		}
+	}
+
 	if err := linkLxcStart(config.Root); err != nil {
 		return nil, err
 	}
-	g, err := NewGraph(path.Join(config.Root, "graph"))
+	g, err := NewGraph(path.Join(config.Root, "graph"), driver)
 	if err != nil {
 		return nil, err
 	}
-	volumes, err := NewGraph(path.Join(config.Root, "volumes"))
+
+	// We don't want to use a complex driver like aufs or devmapper
+	// for volumes, just a plain filesystem
+	volumesDriver, err := graphdriver.GetDriver("vfs", config.Root)
 	if err != nil {
 		return nil, err
 	}
-	repositories, err := NewTagStore(path.Join(config.Root, "repositories"), g)
+	volumes, err := NewGraph(path.Join(config.Root, "volumes"), volumesDriver)
+	if err != nil {
+		return nil, err
+	}
+	repositories, err := NewTagStore(path.Join(config.Root, "repositories-"+driver.String()), g)
 	if err != nil {
 		return nil, fmt.Errorf("Couldn't create Tag store: %s", err)
 	}
@@ -613,24 +679,44 @@ func NewRuntimeFromDirectory(config *DaemonConfig) (*Runtime, error) {
 		return nil, err
 	}
 
-	gographPath := path.Join(config.Root, "linkgraph.db")
+	graphdbPath := path.Join(config.Root, "linkgraph.db")
 	initDatabase := false
-	if _, err := os.Stat(gographPath); err != nil {
+	if _, err := os.Stat(graphdbPath); err != nil {
 		if os.IsNotExist(err) {
 			initDatabase = true
 		} else {
 			return nil, err
 		}
 	}
-	conn, err := sql.Open("sqlite3", gographPath)
+	conn, err := sql.Open("sqlite3", graphdbPath)
 	if err != nil {
 		return nil, err
 	}
-	graph, err := gograph.NewDatabase(conn, initDatabase)
+	graph, err := graphdb.NewDatabase(conn, initDatabase)
 	if err != nil {
 		return nil, err
 	}
 
+	localCopy := path.Join(config.Root, "init", fmt.Sprintf("dockerinit-%s", VERSION))
+	sysInitPath := utils.DockerInitPath(localCopy)
+	if sysInitPath == "" {
+		return nil, fmt.Errorf("Could not locate dockerinit: This usually means docker was built incorrectly. See http://docs.docker.io/en/latest/contributing/devenvironment for official build instructions.")
+	}
+
+	if !utils.IAMSTATIC {
+		if err := os.Mkdir(path.Join(config.Root, fmt.Sprintf("init")), 0700); err != nil && !os.IsExist(err) {
+			return nil, err
+		}
+
+		if _, err := utils.CopyFile(sysInitPath, localCopy); err != nil {
+			return nil, err
+		}
+		sysInitPath = localCopy
+		if err := os.Chmod(sysInitPath, 0700); err != nil {
+			return nil, err
+		}
+	}
+
 	runtime := &Runtime{
 		repository:     runtimeRepo,
 		containers:     list.New(),
@@ -642,6 +728,8 @@ func NewRuntimeFromDirectory(config *DaemonConfig) (*Runtime, error) {
 		volumes:        volumes,
 		config:         config,
 		containerGraph: graph,
+		driver:         driver,
+		sysInitPath:    sysInitPath,
 	}
 
 	if err := runtime.restore(); err != nil {
@@ -651,8 +739,76 @@ func NewRuntimeFromDirectory(config *DaemonConfig) (*Runtime, error) {
 }
 
 func (runtime *Runtime) Close() error {
-	runtime.networkManager.Close()
-	return runtime.containerGraph.Close()
+	errorsStrings := []string{}
+	if err := runtime.networkManager.Close(); err != nil {
+		utils.Errorf("runtime.networkManager.Close(): %s", err.Error())
+		errorsStrings = append(errorsStrings, err.Error())
+	}
+	if err := runtime.driver.Cleanup(); err != nil {
+		utils.Errorf("runtime.driver.Cleanup(): %s", err.Error())
+		errorsStrings = append(errorsStrings, err.Error())
+	}
+	if err := runtime.containerGraph.Close(); err != nil {
+		utils.Errorf("runtime.containerGraph.Close(): %s", err.Error())
+		errorsStrings = append(errorsStrings, err.Error())
+	}
+	if len(errorsStrings) > 0 {
+		return fmt.Errorf("%s", strings.Join(errorsStrings, ", "))
+	}
+	return nil
+}
+
+func (runtime *Runtime) Mount(container *Container) error {
+	dir, err := runtime.driver.Get(container.ID)
+	if err != nil {
+		return fmt.Errorf("Error getting container %s from driver %s: %s", container.ID, runtime.driver, err)
+	}
+	if container.rootfs == "" {
+		container.rootfs = dir
+	} else if container.rootfs != dir {
+		return fmt.Errorf("Error: driver %s is returning inconsistent paths for container %s ('%s' then '%s')",
+			runtime.driver, container.ID, container.rootfs, dir)
+	}
+	return nil
+}
+
+func (runtime *Runtime) Unmount(container *Container) error {
+	// FIXME: Unmount is deprecated because drivers are responsible for mounting
+	// and unmounting when necessary. Use driver.Remove() instead.
+	return nil
+}
+
+func (runtime *Runtime) Changes(container *Container) ([]archive.Change, error) {
+	if differ, ok := runtime.driver.(graphdriver.Differ); ok {
+		return differ.Changes(container.ID)
+	}
+	cDir, err := runtime.driver.Get(container.ID)
+	if err != nil {
+		return nil, fmt.Errorf("Error getting container rootfs %s from driver %s: %s", container.ID, container.runtime.driver, err)
+	}
+	initDir, err := runtime.driver.Get(container.ID + "-init")
+	if err != nil {
+		return nil, fmt.Errorf("Error getting container init rootfs %s from driver %s: %s", container.ID, container.runtime.driver, err)
+	}
+	return archive.ChangesDirs(cDir, initDir)
+}
+
+func (runtime *Runtime) Diff(container *Container) (archive.Archive, error) {
+	if differ, ok := runtime.driver.(graphdriver.Differ); ok {
+		return differ.Diff(container.ID)
+	}
+
+	changes, err := runtime.Changes(container)
+	if err != nil {
+		return nil, err
+	}
+
+	cDir, err := runtime.driver.Get(container.ID)
+	if err != nil {
+		return nil, fmt.Errorf("Error getting container rootfs %s from driver %s: %s", container.ID, container.runtime.driver, err)
+	}
+
+	return archive.ExportChanges(cDir, changes)
 }
 
 // Nuke kills all containers then removes all content

+ 59 - 34
server.go

@@ -8,7 +8,7 @@ import (
 	"github.com/dotcloud/docker/archive"
 	"github.com/dotcloud/docker/auth"
 	"github.com/dotcloud/docker/engine"
-	"github.com/dotcloud/docker/gograph"
+	"github.com/dotcloud/docker/graphdb"
 	"github.com/dotcloud/docker/registry"
 	"github.com/dotcloud/docker/utils"
 	"io"
@@ -63,7 +63,10 @@ func jobInitApi(job *engine.Job) string {
 	}()
 	job.Eng.Hack_SetGlobalVar("httpapi.server", srv)
 	job.Eng.Hack_SetGlobalVar("httpapi.runtime", srv.runtime)
-	job.Eng.Hack_SetGlobalVar("httpapi.bridgeIP", srv.runtime.networkManager.bridgeNetwork.IP)
+	// https://github.com/dotcloud/docker/issues/2768
+	if srv.runtime.networkManager.bridgeNetwork != nil {
+		job.Eng.Hack_SetGlobalVar("httpapi.bridgeIP", srv.runtime.networkManager.bridgeNetwork.IP)
+	}
 	if err := job.Eng.Register("create", srv.ContainerCreate); err != nil {
 		return err.Error()
 	}
@@ -282,7 +285,7 @@ func (srv *Server) exportImage(image *Image, tempdir string) error {
 		}
 
 		// serialize filesystem
-		fs, err := archive.Tar(path.Join(srv.runtime.graph.Root, i.ID, "layer"), archive.Uncompressed)
+		fs, err := i.TarLayer()
 		if err != nil {
 			return err
 		}
@@ -339,7 +342,7 @@ func (srv *Server) ImageLoad(in io.Reader) error {
 	if err := os.Mkdir(repoDir, os.ModeDir); err != nil {
 		return err
 	}
-	if err := archive.Untar(repoFile, repoDir); err != nil {
+	if err := archive.Untar(repoFile, repoDir, nil); err != nil {
 		return err
 	}
 
@@ -593,6 +596,8 @@ func (srv *Server) DockerInfo() *APIInfo {
 	return &APIInfo{
 		Containers:         len(srv.runtime.List()),
 		Images:             imgcount,
+		Driver:             srv.runtime.driver.String(),
+		DriverStatus:       srv.runtime.driver.Status(),
 		MemoryLimit:        srv.runtime.capabilities.MemoryLimit,
 		SwapLimit:          srv.runtime.capabilities.SwapLimit,
 		IPv4Forwarding:     !srv.runtime.capabilities.IPv4ForwardingDisabled,
@@ -675,7 +680,7 @@ func (srv *Server) ContainerTop(name, psArgs string) (*APITop, error) {
 	return nil, fmt.Errorf("No such container: %s", name)
 }
 
-func (srv *Server) ContainerChanges(name string) ([]Change, error) {
+func (srv *Server) ContainerChanges(name string) ([]archive.Change, error) {
 	if container := srv.runtime.Get(name); container != nil {
 		return container.Changes()
 	}
@@ -688,7 +693,7 @@ func (srv *Server) Containers(all, size bool, n int, since, before string) []API
 	out := []APIContainers{}
 
 	names := map[string][]string{}
-	srv.runtime.containerGraph.Walk("/", func(p string, e *gograph.Entity) error {
+	srv.runtime.containerGraph.Walk("/", func(p string, e *graphdb.Entity) error {
 		names[e.ID()] = append(names[e.ID()], p)
 		return nil
 	}, -1)
@@ -760,12 +765,13 @@ func (srv *Server) pullImage(r *registry.Registry, out io.Writer, imgID, endpoin
 	// FIXME: Try to stream the images?
 	// FIXME: Launch the getRemoteImage() in goroutines
 
-	for _, id := range history {
+	for i := len(history) - 1; i >= 0; i-- {
+		id := history[i]
 
 		// ensure no two downloads of the same layer happen at the same time
-		if err := srv.poolAdd("pull", "layer:"+id); err != nil {
+		if c, err := srv.poolAdd("pull", "layer:"+id); err != nil {
 			utils.Errorf("Image (id: %s) pull is already running, skipping: %v", id, err)
-			return nil
+			<-c
 		}
 		defer srv.poolRemove("pull", "layer:"+id)
 
@@ -860,7 +866,7 @@ func (srv *Server) pullRepository(r *registry.Registry, out io.Writer, localName
 			}
 
 			// ensure no two downloads of the same image happen at the same time
-			if err := srv.poolAdd("pull", "img:"+img.ID); err != nil {
+			if _, err := srv.poolAdd("pull", "img:"+img.ID); err != nil {
 				utils.Errorf("Image (id: %s) pull is already running, skipping: %v", img.ID, err)
 				if parallel {
 					errors <- nil
@@ -931,38 +937,43 @@ func (srv *Server) pullRepository(r *registry.Registry, out io.Writer, localName
 	return nil
 }
 
-func (srv *Server) poolAdd(kind, key string) error {
+func (srv *Server) poolAdd(kind, key string) (chan struct{}, error) {
 	srv.Lock()
 	defer srv.Unlock()
 
-	if _, exists := srv.pullingPool[key]; exists {
-		return fmt.Errorf("pull %s is already in progress", key)
+	if c, exists := srv.pullingPool[key]; exists {
+		return c, fmt.Errorf("pull %s is already in progress", key)
 	}
-	if _, exists := srv.pushingPool[key]; exists {
-		return fmt.Errorf("push %s is already in progress", key)
+	if c, exists := srv.pushingPool[key]; exists {
+		return c, fmt.Errorf("push %s is already in progress", key)
 	}
 
+	c := make(chan struct{})
 	switch kind {
 	case "pull":
-		srv.pullingPool[key] = struct{}{}
-		break
+		srv.pullingPool[key] = c
 	case "push":
-		srv.pushingPool[key] = struct{}{}
-		break
+		srv.pushingPool[key] = c
 	default:
-		return fmt.Errorf("Unknown pool type")
+		return nil, fmt.Errorf("Unknown pool type")
 	}
-	return nil
+	return c, nil
 }
 
 func (srv *Server) poolRemove(kind, key string) error {
+	srv.Lock()
+	defer srv.Unlock()
 	switch kind {
 	case "pull":
-		delete(srv.pullingPool, key)
-		break
+		if c, exists := srv.pullingPool[key]; exists {
+			close(c)
+			delete(srv.pullingPool, key)
+		}
 	case "push":
-		delete(srv.pushingPool, key)
-		break
+		if c, exists := srv.pushingPool[key]; exists {
+			close(c)
+			delete(srv.pushingPool, key)
+		}
 	default:
 		return fmt.Errorf("Unknown pool type")
 	}
@@ -974,7 +985,7 @@ func (srv *Server) ImagePull(localName string, tag string, out io.Writer, sf *ut
 	if err != nil {
 		return err
 	}
-	if err := srv.poolAdd("pull", localName+":"+tag); err != nil {
+	if _, err := srv.poolAdd("pull", localName+":"+tag); err != nil {
 		return err
 	}
 	defer srv.poolRemove("pull", localName+":"+tag)
@@ -1169,7 +1180,7 @@ func (srv *Server) pushImage(r *registry.Registry, out io.Writer, remote, imgID,
 
 // FIXME: Allow to interrupt current push when new push of same image is done.
 func (srv *Server) ImagePush(localName string, out io.Writer, sf *utils.StreamFormatter, authConfig *auth.AuthConfig, metaHeaders map[string][]string) error {
-	if err := srv.poolAdd("push", localName); err != nil {
+	if _, err := srv.poolAdd("push", localName); err != nil {
 		return err
 	}
 	defer srv.poolRemove("push", localName)
@@ -1815,8 +1826,8 @@ func NewServer(eng *engine.Engine, config *DaemonConfig) (*Server, error) {
 	srv := &Server{
 		Eng:         eng,
 		runtime:     runtime,
-		pullingPool: make(map[string]struct{}),
-		pushingPool: make(map[string]struct{}),
+		pullingPool: make(map[string]chan struct{}),
+		pushingPool: make(map[string]chan struct{}),
 		events:      make([]utils.JSONMessage, 0, 64), //only keeps the 64 last events
 		listeners:   make(map[string]chan utils.JSONMessage),
 		reqFactory:  nil,
@@ -1826,6 +1837,8 @@ func NewServer(eng *engine.Engine, config *DaemonConfig) (*Server, error) {
 }
 
 func (srv *Server) HTTPRequestFactory(metaHeaders map[string][]string) *utils.HTTPRequestFactory {
+	srv.Lock()
+	defer srv.Unlock()
 	if srv.reqFactory == nil {
 		ud := utils.NewHTTPUserAgentDecorator(srv.versionInfos()...)
 		md := &utils.HTTPMetaHeadersDecorator{
@@ -1838,9 +1851,9 @@ func (srv *Server) HTTPRequestFactory(metaHeaders map[string][]string) *utils.HT
 }
 
 func (srv *Server) LogEvent(action, id, from string) *utils.JSONMessage {
-	now := time.Now().Unix()
+	now := time.Now().UTC().Unix()
 	jm := utils.JSONMessage{Status: action, ID: id, From: from, Time: now}
-	srv.events = append(srv.events, jm)
+	srv.AddEvent(jm)
 	for _, c := range srv.listeners {
 		select { // non blocking channel
 		case c <- jm:
@@ -1850,11 +1863,23 @@ func (srv *Server) LogEvent(action, id, from string) *utils.JSONMessage {
 	return &jm
 }
 
+func (srv *Server) AddEvent(jm utils.JSONMessage) {
+	srv.Lock()
+	defer srv.Unlock()
+	srv.events = append(srv.events, jm)
+}
+
+func (srv *Server) GetEvents() []utils.JSONMessage {
+	srv.RLock()
+	defer srv.RUnlock()
+	return srv.events
+}
+
 type Server struct {
-	sync.Mutex
+	sync.RWMutex
 	runtime     *Runtime
-	pullingPool map[string]struct{}
-	pushingPool map[string]struct{}
+	pullingPool map[string]chan struct{}
+	pushingPool map[string]chan struct{}
 	events      []utils.JSONMessage
 	listeners   map[string]chan utils.JSONMessage
 	reqFactory  *utils.HTTPRequestFactory

+ 16 - 26
server_unit_test.go

@@ -8,49 +8,38 @@ import (
 
 func TestPools(t *testing.T) {
 	srv := &Server{
-		pullingPool: make(map[string]struct{}),
-		pushingPool: make(map[string]struct{}),
+		pullingPool: make(map[string]chan struct{}),
+		pushingPool: make(map[string]chan struct{}),
 	}
 
-	err := srv.poolAdd("pull", "test1")
-	if err != nil {
+	if _, err := srv.poolAdd("pull", "test1"); err != nil {
 		t.Fatal(err)
 	}
-	err = srv.poolAdd("pull", "test2")
-	if err != nil {
+	if _, err := srv.poolAdd("pull", "test2"); err != nil {
 		t.Fatal(err)
 	}
-	err = srv.poolAdd("push", "test1")
-	if err == nil || err.Error() != "pull test1 is already in progress" {
+	if _, err := srv.poolAdd("push", "test1"); err == nil || err.Error() != "pull test1 is already in progress" {
 		t.Fatalf("Expected `pull test1 is already in progress`")
 	}
-	err = srv.poolAdd("pull", "test1")
-	if err == nil || err.Error() != "pull test1 is already in progress" {
+	if _, err := srv.poolAdd("pull", "test1"); err == nil || err.Error() != "pull test1 is already in progress" {
 		t.Fatalf("Expected `pull test1 is already in progress`")
 	}
-	err = srv.poolAdd("wait", "test3")
-	if err == nil || err.Error() != "Unknown pool type" {
+	if _, err := srv.poolAdd("wait", "test3"); err == nil || err.Error() != "Unknown pool type" {
 		t.Fatalf("Expected `Unknown pool type`")
 	}
-
-	err = srv.poolRemove("pull", "test2")
-	if err != nil {
+	if err := srv.poolRemove("pull", "test2"); err != nil {
 		t.Fatal(err)
 	}
-	err = srv.poolRemove("pull", "test2")
-	if err != nil {
+	if err := srv.poolRemove("pull", "test2"); err != nil {
 		t.Fatal(err)
 	}
-	err = srv.poolRemove("pull", "test1")
-	if err != nil {
+	if err := srv.poolRemove("pull", "test1"); err != nil {
 		t.Fatal(err)
 	}
-	err = srv.poolRemove("push", "test1")
-	if err != nil {
+	if err := srv.poolRemove("push", "test1"); err != nil {
 		t.Fatal(err)
 	}
-	err = srv.poolRemove("wait", "test3")
-	if err == nil || err.Error() != "Unknown pool type" {
+	if err := srv.poolRemove("wait", "test3"); err == nil || err.Error() != "Unknown pool type" {
 		t.Fatalf("Expected `Unknown pool type`")
 	}
 }
@@ -70,8 +59,9 @@ func TestLogEvent(t *testing.T) {
 
 	srv.LogEvent("fakeaction2", "fakeid", "fakeimage")
 
-	if len(srv.events) != 2 {
-		t.Fatalf("Expected 2 events, found %d", len(srv.events))
+	numEvents := len(srv.GetEvents())
+	if numEvents != 2 {
+		t.Fatalf("Expected 2 events, found %d", numEvents)
 	}
 	go func() {
 		time.Sleep(200 * time.Millisecond)
@@ -83,7 +73,7 @@ func TestLogEvent(t *testing.T) {
 	setTimeout(t, "Listening for events timed out", 2*time.Second, func() {
 		for i := 2; i < 4; i++ {
 			event := <-listener
-			if event != srv.events[i] {
+			if event != srv.GetEvents()[i] {
 				t.Fatalf("Event received is different than expected")
 			}
 		}

+ 3 - 3
state.go

@@ -26,7 +26,7 @@ func (s *State) String() string {
 		if s.Ghost {
 			return fmt.Sprintf("Ghost")
 		}
-		return fmt.Sprintf("Up %s", utils.HumanDuration(time.Now().Sub(s.StartedAt)))
+		return fmt.Sprintf("Up %s", utils.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
 	}
 	return fmt.Sprintf("Exit %d", s.ExitCode)
 }
@@ -67,7 +67,7 @@ func (s *State) SetRunning(pid int) {
 	s.Ghost = false
 	s.ExitCode = 0
 	s.Pid = pid
-	s.StartedAt = time.Now()
+	s.StartedAt = time.Now().UTC()
 }
 
 func (s *State) SetStopped(exitCode int) {
@@ -76,6 +76,6 @@ func (s *State) SetStopped(exitCode int) {
 
 	s.Running = false
 	s.Pid = 0
-	s.FinishedAt = time.Now()
+	s.FinishedAt = time.Now().UTC()
 	s.ExitCode = exitCode
 }

+ 9 - 3
tags_unit_test.go

@@ -1,6 +1,7 @@
 package docker
 
 import (
+	"github.com/dotcloud/docker/graphdriver"
 	"github.com/dotcloud/docker/utils"
 	"os"
 	"path"
@@ -8,12 +9,16 @@ import (
 )
 
 const (
-	testImageName string = "myapp"
-	testImageID   string = "foo"
+	testImageName = "myapp"
+	testImageID   = "foo"
 )
 
 func mkTestTagStore(root string, t *testing.T) *TagStore {
-	graph, err := NewGraph(root)
+	driver, err := graphdriver.New(root)
+	if err != nil {
+		t.Fatal(err)
+	}
+	graph, err := NewGraph(root, driver)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -42,6 +47,7 @@ func TestLookupImage(t *testing.T) {
 	}
 	defer os.RemoveAll(tmp)
 	store := mkTestTagStore(tmp, t)
+	defer store.graph.driver.Cleanup()
 
 	if img, err := store.LookupImage(testImageName); err != nil {
 		t.Fatal(err)

+ 66 - 6
utils.go

@@ -1,14 +1,43 @@
 package docker
 
+/*
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <errno.h>
+
+// See linux.git/fs/btrfs/ioctl.h
+#define BTRFS_IOCTL_MAGIC 0x94
+#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int)
+
+int
+btrfs_reflink(int fd_out, int fd_in)
+{
+  int res;
+  res = ioctl(fd_out, BTRFS_IOC_CLONE, fd_in);
+  if (res < 0)
+    return errno;
+  return 0;
+}
+
+*/
+import "C"
 import (
 	"fmt"
+	"github.com/dotcloud/docker/archive"
 	"github.com/dotcloud/docker/namesgenerator"
 	"github.com/dotcloud/docker/utils"
+	"io"
 	"io/ioutil"
+	"os"
 	"strconv"
 	"strings"
+	"syscall"
 )
 
+type Change struct {
+	archive.Change
+}
+
 // Compare two Config struct. Do not compare the "Image" nor "Hostname" fields
 // If OpenStdin is set, then it differs
 func CompareConfig(a, b *Config) bool {
@@ -206,14 +235,23 @@ func parseLxcOpt(opt string) (string, string, error) {
 	return strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]), nil
 }
 
+// FIXME: network related stuff (including parsing) should be grouped in network file
+const (
+	PortSpecTemplate       = "ip:hostPort:containerPort"
+	PortSpecTemplateFormat = "ip:hostPort:containerPort | ip::containerPort | hostPort:containerPort"
+)
+
 // We will receive port specs in the format of ip:public:private/proto and these need to be
 // parsed in the internal types
 func parsePortSpecs(ports []string) (map[Port]struct{}, map[Port][]PortBinding, error) {
-	exposedPorts := make(map[Port]struct{}, len(ports))
-	bindings := make(map[Port][]PortBinding)
+	var (
+		exposedPorts = make(map[Port]struct{}, len(ports))
+		bindings     = make(map[Port][]PortBinding)
+	)
 
 	for _, rawPort := range ports {
 		proto := "tcp"
+
 		if i := strings.LastIndex(rawPort, "/"); i != -1 {
 			proto = rawPort[i+1:]
 			rawPort = rawPort[:i]
@@ -224,13 +262,16 @@ func parsePortSpecs(ports []string) (map[Port]struct{}, map[Port][]PortBinding,
 			rawPort = fmt.Sprintf(":%s", rawPort)
 		}
 
-		parts, err := utils.PartParser("ip:hostPort:containerPort", rawPort)
+		parts, err := utils.PartParser(PortSpecTemplate, rawPort)
 		if err != nil {
 			return nil, nil, err
 		}
-		containerPort := parts["containerPort"]
-		rawIp := parts["ip"]
-		hostPort := parts["hostPort"]
+
+		var (
+			containerPort = parts["containerPort"]
+			rawIp         = parts["ip"]
+			hostPort      = parts["hostPort"]
+		)
 
 		if containerPort == "" {
 			return nil, nil, fmt.Errorf("No port specified: %s<empty>", rawPort)
@@ -305,6 +346,14 @@ func migratePortMappings(config *Config, hostConfig *HostConfig) error {
 	return nil
 }
 
+func BtrfsReflink(fd_out, fd_in uintptr) error {
+	res := C.btrfs_reflink(C.int(fd_out), C.int(fd_in))
+	if res != 0 {
+		return syscall.Errno(res)
+	}
+	return nil
+}
+
 // Links come in the format of
 // name:alias
 func parseLink(rawLink string) (map[string]string, error) {
@@ -337,3 +386,14 @@ func (c *checker) Exists(name string) bool {
 func generateRandomName(runtime *Runtime) (string, error) {
 	return namesgenerator.GenerateRandomName(&checker{runtime})
 }
+
+func CopyFile(dstFile, srcFile *os.File) error {
+	err := BtrfsReflink(dstFile.Fd(), srcFile.Fd())
+	if err == nil {
+		return nil
+	}
+
+	// Fall back to normal copy
+	_, err = io.Copy(dstFile, srcFile)
+	return err
+}

+ 35 - 0
utils/fs.go

@@ -0,0 +1,35 @@
+package utils
+
+import (
+	"os"
+	"path/filepath"
+	"syscall"
+)
+
+// TreeSize walks a directory tree and returns its total size in bytes.
+func TreeSize(dir string) (size int64, err error) {
+	data := make(map[uint64]bool)
+	err = filepath.Walk(dir, func(d string, fileInfo os.FileInfo, e error) error {
+		// Ignore directory sizes
+		if fileInfo == nil {
+			return nil
+		}
+
+		s := fileInfo.Size()
+		if fileInfo.IsDir() || s == 0 {
+			return nil
+		}
+
+		// Check inode to handle hard links correctly
+		inode := fileInfo.Sys().(*syscall.Stat_t).Ino
+		if _, exists := data[inode]; exists {
+			return nil
+		}
+		data[inode] = false
+
+		size += s
+
+		return nil
+	})
+	return
+}

+ 47 - 13
utils/utils.go

@@ -270,13 +270,14 @@ func isValidDockerInitPath(target string, selfPath string) bool { // target and
 }
 
 // Figure out the path of our dockerinit (which may be SelfPath())
-func DockerInitPath() string {
+func DockerInitPath(localCopy string) string {
 	selfPath := SelfPath()
 	if isValidDockerInitPath(selfPath, selfPath) {
 		// if we're valid, don't bother checking anything else
 		return selfPath
 	}
 	var possibleInits = []string{
+		localCopy,
 		filepath.Join(filepath.Dir(selfPath), "dockerinit"),
 		// "/usr/libexec includes internal binaries that are not intended to be executed directly by users or shell scripts. Applications may use a single subdirectory under /usr/libexec."
 		"/usr/libexec/docker/dockerinit",
@@ -411,7 +412,7 @@ func (w *WriteBroadcaster) Write(p []byte) (n int, err error) {
 					w.buf.Write([]byte(line))
 					break
 				}
-				b, err := json.Marshal(&JSONLog{Log: line, Stream: sw.stream, Created: time.Now()})
+				b, err := json.Marshal(&JSONLog{Log: line, Stream: sw.stream, Created: time.Now().UTC()})
 				if err != nil {
 					// On error, evict the writer
 					delete(w.writers, sw)
@@ -779,14 +780,19 @@ func NewHTTPRequestError(msg string, res *http.Response) error {
 	}
 }
 
-func (jm *JSONMessage) Display(out io.Writer) error {
+func (jm *JSONMessage) Display(out io.Writer, isTerminal bool) error {
 	if jm.Error != nil {
 		if jm.Error.Code == 401 {
 			return fmt.Errorf("Authentication is required.")
 		}
 		return jm.Error
 	}
-	fmt.Fprintf(out, "%c[2K\r", 27)
+	endl := ""
+	if isTerminal {
+		// <ESC>[2K = erase entire current line
+		fmt.Fprintf(out, "%c[2K\r", 27)
+		endl = "\r"
+	}
 	if jm.Time != 0 {
 		fmt.Fprintf(out, "[%s] ", time.Unix(jm.Time, 0))
 	}
@@ -797,14 +803,14 @@ func (jm *JSONMessage) Display(out io.Writer) error {
 		fmt.Fprintf(out, "(from %s) ", jm.From)
 	}
 	if jm.Progress != "" {
-		fmt.Fprintf(out, "%s %s\r", jm.Status, jm.Progress)
+		fmt.Fprintf(out, "%s %s%s", jm.Status, jm.Progress, endl)
 	} else {
-		fmt.Fprintf(out, "%s\r\n", jm.Status)
+		fmt.Fprintf(out, "%s%s\n", jm.Status, endl)
 	}
 	return nil
 }
 
-func DisplayJSONMessagesStream(in io.Reader, out io.Writer) error {
+func DisplayJSONMessagesStream(in io.Reader, out io.Writer, isTerminal bool) error {
 	dec := json.NewDecoder(in)
 	ids := make(map[string]int)
 	diff := 0
@@ -825,11 +831,17 @@ func DisplayJSONMessagesStream(in io.Reader, out io.Writer) error {
 			} else {
 				diff = len(ids) - line
 			}
-			fmt.Fprintf(out, "%c[%dA", 27, diff)
+			if isTerminal {
+				// <ESC>[{diff}A = move cursor up diff rows
+				fmt.Fprintf(out, "%c[%dA", 27, diff)
+			}
 		}
-		err := jm.Display(out)
+		err := jm.Display(out, isTerminal)
 		if jm.ID != "" {
-			fmt.Fprintf(out, "%c[%dB", 27, diff)
+			if isTerminal {
+				// <ESC>[{diff}B = move cursor down diff rows
+				fmt.Fprintf(out, "%c[%dB", 27, diff)
+			}
 		}
 		if err != nil {
 			return err
@@ -1226,12 +1238,14 @@ func IsClosedError(err error) bool {
 
 func PartParser(template, data string) (map[string]string, error) {
 	// ip:public:private
-	templateParts := strings.Split(template, ":")
-	parts := strings.Split(data, ":")
+	var (
+		templateParts = strings.Split(template, ":")
+		parts         = strings.Split(data, ":")
+		out           = make(map[string]string, len(templateParts))
+	)
 	if len(parts) != len(templateParts) {
 		return nil, fmt.Errorf("Invalid format to parse.  %s should match template %s", data, template)
 	}
-	out := make(map[string]string, len(templateParts))
 
 	for i, t := range templateParts {
 		value := ""
@@ -1279,3 +1293,23 @@ func GetCallerName(depth int) string {
 	callerShortName := parts[len(parts)-1]
 	return callerShortName
 }
+
+func CopyFile(src, dst string) (int64, error) {
+	if src == dst {
+		return 0, nil
+	}
+	sf, err := os.Open(src)
+	if err != nil {
+		return 0, err
+	}
+	defer sf.Close()
+	if err := os.Remove(dst); err != nil && !os.IsNotExist(err) {
+		return 0, err
+	}
+	df, err := os.Create(dst)
+	if err != nil {
+		return 0, err
+	}
+	defer df.Close()
+	return io.Copy(df, sf)
+}

+ 24 - 0
utils_test.go

@@ -0,0 +1,24 @@
+package docker
+
+import (
+	"io"
+	"archive/tar"
+	"bytes"
+)
+
+func fakeTar() (io.Reader, error) {
+       content := []byte("Hello world!\n")
+       buf := new(bytes.Buffer)
+       tw := tar.NewWriter(buf)
+       for _, name := range []string{"/etc/postgres/postgres.conf", "/etc/passwd", "/var/log/postgres/postgres.conf"} {
+               hdr := new(tar.Header)
+               hdr.Size = int64(len(content))
+               hdr.Name = name
+               if err := tw.WriteHeader(hdr); err != nil {
+                       return nil, err
+               }
+               tw.Write([]byte(content))
+       }
+       tw.Close()
+       return buf, nil
+}