
Merge pull request #2609 from shykes/0.6.5-dm-plugin

Move aufs to a storage driver, add devicemapper and dummy drivers
Solomon Hykes, 11 years ago
Parent commit: 1d903da6fd
55 changed files with 6383 additions and 601 deletions
  1. Dockerfile (+14 -5)
  2. api_params.go (+11 -9)
  3. archive/archive.go (+112 -16)
  4. archive/archive_test.go (+4 -4)
  5. archive/changes.go (+317 -0)
  6. archive/changes_test.go (+298 -0)
  7. archive/diff.go (+95 -0)
  8. buildfile.go (+1 -1)
  9. changes.go (+0 -106)
  10. commands.go (+13 -7)
  11. config.go (+2 -0)
  12. container.go (+68 -88)
  13. contrib/docker-device-tool/device_tool.go (+170 -0)
  14. docker/docker.go (+12 -10)
  15. docs/Dockerfile (+1 -1)
  16. docs/sources/commandline/cli.rst (+32 -0)
  17. docs/sources/installation/kernel.rst (+2 -13)
  18. docs/sources/installation/ubuntulinux.rst (+6 -8)
  19. graph.go (+62 -43)
  20. graph_test.go (+18 -8)
  21. graphdb/MAINTAINERS (+0 -0)
  22. graphdb/graphdb.go (+1 -1)
  23. graphdb/graphdb_test.go (+1 -1)
  24. graphdb/sort.go (+1 -1)
  25. graphdb/sort_test.go (+1 -1)
  26. graphdb/utils.go (+1 -1)
  27. graphdriver/aufs/aufs.go (+336 -0)
  28. graphdriver/aufs/aufs_test.go (+623 -0)
  29. graphdriver/aufs/dirs.go (+46 -0)
  30. graphdriver/aufs/migrate.go (+194 -0)
  31. graphdriver/aufs/mount.go (+3 -19)
  32. graphdriver/aufs/mount_darwin.go (+1 -1)
  33. graphdriver/aufs/mount_linux.go (+1 -1)
  34. graphdriver/devmapper/deviceset.go (+956 -0)
  35. graphdriver/devmapper/devmapper.go (+576 -0)
  36. graphdriver/devmapper/devmapper_doc.go (+106 -0)
  37. graphdriver/devmapper/devmapper_log.go (+13 -0)
  38. graphdriver/devmapper/devmapper_test.go (+285 -0)
  39. graphdriver/devmapper/devmapper_wrapper.go (+340 -0)
  40. graphdriver/devmapper/driver.go (+126 -0)
  41. graphdriver/devmapper/driver_test.go (+872 -0)
  42. graphdriver/devmapper/mount.go (+25 -0)
  43. graphdriver/devmapper/sys.go (+50 -0)
  44. graphdriver/driver.go (+90 -0)
  45. graphdriver/vfs/driver.go (+91 -0)
  46. image.go (+89 -163)
  47. integration/commands_test.go (+4 -5)
  48. integration/container_test.go (+1 -1)
  49. integration/graph_test.go (+13 -11)
  50. runtime.go (+159 -20)
  51. server.go (+36 -30)
  52. server_unit_test.go (+12 -23)
  53. tags_unit_test.go (+9 -3)
  54. utils.go (+48 -0)
  55. utils/fs.go (+35 -0)

+ 14 - 5
Dockerfile

@@ -23,7 +23,7 @@
 # the case. Therefore, you don't have to disable it anymore.
 # the case. Therefore, you don't have to disable it anymore.
 #
 #
 
 
-docker-version 0.6.1
+docker-version	0.6.1
 from	ubuntu:12.04
 from	ubuntu:12.04
 maintainer	Solomon Hykes <solomon@dotcloud.com>
 maintainer	Solomon Hykes <solomon@dotcloud.com>
 
 
@@ -33,13 +33,13 @@ run	apt-get update
 run	apt-get install -y -q curl
 run	apt-get install -y -q curl
 run	apt-get install -y -q git
 run	apt-get install -y -q git
 run	apt-get install -y -q mercurial
 run	apt-get install -y -q mercurial
-run apt-get install -y -q build-essential libsqlite3-dev
+run	apt-get install -y -q build-essential libsqlite3-dev
 
 
 # Install Go
 # Install Go
 run	curl -s https://go.googlecode.com/files/go1.2rc5.src.tar.gz | tar -v -C /usr/local -xz
 run	curl -s https://go.googlecode.com/files/go1.2rc5.src.tar.gz | tar -v -C /usr/local -xz
 env	PATH	/usr/local/go/bin:/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin
 env	PATH	/usr/local/go/bin:/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin
 env	GOPATH	/go:/go/src/github.com/dotcloud/docker/vendor
 env	GOPATH	/go:/go/src/github.com/dotcloud/docker/vendor
-run cd /usr/local/go/src && ./make.bash && go install -ldflags '-w -linkmode external -extldflags "-static -Wl,--unresolved-symbols=ignore-in-shared-libs"' -tags netgo -a std
+run	cd /usr/local/go/src && ./make.bash && go install -ldflags '-w -linkmode external -extldflags "-static -Wl,--unresolved-symbols=ignore-in-shared-libs"' -tags netgo -a std
 
 
 # Ubuntu stuff
 # Ubuntu stuff
 run	apt-get install -y -q ruby1.9.3 rubygems libffi-dev
 run	apt-get install -y -q ruby1.9.3 rubygems libffi-dev
@@ -56,11 +56,20 @@ run	apt-get install -y -q iptables
 run	apt-get install -y -q lxc
 run	apt-get install -y -q lxc
 run	apt-get install -y -q aufs-tools
 run	apt-get install -y -q aufs-tools
 
 
+# Get lvm2 source for compiling statically
+run	git clone https://git.fedorahosted.org/git/lvm2.git /usr/local/lvm2 && cd /usr/local/lvm2 && git checkout v2_02_103
+# see https://git.fedorahosted.org/cgit/lvm2.git/refs/tags for release tags
+# note: we can't use "git clone -b" above because it requires at least git 1.7.10 to be able to use that on a tag instead of a branch and we only have 1.7.9.5
+
+# Compile and install lvm2
+run	cd /usr/local/lvm2 && ./configure --enable-static_link && make device-mapper && make install_device-mapper
+# see https://git.fedorahosted.org/cgit/lvm2.git/tree/INSTALL
+
 volume	/var/lib/docker
 volume	/var/lib/docker
 workdir	/go/src/github.com/dotcloud/docker
 workdir	/go/src/github.com/dotcloud/docker
 
 
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
-entrypoint ["hack/dind"]
+entrypoint	["hack/dind"]
 
 
 # Upload docker source
 # Upload docker source
-add	.       /go/src/github.com/dotcloud/docker
+add	.	/go/src/github.com/dotcloud/docker

+ 11 - 9
api_params.go

@@ -33,15 +33,17 @@ type (
 		Debug              bool
 		Debug              bool
 		Containers         int
 		Containers         int
 		Images             int
 		Images             int
-		NFd                int    `json:",omitempty"`
-		NGoroutines        int    `json:",omitempty"`
-		MemoryLimit        bool   `json:",omitempty"`
-		SwapLimit          bool   `json:",omitempty"`
-		IPv4Forwarding     bool   `json:",omitempty"`
-		LXCVersion         string `json:",omitempty"`
-		NEventsListener    int    `json:",omitempty"`
-		KernelVersion      string `json:",omitempty"`
-		IndexServerAddress string `json:",omitempty"`
+		Driver             string      `json:",omitempty"`
+		DriverStatus       [][2]string `json:",omitempty"`
+		NFd                int         `json:",omitempty"`
+		NGoroutines        int         `json:",omitempty"`
+		MemoryLimit        bool        `json:",omitempty"`
+		SwapLimit          bool        `json:",omitempty"`
+		IPv4Forwarding     bool        `json:",omitempty"`
+		LXCVersion         string      `json:",omitempty"`
+		NEventsListener    int         `json:",omitempty"`
+		KernelVersion      string      `json:",omitempty"`
+		IndexServerAddress string      `json:",omitempty"`
 	}
 	}
 
 
 	APITop struct {
 	APITop struct {

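The two fields added above let the daemon report which storage backend is active, plus a list of driver-specific key/value status pairs. A minimal sketch of how such a payload marshals to JSON; the struct repeats only the fields introduced in this diff, and the example values (driver name, pool name, data file path) are made up for illustration:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // Only the fields added in this pull request; the real APIInfo has many more.
    type apiInfo struct {
        Driver       string      `json:",omitempty"`
        DriverStatus [][2]string `json:",omitempty"`
    }

    func main() {
        // Illustrative values; a real daemon fills these in from its graph driver.
        info := apiInfo{
            Driver: "devicemapper",
            DriverStatus: [][2]string{
                {"Pool Name", "docker-pool"},
                {"Data file", "/var/lib/docker/devicemapper/devicemapper/data"},
            },
        }
        out, _ := json.Marshal(info)
        fmt.Println(string(out))
    }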
+ 112 - 16
archive/archive.go

@@ -15,7 +15,15 @@ import (
 
 
 type Archive io.Reader
 type Archive io.Reader
 
 
-type Compression uint32
+type Compression int
+
+type TarOptions struct {
+	Includes    []string
+	Excludes    []string
+	Recursive   bool
+	Compression Compression
+	CreateFiles []string
+}
 
 
 const (
 const (
 	Uncompressed Compression = iota
 	Uncompressed Compression = iota
@@ -80,20 +88,78 @@ func (compression *Compression) Extension() string {
 // Tar creates an archive from the directory at `path`, and returns it as a
 // Tar creates an archive from the directory at `path`, and returns it as a
 // stream of bytes.
 // stream of bytes.
 func Tar(path string, compression Compression) (io.Reader, error) {
 func Tar(path string, compression Compression) (io.Reader, error) {
-	return TarFilter(path, compression, nil)
+	return TarFilter(path, &TarOptions{Recursive: true, Compression: compression})
+}
+
+func escapeName(name string) string {
+	escaped := make([]byte, 0)
+	for i, c := range []byte(name) {
+		if i == 0 && c == '/' {
+			continue
+		}
+		// all printable chars except "-" which is 0x2d
+		if (0x20 <= c && c <= 0x7E) && c != 0x2d {
+			escaped = append(escaped, c)
+		} else {
+			escaped = append(escaped, fmt.Sprintf("\\%03o", c)...)
+		}
+	}
+	return string(escaped)
 }
 }
 
 
 // Tar creates an archive from the directory at `path`, only including files whose relative
 // Tar creates an archive from the directory at `path`, only including files whose relative
 // paths are included in `filter`. If `filter` is nil, then all files are included.
 // paths are included in `filter`. If `filter` is nil, then all files are included.
-func TarFilter(path string, compression Compression, filter []string) (io.Reader, error) {
-	args := []string{"tar", "--numeric-owner", "-f", "-", "-C", path}
-	if filter == nil {
-		filter = []string{"."}
+func TarFilter(path string, options *TarOptions) (io.Reader, error) {
+	args := []string{"tar", "--numeric-owner", "-f", "-", "-C", path, "-T", "-"}
+	if options.Includes == nil {
+		options.Includes = []string{"."}
 	}
 	}
-	for _, f := range filter {
-		args = append(args, "-c"+compression.Flag(), f)
+	args = append(args, "-c"+options.Compression.Flag())
+
+	for _, exclude := range options.Excludes {
+		args = append(args, fmt.Sprintf("--exclude=%s", exclude))
+	}
+
+	if !options.Recursive {
+		args = append(args, "--no-recursion")
+	}
+
+	files := ""
+	for _, f := range options.Includes {
+		files = files + escapeName(f) + "\n"
+	}
+
+	tmpDir := ""
+
+	if options.CreateFiles != nil {
+		var err error // Can't use := here or we override the outer tmpDir
+		tmpDir, err = ioutil.TempDir("", "docker-tar")
+		if err != nil {
+			return nil, err
+		}
+
+		files = files + "-C" + tmpDir + "\n"
+		for _, f := range options.CreateFiles {
+			path := filepath.Join(tmpDir, f)
+			err := os.MkdirAll(filepath.Dir(path), 0600)
+			if err != nil {
+				return nil, err
+			}
+
+			if file, err := os.OpenFile(path, os.O_CREATE, 0600); err != nil {
+				return nil, err
+			} else {
+				file.Close()
+			}
+			files = files + escapeName(f) + "\n"
+		}
 	}
 	}
-	return CmdStream(exec.Command(args[0], args[1:]...))
+
+	return CmdStream(exec.Command(args[0], args[1:]...), &files, func() {
+		if tmpDir != "" {
+			_ = os.RemoveAll(tmpDir)
+		}
+	})
 }
 }
 
 
 // Untar reads a stream of bytes from `archive`, parses it as a tar archive,
 // Untar reads a stream of bytes from `archive`, parses it as a tar archive,
@@ -101,7 +167,7 @@ func TarFilter(path string, compression Compression, filter []string) (io.Reader
 // The archive may be compressed with one of the following algorithms:
 // The archive may be compressed with one of the following algorithms:
 //  identity (uncompressed), gzip, bzip2, xz.
 //  identity (uncompressed), gzip, bzip2, xz.
 // FIXME: specify behavior when target path exists vs. doesn't exist.
 // FIXME: specify behavior when target path exists vs. doesn't exist.
-func Untar(archive io.Reader, path string) error {
+func Untar(archive io.Reader, path string, options *TarOptions) error {
 	if archive == nil {
 	if archive == nil {
 		return fmt.Errorf("Empty archive")
 		return fmt.Errorf("Empty archive")
 	}
 	}
@@ -123,8 +189,15 @@ func Untar(archive io.Reader, path string) error {
 	compression := DetectCompression(buf)
 	compression := DetectCompression(buf)
 
 
 	utils.Debugf("Archive compression detected: %s", compression.Extension())
 	utils.Debugf("Archive compression detected: %s", compression.Extension())
+	args := []string{"--numeric-owner", "-f", "-", "-C", path, "-x" + compression.Flag()}
+
+	if options != nil {
+		for _, exclude := range options.Excludes {
+			args = append(args, fmt.Sprintf("--exclude=%s", exclude))
+		}
+	}
 
 
-	cmd := exec.Command("tar", "--numeric-owner", "-f", "-", "-C", path, "-x"+compression.Flag())
+	cmd := exec.Command("tar", args...)
 	cmd.Stdin = io.MultiReader(bytes.NewReader(buf), archive)
 	cmd.Stdin = io.MultiReader(bytes.NewReader(buf), archive)
 	// Hardcode locale environment for predictable outcome regardless of host configuration.
 	// Hardcode locale environment for predictable outcome regardless of host configuration.
 	//   (see https://github.com/dotcloud/docker/issues/355)
 	//   (see https://github.com/dotcloud/docker/issues/355)
@@ -141,11 +214,11 @@ func Untar(archive io.Reader, path string) error {
 // TarUntar aborts and returns the error.
 // TarUntar aborts and returns the error.
 func TarUntar(src string, filter []string, dst string) error {
 func TarUntar(src string, filter []string, dst string) error {
 	utils.Debugf("TarUntar(%s %s %s)", src, filter, dst)
 	utils.Debugf("TarUntar(%s %s %s)", src, filter, dst)
-	archive, err := TarFilter(src, Uncompressed, filter)
+	archive, err := TarFilter(src, &TarOptions{Compression: Uncompressed, Includes: filter, Recursive: true})
 	if err != nil {
 	if err != nil {
 		return err
 		return err
 	}
 	}
-	return Untar(archive, dst)
+	return Untar(archive, dst, nil)
 }
 }
 
 
 // UntarPath is a convenience function which looks for an archive
 // UntarPath is a convenience function which looks for an archive
@@ -153,7 +226,7 @@ func TarUntar(src string, filter []string, dst string) error {
 func UntarPath(src, dst string) error {
 func UntarPath(src, dst string) error {
 	if archive, err := os.Open(src); err != nil {
 	if archive, err := os.Open(src); err != nil {
 		return err
 		return err
-	} else if err := Untar(archive, dst); err != nil {
+	} else if err := Untar(archive, dst, nil); err != nil {
 		return err
 		return err
 	}
 	}
 	return nil
 	return nil
@@ -222,19 +295,39 @@ func CopyFileWithTar(src, dst string) error {
 		return err
 		return err
 	}
 	}
 	tw.Close()
 	tw.Close()
-	return Untar(buf, filepath.Dir(dst))
+	return Untar(buf, filepath.Dir(dst), nil)
 }
 }
 
 
 // CmdStream executes a command, and returns its stdout as a stream.
 // CmdStream executes a command, and returns its stdout as a stream.
 // If the command fails to run or doesn't complete successfully, an error
 // If the command fails to run or doesn't complete successfully, an error
 // will be returned, including anything written on stderr.
 // will be returned, including anything written on stderr.
-func CmdStream(cmd *exec.Cmd) (io.Reader, error) {
+func CmdStream(cmd *exec.Cmd, input *string, atEnd func()) (io.Reader, error) {
+	if input != nil {
+		stdin, err := cmd.StdinPipe()
+		if err != nil {
+			if atEnd != nil {
+				atEnd()
+			}
+			return nil, err
+		}
+		// Write stdin if any
+		go func() {
+			_, _ = stdin.Write([]byte(*input))
+			stdin.Close()
+		}()
+	}
 	stdout, err := cmd.StdoutPipe()
 	stdout, err := cmd.StdoutPipe()
 	if err != nil {
 	if err != nil {
+		if atEnd != nil {
+			atEnd()
+		}
 		return nil, err
 		return nil, err
 	}
 	}
 	stderr, err := cmd.StderrPipe()
 	stderr, err := cmd.StderrPipe()
 	if err != nil {
 	if err != nil {
+		if atEnd != nil {
+			atEnd()
+		}
 		return nil, err
 		return nil, err
 	}
 	}
 	pipeR, pipeW := io.Pipe()
 	pipeR, pipeW := io.Pipe()
@@ -259,6 +352,9 @@ func CmdStream(cmd *exec.Cmd) (io.Reader, error) {
 		} else {
 		} else {
 			pipeW.Close()
 			pipeW.Close()
 		}
 		}
+		if atEnd != nil {
+			atEnd()
+		}
 	}()
 	}()
 	// Run the command and return the pipe
 	// Run the command and return the pipe
 	if err := cmd.Start(); err != nil {
 	if err := cmd.Start(); err != nil {

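TarFilter now receives all of its knobs through a single *TarOptions value, and Untar gained an options parameter carrying exclude patterns. A short sketch of the new call pattern, written against the signatures shown above and assuming the package is imported as archive (this is not code from the pull request):

    // copyTree archives src, skipping anything matching the excludes, and
    // unpacks the stream into dst.
    func copyTree(src, dst string) error {
        opts := &archive.TarOptions{
            Compression: archive.Uncompressed,
            Includes:    []string{"."},   // same default Tar() now passes
            Excludes:    []string{"tmp"}, // forwarded to tar as --exclude=tmp
            Recursive:   true,
        }
        rdr, err := archive.TarFilter(src, opts)
        if err != nil {
            return err
        }
        // A nil *TarOptions keeps the old Untar behaviour (no excludes).
        return archive.Untar(rdr, dst, nil)
    }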
+ 4 - 4
archive/archive_test.go

@@ -14,7 +14,7 @@ import (
 
 
 func TestCmdStreamLargeStderr(t *testing.T) {
 func TestCmdStreamLargeStderr(t *testing.T) {
 	cmd := exec.Command("/bin/sh", "-c", "dd if=/dev/zero bs=1k count=1000 of=/dev/stderr; echo hello")
 	cmd := exec.Command("/bin/sh", "-c", "dd if=/dev/zero bs=1k count=1000 of=/dev/stderr; echo hello")
-	out, err := CmdStream(cmd)
+	out, err := CmdStream(cmd, nil, nil)
 	if err != nil {
 	if err != nil {
 		t.Fatalf("Failed to start command: %s", err)
 		t.Fatalf("Failed to start command: %s", err)
 	}
 	}
@@ -35,7 +35,7 @@ func TestCmdStreamLargeStderr(t *testing.T) {
 
 
 func TestCmdStreamBad(t *testing.T) {
 func TestCmdStreamBad(t *testing.T) {
 	badCmd := exec.Command("/bin/sh", "-c", "echo hello; echo >&2 error couldn\\'t reverse the phase pulser; exit 1")
 	badCmd := exec.Command("/bin/sh", "-c", "echo hello; echo >&2 error couldn\\'t reverse the phase pulser; exit 1")
-	out, err := CmdStream(badCmd)
+	out, err := CmdStream(badCmd, nil, nil)
 	if err != nil {
 	if err != nil {
 		t.Fatalf("Failed to start command: %s", err)
 		t.Fatalf("Failed to start command: %s", err)
 	}
 	}
@@ -50,7 +50,7 @@ func TestCmdStreamBad(t *testing.T) {
 
 
 func TestCmdStreamGood(t *testing.T) {
 func TestCmdStreamGood(t *testing.T) {
 	cmd := exec.Command("/bin/sh", "-c", "echo hello; exit 0")
 	cmd := exec.Command("/bin/sh", "-c", "echo hello; exit 0")
-	out, err := CmdStream(cmd)
+	out, err := CmdStream(cmd, nil, nil)
 	if err != nil {
 	if err != nil {
 		t.Fatal(err)
 		t.Fatal(err)
 	}
 	}
@@ -83,7 +83,7 @@ func tarUntar(t *testing.T, origin string, compression Compression) error {
 		return err
 		return err
 	}
 	}
 	defer os.RemoveAll(tmp)
 	defer os.RemoveAll(tmp)
-	if err := Untar(archive, tmp); err != nil {
+	if err := Untar(archive, tmp, nil); err != nil {
 		return err
 		return err
 	}
 	}
 	if _, err := os.Stat(tmp); err != nil {
 	if _, err := os.Stat(tmp); err != nil {

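The tests above only pass nil for CmdStream's two new parameters. When they are used, the second argument is written to the command's stdin and the third is a cleanup hook that runs once the stream has been consumed or setup fails, which is how TarFilter removes its temporary directory. A hedged sketch of that usage (the helper name and its arguments are invented for illustration):

    // streamTar feeds a newline-separated file list to tar on stdin and uses
    // the atEnd hook to release scratch space whatever happens.
    func streamTar(srcDir, fileList, scratchDir string) (io.Reader, error) {
        cmd := exec.Command("tar", "--numeric-owner", "-f", "-", "-C", srcDir, "-T", "-", "-c")
        return archive.CmdStream(cmd, &fileList, func() {
            // Called after the command finishes, or immediately if setup failed.
            if scratchDir != "" {
                os.RemoveAll(scratchDir)
            }
        })
    }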
+ 317 - 0
archive/changes.go

@@ -0,0 +1,317 @@
+package archive
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"syscall"
+)
+
+type ChangeType int
+
+const (
+	ChangeModify = iota
+	ChangeAdd
+	ChangeDelete
+)
+
+type Change struct {
+	Path string
+	Kind ChangeType
+}
+
+func (change *Change) String() string {
+	var kind string
+	switch change.Kind {
+	case ChangeModify:
+		kind = "C"
+	case ChangeAdd:
+		kind = "A"
+	case ChangeDelete:
+		kind = "D"
+	}
+	return fmt.Sprintf("%s %s", kind, change.Path)
+}
+
+func Changes(layers []string, rw string) ([]Change, error) {
+	var changes []Change
+	err := filepath.Walk(rw, func(path string, f os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+
+		// Rebase path
+		path, err = filepath.Rel(rw, path)
+		if err != nil {
+			return err
+		}
+		path = filepath.Join("/", path)
+
+		// Skip root
+		if path == "/" {
+			return nil
+		}
+
+		// Skip AUFS metadata
+		if matched, err := filepath.Match("/.wh..wh.*", path); err != nil || matched {
+			return err
+		}
+
+		change := Change{
+			Path: path,
+		}
+
+		// Find out what kind of modification happened
+		file := filepath.Base(path)
+		// If there is a whiteout, then the file was removed
+		if strings.HasPrefix(file, ".wh.") {
+			originalFile := file[len(".wh."):]
+			change.Path = filepath.Join(filepath.Dir(path), originalFile)
+			change.Kind = ChangeDelete
+		} else {
+			// Otherwise, the file was added
+			change.Kind = ChangeAdd
+
+			// ...Unless it already existed in a top layer, in which case, it's a modification
+			for _, layer := range layers {
+				stat, err := os.Stat(filepath.Join(layer, path))
+				if err != nil && !os.IsNotExist(err) {
+					return err
+				}
+				if err == nil {
+					// The file existed in the top layer, so that's a modification
+
+					// However, if it's a directory, maybe it wasn't actually modified.
+					// If you modify /foo/bar/baz, then /foo will be part of the changed files only because it's the parent of bar
+					if stat.IsDir() && f.IsDir() {
+						if f.Size() == stat.Size() && f.Mode() == stat.Mode() && f.ModTime() == stat.ModTime() {
+							// Both directories are the same, don't record the change
+							return nil
+						}
+					}
+					change.Kind = ChangeModify
+					break
+				}
+			}
+		}
+
+		// Record change
+		changes = append(changes, change)
+		return nil
+	})
+	if err != nil && !os.IsNotExist(err) {
+		return nil, err
+	}
+	return changes, nil
+}
+
+type FileInfo struct {
+	parent   *FileInfo
+	name     string
+	stat     syscall.Stat_t
+	children map[string]*FileInfo
+}
+
+func (root *FileInfo) LookUp(path string) *FileInfo {
+	parent := root
+	if path == "/" {
+		return root
+	}
+
+	pathElements := strings.Split(path, "/")
+	for _, elem := range pathElements {
+		if elem != "" {
+			child := parent.children[elem]
+			if child == nil {
+				return nil
+			}
+			parent = child
+		}
+	}
+	return parent
+}
+
+func (info *FileInfo) path() string {
+	if info.parent == nil {
+		return "/"
+	}
+	return filepath.Join(info.parent.path(), info.name)
+}
+
+func (info *FileInfo) isDir() bool {
+	return info.parent == nil || info.stat.Mode&syscall.S_IFDIR == syscall.S_IFDIR
+}
+
+func (info *FileInfo) addChanges(oldInfo *FileInfo, changes *[]Change) {
+	if oldInfo == nil {
+		// add
+		change := Change{
+			Path: info.path(),
+			Kind: ChangeAdd,
+		}
+		*changes = append(*changes, change)
+	}
+
+	// We make a copy so we can modify it to detect additions
+	// also, we only recurse on the old dir if the new info is a directory
+	// otherwise any previous delete/change is considered recursive
+	oldChildren := make(map[string]*FileInfo)
+	if oldInfo != nil && info.isDir() {
+		for k, v := range oldInfo.children {
+			oldChildren[k] = v
+		}
+	}
+
+	for name, newChild := range info.children {
+		oldChild, _ := oldChildren[name]
+		if oldChild != nil {
+			// change?
+			oldStat := &oldChild.stat
+			newStat := &newChild.stat
+			// Note: We can't compare inode or ctime or blocksize here, because these change
+			// when copying a file into a container. However, that is not generally a problem
+			// because any content change will change mtime, and any status change should
+			// be visible when actually comparing the stat fields. The only time this
+			// breaks down is if some code intentionally hides a change by setting
+			// back mtime
+			if oldStat.Mode != newStat.Mode ||
+				oldStat.Uid != newStat.Uid ||
+				oldStat.Gid != newStat.Gid ||
+				oldStat.Rdev != newStat.Rdev ||
+				// Don't look at size for dirs, its not a good measure of change
+				(oldStat.Size != newStat.Size && oldStat.Mode&syscall.S_IFDIR != syscall.S_IFDIR) ||
+				oldStat.Mtim != newStat.Mtim {
+				change := Change{
+					Path: newChild.path(),
+					Kind: ChangeModify,
+				}
+				*changes = append(*changes, change)
+			}
+
+			// Remove from copy so we can detect deletions
+			delete(oldChildren, name)
+		}
+
+		newChild.addChanges(oldChild, changes)
+	}
+	for _, oldChild := range oldChildren {
+		// delete
+		change := Change{
+			Path: oldChild.path(),
+			Kind: ChangeDelete,
+		}
+		*changes = append(*changes, change)
+	}
+
+}
+
+func (info *FileInfo) Changes(oldInfo *FileInfo) []Change {
+	var changes []Change
+
+	info.addChanges(oldInfo, &changes)
+
+	return changes
+}
+
+func newRootFileInfo() *FileInfo {
+	root := &FileInfo{
+		name:     "/",
+		children: make(map[string]*FileInfo),
+	}
+	return root
+}
+
+func collectFileInfo(sourceDir string) (*FileInfo, error) {
+	root := newRootFileInfo()
+
+	err := filepath.Walk(sourceDir, func(path string, f os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+
+		// Rebase path
+		relPath, err := filepath.Rel(sourceDir, path)
+		if err != nil {
+			return err
+		}
+		relPath = filepath.Join("/", relPath)
+
+		if relPath == "/" {
+			return nil
+		}
+
+		parent := root.LookUp(filepath.Dir(relPath))
+		if parent == nil {
+			return fmt.Errorf("collectFileInfo: Unexpectedly no parent for %s", relPath)
+		}
+
+		info := &FileInfo{
+			name:     filepath.Base(relPath),
+			children: make(map[string]*FileInfo),
+			parent:   parent,
+		}
+
+		if err := syscall.Lstat(path, &info.stat); err != nil {
+			return err
+		}
+
+		parent.children[info.name] = info
+
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+	return root, nil
+}
+
+// Compare two directories and generate an array of Change objects describing the changes
+func ChangesDirs(newDir, oldDir string) ([]Change, error) {
+	oldRoot, err := collectFileInfo(oldDir)
+	if err != nil {
+		return nil, err
+	}
+	newRoot, err := collectFileInfo(newDir)
+	if err != nil {
+		return nil, err
+	}
+
+	return newRoot.Changes(oldRoot), nil
+}
+
+func ChangesSize(newDir string, changes []Change) int64 {
+	var size int64
+	for _, change := range changes {
+		if change.Kind == ChangeModify || change.Kind == ChangeAdd {
+			file := filepath.Join(newDir, change.Path)
+			fileInfo, _ := os.Lstat(file)
+			if fileInfo != nil && !fileInfo.IsDir() {
+				size += fileInfo.Size()
+			}
+		}
+	}
+	return size
+}
+
+func ExportChanges(dir string, changes []Change) (Archive, error) {
+	files := make([]string, 0)
+	deletions := make([]string, 0)
+	for _, change := range changes {
+		if change.Kind == ChangeModify || change.Kind == ChangeAdd {
+			files = append(files, change.Path)
+		}
+		if change.Kind == ChangeDelete {
+			base := filepath.Base(change.Path)
+			dir := filepath.Dir(change.Path)
+			deletions = append(deletions, filepath.Join(dir, ".wh."+base))
+		}
+	}
+	// FIXME: Why do we create whiteout files inside Tar code ?
+	return TarFilter(dir, &TarOptions{
+		Compression: Uncompressed,
+		Includes:    files,
+		Recursive:   false,
+		CreateFiles: deletions,
+	})
+}

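changes.go keeps the original AUFS whiteout walk (Changes) and adds a stat-based comparison of two directory trees (ChangesDirs) plus a size helper. A sketch of how a caller might diff a mounted container rootfs against its image; the directory arguments and the helper name are placeholders, not part of the PR:

    // reportDiff prints every change between the two trees and the number of
    // bytes added or modified.
    func reportDiff(containerRootfs, imageRootfs string) error {
        changes, err := archive.ChangesDirs(containerRootfs, imageRootfs)
        if err != nil {
            return err
        }
        for _, c := range changes {
            fmt.Println(c.String()) // e.g. "A /etc/new", "C /etc/passwd", "D /tmp/gone"
        }
        fmt.Printf("%d bytes added or modified\n", archive.ChangesSize(containerRootfs, changes))
        return nil
    }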
+ 298 - 0
archive/changes_test.go

@@ -0,0 +1,298 @@
+package archive
+
+import (
+	"io/ioutil"
+	"os"
+	"os/exec"
+	"path"
+	"sort"
+	"testing"
+	"time"
+)
+
+func max(x, y int) int {
+	if x >= y {
+		return x
+	}
+	return y
+}
+
+func copyDir(src, dst string) error {
+	cmd := exec.Command("cp", "-a", src, dst)
+	if err := cmd.Run(); err != nil {
+		return err
+	}
+	return nil
+}
+
+// Helper to sort []Change by path
+type byPath struct{ changes []Change }
+
+func (b byPath) Less(i, j int) bool { return b.changes[i].Path < b.changes[j].Path }
+func (b byPath) Len() int           { return len(b.changes) }
+func (b byPath) Swap(i, j int)      { b.changes[i], b.changes[j] = b.changes[j], b.changes[i] }
+
+type FileType uint32
+
+const (
+	Regular FileType = iota
+	Dir
+	Symlink
+)
+
+type FileData struct {
+	filetype    FileType
+	path        string
+	contents    string
+	permissions os.FileMode
+}
+
+func createSampleDir(t *testing.T, root string) {
+	files := []FileData{
+		{Regular, "file1", "file1\n", 0600},
+		{Regular, "file2", "file2\n", 0666},
+		{Regular, "file3", "file3\n", 0404},
+		{Regular, "file4", "file4\n", 0600},
+		{Regular, "file5", "file5\n", 0600},
+		{Regular, "file6", "file6\n", 0600},
+		{Regular, "file7", "file7\n", 0600},
+		{Dir, "dir1", "", 0740},
+		{Regular, "dir1/file1-1", "file1-1\n", 01444},
+		{Regular, "dir1/file1-2", "file1-2\n", 0666},
+		{Dir, "dir2", "", 0700},
+		{Regular, "dir2/file2-1", "file2-1\n", 0666},
+		{Regular, "dir2/file2-2", "file2-2\n", 0666},
+		{Dir, "dir3", "", 0700},
+		{Regular, "dir3/file3-1", "file3-1\n", 0666},
+		{Regular, "dir3/file3-2", "file3-2\n", 0666},
+		{Dir, "dir4", "", 0700},
+		{Regular, "dir4/file3-1", "file4-1\n", 0666},
+		{Regular, "dir4/file3-2", "file4-2\n", 0666},
+		{Symlink, "symlink1", "target1", 0666},
+		{Symlink, "symlink2", "target2", 0666},
+	}
+	for _, info := range files {
+		if info.filetype == Dir {
+			if err := os.MkdirAll(path.Join(root, info.path), info.permissions); err != nil {
+				t.Fatal(err)
+			}
+		} else if info.filetype == Regular {
+			if err := ioutil.WriteFile(path.Join(root, info.path), []byte(info.contents), info.permissions); err != nil {
+				t.Fatal(err)
+			}
+		} else if info.filetype == Symlink {
+			if err := os.Symlink(info.contents, path.Join(root, info.path)); err != nil {
+				t.Fatal(err)
+			}
+		}
+	}
+}
+
+// Create a directory, copy it, and make sure we report no changes between the two
+func TestChangesDirsEmpty(t *testing.T) {
+	src, err := ioutil.TempDir("", "docker-changes-test")
+	if err != nil {
+		t.Fatal(err)
+	}
+	createSampleDir(t, src)
+	dst := src + "-copy"
+	if err := copyDir(src, dst); err != nil {
+		t.Fatal(err)
+	}
+	changes, err := ChangesDirs(dst, src)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(changes) != 0 {
+		t.Fatalf("Reported changes for identical dirs: %v", changes)
+	}
+	os.RemoveAll(src)
+	os.RemoveAll(dst)
+}
+
+func mutateSampleDir(t *testing.T, root string) {
+	// Remove a regular file
+	if err := os.RemoveAll(path.Join(root, "file1")); err != nil {
+		t.Fatal(err)
+	}
+
+	// Remove a directory
+	if err := os.RemoveAll(path.Join(root, "dir1")); err != nil {
+		t.Fatal(err)
+	}
+
+	// Remove a symlink
+	if err := os.RemoveAll(path.Join(root, "symlink1")); err != nil {
+		t.Fatal(err)
+	}
+
+	// Rewrite a file
+	if err := ioutil.WriteFile(path.Join(root, "file2"), []byte("fileN\n"), 0777); err != nil {
+		t.Fatal(err)
+	}
+
+	// Replace a file
+	if err := os.RemoveAll(path.Join(root, "file3")); err != nil {
+		t.Fatal(err)
+	}
+	if err := ioutil.WriteFile(path.Join(root, "file3"), []byte("fileM\n"), 0404); err != nil {
+		t.Fatal(err)
+	}
+
+	// Touch file
+	if err := os.Chtimes(path.Join(root, "file4"), time.Now(), time.Now()); err != nil {
+		t.Fatal(err)
+	}
+
+	// Replace file with dir
+	if err := os.RemoveAll(path.Join(root, "file5")); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.MkdirAll(path.Join(root, "file5"), 0666); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create new file
+	if err := ioutil.WriteFile(path.Join(root, "filenew"), []byte("filenew\n"), 0777); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create new dir
+	if err := os.MkdirAll(path.Join(root, "dirnew"), 0766); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create a new symlink
+	if err := os.Symlink("targetnew", path.Join(root, "symlinknew")); err != nil {
+		t.Fatal(err)
+	}
+
+	// Change a symlink
+	if err := os.RemoveAll(path.Join(root, "symlink2")); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.Symlink("target2change", path.Join(root, "symlink2")); err != nil {
+		t.Fatal(err)
+	}
+
+	// Replace dir with file
+	if err := os.RemoveAll(path.Join(root, "dir2")); err != nil {
+		t.Fatal(err)
+	}
+	if err := ioutil.WriteFile(path.Join(root, "dir2"), []byte("dir2\n"), 0777); err != nil {
+		t.Fatal(err)
+	}
+
+	// Touch dir
+	if err := os.Chtimes(path.Join(root, "dir3"), time.Now(), time.Now()); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestChangesDirsMutated(t *testing.T) {
+	src, err := ioutil.TempDir("", "docker-changes-test")
+	if err != nil {
+		t.Fatal(err)
+	}
+	createSampleDir(t, src)
+	dst := src + "-copy"
+	if err := copyDir(src, dst); err != nil {
+		t.Fatal(err)
+	}
+	mutateSampleDir(t, dst)
+
+	changes, err := ChangesDirs(dst, src)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	sort.Sort(byPath{changes})
+
+	expectedChanges := []Change{
+		{"/dir1", ChangeDelete},
+		{"/dir2", ChangeModify},
+		{"/dir3", ChangeModify},
+		{"/dirnew", ChangeAdd},
+		{"/file1", ChangeDelete},
+		{"/file2", ChangeModify},
+		{"/file3", ChangeModify},
+		{"/file4", ChangeModify},
+		{"/file5", ChangeModify},
+		{"/filenew", ChangeAdd},
+		{"/symlink1", ChangeDelete},
+		{"/symlink2", ChangeModify},
+		{"/symlinknew", ChangeAdd},
+	}
+
+	i := 0
+	for ; i < max(len(changes), len(expectedChanges)); i++ {
+		if i >= len(expectedChanges) {
+			t.Fatalf("unexpected change %s\n", changes[i].String())
+		}
+		if i >= len(changes) {
+			t.Fatalf("no change for expected change %s\n", expectedChanges[i].String())
+		}
+		if changes[i].Path == expectedChanges[i].Path {
+			if changes[i] != expectedChanges[i] {
+				t.Fatalf("Wrong change for %s, expected %s, got %s\n", changes[i].Path, expectedChanges[i].String(), changes[i].String())
+			}
+		} else if changes[i].Path < expectedChanges[i].Path {
+			t.Fatalf("unexpected change %s\n", changes[i].String())
+		} else {
+			t.Fatalf("no change for expected change %s\n", expectedChanges[i].String())
+		}
+	}
+	for ; i < len(expectedChanges); i++ {
+	}
+
+	os.RemoveAll(src)
+	os.RemoveAll(dst)
+}
+
+func TestApplyLayer(t *testing.T) {
+	t.Skip("Skipping TestApplyLayer due to known failures") // Disable this for now as it is broken
+	return
+
+	src, err := ioutil.TempDir("", "docker-changes-test")
+	if err != nil {
+		t.Fatal(err)
+	}
+	createSampleDir(t, src)
+	dst := src + "-copy"
+	if err := copyDir(src, dst); err != nil {
+		t.Fatal(err)
+	}
+	mutateSampleDir(t, dst)
+
+	changes, err := ChangesDirs(dst, src)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	layer, err := ExportChanges(dst, changes)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	layerCopy, err := NewTempArchive(layer, "")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if err := ApplyLayer(src, layerCopy); err != nil {
+		t.Fatal(err)
+	}
+
+	changes2, err := ChangesDirs(src, dst)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(changes2) != 0 {
+		t.Fatalf("Unexpected differences after re applying mutation: %v", changes)
+	}
+
+	os.RemoveAll(src)
+	os.RemoveAll(dst)
+}

+ 95 - 0
archive/diff.go

@@ -0,0 +1,95 @@
+package archive
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"syscall"
+	"time"
+)
+
+// ApplyLayer parses a diff in the standard layer format from `layer`, and
+// applies it to the directory `dest`.
+func ApplyLayer(dest string, layer Archive) error {
+	// Poor man's diff applier in 2 steps:
+
+	// Step 1: untar everything in place
+	if err := Untar(layer, dest, nil); err != nil {
+		return err
+	}
+
+	modifiedDirs := make(map[string]*syscall.Stat_t)
+	addDir := func(file string) {
+		d := filepath.Dir(file)
+		if _, exists := modifiedDirs[d]; !exists {
+			if s, err := os.Lstat(d); err == nil {
+				if sys := s.Sys(); sys != nil {
+					if stat, ok := sys.(*syscall.Stat_t); ok {
+						modifiedDirs[d] = stat
+					}
+				}
+			}
+		}
+	}
+
+	// Step 2: walk for whiteouts and apply them, removing them in the process
+	err := filepath.Walk(dest, func(fullPath string, f os.FileInfo, err error) error {
+		if err != nil {
+			if os.IsNotExist(err) {
+				// This happens in the case of whiteouts in parent dir removing a directory
+				// We just ignore it
+				return filepath.SkipDir
+			}
+			return err
+		}
+
+		// Rebase path
+		path, err := filepath.Rel(dest, fullPath)
+		if err != nil {
+			return err
+		}
+		path = filepath.Join("/", path)
+
+		// Skip AUFS metadata
+		if matched, err := filepath.Match("/.wh..wh.*", path); err != nil {
+			return err
+		} else if matched {
+			addDir(fullPath)
+			if err := os.RemoveAll(fullPath); err != nil {
+				return err
+			}
+		}
+
+		filename := filepath.Base(path)
+		if strings.HasPrefix(filename, ".wh.") {
+			rmTargetName := filename[len(".wh."):]
+			rmTargetPath := filepath.Join(filepath.Dir(fullPath), rmTargetName)
+
+			// Remove the file targeted by the whiteout
+			addDir(rmTargetPath)
+			if err := os.RemoveAll(rmTargetPath); err != nil {
+				return err
+			}
+			// Remove the whiteout itself
+			addDir(fullPath)
+			if err := os.RemoveAll(fullPath); err != nil {
+				return err
+			}
+		}
+		return nil
+	})
+	if err != nil {
+		return err
+	}
+
+	for k, v := range modifiedDirs {
+		aTime := time.Unix(v.Atim.Unix())
+		mTime := time.Unix(v.Mtim.Unix())
+
+		if err := os.Chtimes(k, aTime, mTime); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}

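Together with ExportChanges from changes.go, ApplyLayer closes the loop that the skipped TestApplyLayer in changes_test.go above is meant to verify: compute the changes between two trees, export them as a tar layer in which .wh. whiteout entries mark deletions, then replay that layer onto a fresh copy of the base tree. A sketch of that round trip; the three directory parameters are placeholders supplied by the caller:

    func roundTrip(baseDir, modifiedDir, baseCopyDir string) error {
        changes, err := archive.ChangesDirs(modifiedDir, baseDir)
        if err != nil {
            return err
        }
        layer, err := archive.ExportChanges(modifiedDir, changes)
        if err != nil {
            return err
        }
        // baseCopyDir should end up identical to modifiedDir.
        return archive.ApplyLayer(baseCopyDir, layer)
    }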
+ 1 - 1
buildfile.go

@@ -476,7 +476,7 @@ func (b *buildFile) Build(context io.Reader) (string, error) {
 	if err != nil {
 	if err != nil {
 		return "", err
 		return "", err
 	}
 	}
-	if err := archive.Untar(context, name); err != nil {
+	if err := archive.Untar(context, name, nil); err != nil {
 		return "", err
 		return "", err
 	}
 	}
 	defer os.RemoveAll(name)
 	defer os.RemoveAll(name)

+ 0 - 106
changes.go

@@ -1,106 +0,0 @@
-package docker
-
-import (
-	"fmt"
-	"os"
-	"path/filepath"
-	"strings"
-)
-
-type ChangeType int
-
-const (
-	ChangeModify = iota
-	ChangeAdd
-	ChangeDelete
-)
-
-type Change struct {
-	Path string
-	Kind ChangeType
-}
-
-func (change *Change) String() string {
-	var kind string
-	switch change.Kind {
-	case ChangeModify:
-		kind = "C"
-	case ChangeAdd:
-		kind = "A"
-	case ChangeDelete:
-		kind = "D"
-	}
-	return fmt.Sprintf("%s %s", kind, change.Path)
-}
-
-func Changes(layers []string, rw string) ([]Change, error) {
-	var changes []Change
-	err := filepath.Walk(rw, func(path string, f os.FileInfo, err error) error {
-		if err != nil {
-			return err
-		}
-
-		// Rebase path
-		path, err = filepath.Rel(rw, path)
-		if err != nil {
-			return err
-		}
-		path = filepath.Join("/", path)
-
-		// Skip root
-		if path == "/" {
-			return nil
-		}
-
-		// Skip AUFS metadata
-		if matched, err := filepath.Match("/.wh..wh.*", path); err != nil || matched {
-			return err
-		}
-
-		change := Change{
-			Path: path,
-		}
-
-		// Find out what kind of modification happened
-		file := filepath.Base(path)
-		// If there is a whiteout, then the file was removed
-		if strings.HasPrefix(file, ".wh.") {
-			originalFile := file[len(".wh."):]
-			change.Path = filepath.Join(filepath.Dir(path), originalFile)
-			change.Kind = ChangeDelete
-		} else {
-			// Otherwise, the file was added
-			change.Kind = ChangeAdd
-
-			// ...Unless it already existed in a top layer, in which case, it's a modification
-			for _, layer := range layers {
-				stat, err := os.Stat(filepath.Join(layer, path))
-				if err != nil && !os.IsNotExist(err) {
-					return err
-				}
-				if err == nil {
-					// The file existed in the top layer, so that's a modification
-
-					// However, if it's a directory, maybe it wasn't actually modified.
-					// If you modify /foo/bar/baz, then /foo will be part of the changed files only because it's the parent of bar
-					if stat.IsDir() && f.IsDir() {
-						if f.Size() == stat.Size() && f.Mode() == stat.Mode() && f.ModTime() == stat.ModTime() {
-							// Both directories are the same, don't record the change
-							return nil
-						}
-					}
-					change.Kind = ChangeModify
-					break
-				}
-			}
-		}
-
-		// Record change
-		changes = append(changes, change)
-		return nil
-	})
-	if err != nil && !os.IsNotExist(err) {
-		return nil, err
-	}
-	return changes, nil
-}

+ 13 - 7
commands.go

@@ -463,6 +463,10 @@ func (cli *DockerCli) CmdInfo(args ...string) error {
 
 
 	fmt.Fprintf(cli.out, "Containers: %d\n", out.Containers)
 	fmt.Fprintf(cli.out, "Containers: %d\n", out.Containers)
 	fmt.Fprintf(cli.out, "Images: %d\n", out.Images)
 	fmt.Fprintf(cli.out, "Images: %d\n", out.Images)
+	fmt.Fprintf(cli.out, "Driver: %s\n", out.Driver)
+	for _, pair := range out.DriverStatus {
+		fmt.Fprintf(cli.out, " %s: %s\n", pair[0], pair[1])
+	}
 	if out.Debug || os.Getenv("DEBUG") != "" {
 	if out.Debug || os.Getenv("DEBUG") != "" {
 		fmt.Fprintf(cli.out, "Debug mode (server): %v\n", out.Debug)
 		fmt.Fprintf(cli.out, "Debug mode (server): %v\n", out.Debug)
 		fmt.Fprintf(cli.out, "Debug mode (client): %v\n", os.Getenv("DEBUG") != "")
 		fmt.Fprintf(cli.out, "Debug mode (client): %v\n", os.Getenv("DEBUG") != "")
@@ -1128,16 +1132,18 @@ func (cli *DockerCli) CmdImages(args ...string) error {
 		}
 		}
 
 
 		var outs []APIImages
 		var outs []APIImages
-		err = json.Unmarshal(body, &outs)
-		if err != nil {
+		if err := json.Unmarshal(body, &outs); err != nil {
 			return err
 			return err
 		}
 		}
 
 
-		var startImageArg = cmd.Arg(0)
-		var startImage APIImages
+		var (
+			startImageArg = cmd.Arg(0)
+			startImage    APIImages
+
+			roots    []APIImages
+			byParent = make(map[string][]APIImages)
+		)
 
 
-		var roots []APIImages
-		var byParent = make(map[string][]APIImages)
 		for _, image := range outs {
 		for _, image := range outs {
 			if image.ParentId == "" {
 			if image.ParentId == "" {
 				roots = append(roots, image)
 				roots = append(roots, image)
@@ -2181,7 +2187,7 @@ func (cli *DockerCli) CmdCp(args ...string) error {
 
 
 	if statusCode == 200 {
 	if statusCode == 200 {
 		r := bytes.NewReader(data)
 		r := bytes.NewReader(data)
-		if err := archive.Untar(r, copyData.HostPath); err != nil {
+		if err := archive.Untar(r, copyData.HostPath, nil); err != nil {
 			return err
 			return err
 		}
 		}
 	}
 	}

+ 2 - 0
config.go

@@ -16,6 +16,7 @@ type DaemonConfig struct {
 	BridgeIface                 string
 	BridgeIface                 string
 	DefaultIp                   net.IP
 	DefaultIp                   net.IP
 	InterContainerCommunication bool
 	InterContainerCommunication bool
+	GraphDriver                 string
 }
 }
 
 
 // ConfigFromJob creates and returns a new DaemonConfig object
 // ConfigFromJob creates and returns a new DaemonConfig object
@@ -37,5 +38,6 @@ func ConfigFromJob(job *engine.Job) *DaemonConfig {
 	}
 	}
 	config.DefaultIp = net.ParseIP(job.Getenv("DefaultIp"))
 	config.DefaultIp = net.ParseIP(job.Getenv("DefaultIp"))
 	config.InterContainerCommunication = job.GetenvBool("InterContainerCommunication")
 	config.InterContainerCommunication = job.GetenvBool("InterContainerCommunication")
+	config.GraphDriver = job.Getenv("GraphDriver")
 	return &config
 	return &config
 }
 }

+ 68 - 88
container.go

@@ -6,6 +6,7 @@ import (
 	"errors"
 	"errors"
 	"fmt"
 	"fmt"
 	"github.com/dotcloud/docker/archive"
 	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/graphdriver"
 	"github.com/dotcloud/docker/term"
 	"github.com/dotcloud/docker/term"
 	"github.com/dotcloud/docker/utils"
 	"github.com/dotcloud/docker/utils"
 	"github.com/kr/pty"
 	"github.com/kr/pty"
@@ -16,7 +17,6 @@ import (
 	"os"
 	"os"
 	"os/exec"
 	"os/exec"
 	"path"
 	"path"
-	"path/filepath"
 	"strconv"
 	"strconv"
 	"strings"
 	"strings"
 	"sync"
 	"sync"
@@ -26,8 +26,8 @@ import (
 
 
 type Container struct {
 type Container struct {
 	sync.Mutex
 	sync.Mutex
-
-	root string
+	root   string // Path to the "home" of the container, including metadata.
+	rootfs string // Path to the root filesystem of the container.
 
 
 	ID string
 	ID string
 
 
@@ -48,6 +48,7 @@ type Container struct {
 	HostnamePath   string
 	HostnamePath   string
 	HostsPath      string
 	HostsPath      string
 	Name           string
 	Name           string
+	Driver         string
 
 
 	cmd       *exec.Cmd
 	cmd       *exec.Cmd
 	stdout    *utils.WriteBroadcaster
 	stdout    *utils.WriteBroadcaster
@@ -196,8 +197,13 @@ func (settings *NetworkSettings) PortMappingAPI() []APIPort {
 
 
 // Inject the io.Reader at the given path. Note: do not close the reader
 // Inject the io.Reader at the given path. Note: do not close the reader
 func (container *Container) Inject(file io.Reader, pth string) error {
 func (container *Container) Inject(file io.Reader, pth string) error {
+	if err := container.EnsureMounted(); err != nil {
+		return fmt.Errorf("inject: error mounting container %s: %s", container.ID, err)
+	}
+
 	// Return error if path exists
 	// Return error if path exists
-	if _, err := os.Stat(path.Join(container.rwPath(), pth)); err == nil {
+	destPath := path.Join(container.RootfsPath(), pth)
+	if _, err := os.Stat(destPath); err == nil {
 		// Since err is nil, the path could be stat'd and it exists
 		// Since err is nil, the path could be stat'd and it exists
 		return fmt.Errorf("%s exists", pth)
 		return fmt.Errorf("%s exists", pth)
 	} else if !os.IsNotExist(err) {
 	} else if !os.IsNotExist(err) {
@@ -208,14 +214,16 @@ func (container *Container) Inject(file io.Reader, pth string) error {
 	}
 	}
 
 
 	// Make sure the directory exists
 	// Make sure the directory exists
-	if err := os.MkdirAll(path.Join(container.rwPath(), path.Dir(pth)), 0755); err != nil {
+	if err := os.MkdirAll(path.Join(container.RootfsPath(), path.Dir(pth)), 0755); err != nil {
 		return err
 		return err
 	}
 	}
 
 
-	dest, err := os.Create(path.Join(container.rwPath(), pth))
+	dest, err := os.Create(destPath)
 	if err != nil {
 	if err != nil {
 		return err
 		return err
 	}
 	}
+	defer dest.Close()
+
 	if _, err := io.Copy(dest, file); err != nil {
 	if _, err := io.Copy(dest, file); err != nil {
 		return err
 		return err
 	}
 	}
@@ -607,6 +615,7 @@ func (container *Container) Start() (err error) {
 		}
 		}
 	}
 	}
 
 
+	volumesDriver := container.runtime.volumes.driver
 	// Create the requested volumes if they don't exist
 	// Create the requested volumes if they don't exist
 	for volPath := range container.Config.Volumes {
 	for volPath := range container.Config.Volumes {
 		volPath = path.Clean(volPath)
 		volPath = path.Clean(volPath)
@@ -626,13 +635,17 @@ func (container *Container) Start() (err error) {
 			}
 			}
 			// Otherwise create an directory in $ROOT/volumes/ and use that
 			// Otherwise create an directory in $ROOT/volumes/ and use that
 		} else {
 		} else {
-			c, err := container.runtime.volumes.Create(nil, container, "", "", nil)
+
+			// Do not pass a container as the parameter for the volume creation.
+			// The graph driver using the container's information ( Image ) to
+			// The graph driver would use the container's information (Image) to
+			// create the parent.
 			if err != nil {
 			if err != nil {
 				return err
 				return err
 			}
 			}
-			srcPath, err = c.layer()
+			srcPath, err = volumesDriver.Get(c.ID)
 			if err != nil {
 			if err != nil {
-				return err
+				return fmt.Errorf("Driver %s failed to get volume rootfs %s: %s", volumesDriver, c.ID, err)
 			}
 			}
 			srcRW = true // RW by default
 			srcRW = true // RW by default
 		}
 		}
@@ -1231,15 +1244,14 @@ func (container *Container) Resize(h, w int) error {
 }
 }
 
 
 func (container *Container) ExportRw() (archive.Archive, error) {
 func (container *Container) ExportRw() (archive.Archive, error) {
-	return archive.Tar(container.rwPath(), archive.Uncompressed)
-}
-
-func (container *Container) RwChecksum() (string, error) {
-	rwData, err := archive.Tar(container.rwPath(), archive.Xz)
-	if err != nil {
-		return "", err
+	if err := container.EnsureMounted(); err != nil {
+		return nil, err
+	}
+	if container.runtime == nil {
+		return nil, fmt.Errorf("Can't load storage driver for unregistered container %s", container.ID)
 	}
 	}
-	return utils.HashData(rwData)
+
+	return container.runtime.Diff(container)
 }
 }
 
 
 func (container *Container) Export() (archive.Archive, error) {
 func (container *Container) Export() (archive.Archive, error) {
@@ -1265,28 +1277,17 @@ func (container *Container) WaitTimeout(timeout time.Duration) error {
 }
 }
 
 
 func (container *Container) EnsureMounted() error {
 func (container *Container) EnsureMounted() error {
-	if mounted, err := container.Mounted(); err != nil {
-		return err
-	} else if mounted {
-		return nil
-	}
+	// FIXME: EnsureMounted is deprecated because drivers are now responsible
+	// for re-entrant mounting in their Get() method.
 	return container.Mount()
 	return container.Mount()
 }
 }
 
 
 func (container *Container) Mount() error {
 func (container *Container) Mount() error {
-	image, err := container.GetImage()
-	if err != nil {
-		return err
-	}
-	return image.Mount(container.RootfsPath(), container.rwPath())
+	return container.runtime.Mount(container)
 }
 }
 
 
-func (container *Container) Changes() ([]Change, error) {
-	image, err := container.GetImage()
-	if err != nil {
-		return nil, err
-	}
-	return image.Changes(container.rwPath())
+func (container *Container) Changes() ([]archive.Change, error) {
+	return container.runtime.Changes(container)
 }
 }
 
 
 func (container *Container) GetImage() (*Image, error) {
 func (container *Container) GetImage() (*Image, error) {
@@ -1296,18 +1297,8 @@ func (container *Container) GetImage() (*Image, error) {
 	return container.runtime.graph.Get(container.Image)
 	return container.runtime.graph.Get(container.Image)
 }
 }
 
 
-func (container *Container) Mounted() (bool, error) {
-	return Mounted(container.RootfsPath())
-}
-
 func (container *Container) Unmount() error {
 func (container *Container) Unmount() error {
-	if _, err := os.Stat(container.RootfsPath()); err != nil {
-		if os.IsNotExist(err) {
-			return nil
-		}
-		return err
-	}
-	return Unmount(container.RootfsPath())
+	return container.runtime.Unmount(container)
 }
 }
 
 
 func (container *Container) logPath(name string) string {
 func (container *Container) logPath(name string) string {
@@ -1336,11 +1327,7 @@ func (container *Container) lxcConfigPath() string {
 
 
 // This method must be exported to be used from the lxc template
 // This method must be exported to be used from the lxc template
 func (container *Container) RootfsPath() string {
 func (container *Container) RootfsPath() string {
-	return path.Join(container.root, "rootfs")
-}
-
-func (container *Container) rwPath() string {
-	return path.Join(container.root, "rw")
+	return container.rootfs
 }
 }
 
 
 func validateID(id string) error {
 func validateID(id string) error {
@@ -1352,49 +1339,38 @@ func validateID(id string) error {
 
 
 // GetSize, return real size, virtual size
 // GetSize, return real size, virtual size
 func (container *Container) GetSize() (int64, int64) {
 func (container *Container) GetSize() (int64, int64) {
-	var sizeRw, sizeRootfs int64
-	data := make(map[uint64]bool)
+	var (
+		sizeRw, sizeRootfs int64
+		err                error
+		driver             = container.runtime.driver
+	)
 
 
-	filepath.Walk(container.rwPath(), func(path string, fileInfo os.FileInfo, err error) error {
-		if fileInfo == nil {
-			return nil
+	if err := container.EnsureMounted(); err != nil {
+		utils.Errorf("Warning: failed to compute size of container rootfs %s: %s", container.ID, err)
+		return sizeRw, sizeRootfs
+	}
+
+	if differ, ok := container.runtime.driver.(graphdriver.Differ); ok {
+		sizeRw, err = differ.DiffSize(container.ID)
+		if err != nil {
+			utils.Errorf("Warning: driver %s couldn't return diff size of container %s: %s", driver, container.ID, err)
+			// FIXME: GetSize should return an error. Not changing it now in case
+			// there is a side-effect.
+			sizeRw = -1
 		}
 		}
-		size := fileInfo.Size()
-		if size == 0 {
-			return nil
+	} else {
+		changes, _ := container.Changes()
+		if changes != nil {
+			sizeRw = archive.ChangesSize(container.RootfsPath(), changes)
+		} else {
+			sizeRw = -1
 		}
 		}
+	}
 
 
-		inode := fileInfo.Sys().(*syscall.Stat_t).Ino
-		if _, entryExists := data[inode]; entryExists {
-			return nil
+	if _, err = os.Stat(container.RootfsPath()); err != nil {
+		if sizeRootfs, err = utils.TreeSize(container.RootfsPath()); err != nil {
+			sizeRootfs = -1
 		}
 		}
-		data[inode] = false
-
-		sizeRw += size
-		return nil
-	})
-
-	data = make(map[uint64]bool)
-	_, err := os.Stat(container.RootfsPath())
-	if err == nil {
-		filepath.Walk(container.RootfsPath(), func(path string, fileInfo os.FileInfo, err error) error {
-			if fileInfo == nil {
-				return nil
-			}
-			size := fileInfo.Size()
-			if size == 0 {
-				return nil
-			}
-
-			inode := fileInfo.Sys().(*syscall.Stat_t).Ino
-			if _, entryExists := data[inode]; entryExists {
-				return nil
-			}
-			data[inode] = false
-
-			sizeRootfs += size
-			return nil
-		})
 	}
 	}
 	return sizeRw, sizeRootfs
 	return sizeRw, sizeRootfs
 }
 }
@@ -1417,7 +1393,11 @@ func (container *Container) Copy(resource string) (archive.Archive, error) {
 		filter = []string{path.Base(basePath)}
 		filter = []string{path.Base(basePath)}
 		basePath = path.Dir(basePath)
 		basePath = path.Dir(basePath)
 	}
 	}
-	return archive.TarFilter(basePath, archive.Uncompressed, filter)
+	return archive.TarFilter(basePath, &archive.TarOptions{
+		Compression: archive.Uncompressed,
+		Includes:    filter,
+		Recursive:   true,
+	})
 }
 }
 
 
 // Returns true if the container exposes a certain port
 // Returns true if the container exposes a certain port

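The rewritten GetSize no longer walks the filesystem itself: it asks the storage driver, probing for an optional capability with a Go type assertion and falling back to Changes() when the driver cannot report a diff size. A sketch of that pattern; the Differ interface shape is inferred from the call differ.DiffSize(container.ID) above, and its real definition lives in graphdriver/driver.go, which is not shown in this excerpt:

    // Differ is an assumption about the optional interface a driver may implement.
    type Differ interface {
        DiffSize(id string) (int64, error)
    }

    // diffSize returns the driver-reported size when available, otherwise the
    // result of a caller-supplied generic fallback.
    func diffSize(driver interface{}, id string, fallback func() int64) int64 {
        if differ, ok := driver.(Differ); ok {
            if size, err := differ.DiffSize(id); err == nil {
                return size
            }
        }
        return fallback()
    }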
+ 170 - 0
contrib/docker-device-tool/device_tool.go

@@ -0,0 +1,170 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"github.com/dotcloud/docker/graphdriver/devmapper"
+	"os"
+	"path"
+	"sort"
+	"strconv"
+	"strings"
+)
+
+func usage() {
+	fmt.Fprintf(os.Stderr, "Usage: %s <flags>  [status] | [list] | [device id]  | [resize new-pool-size] | [snap new-id base-id] | [remove id] | [mount id mountpoint]\n", os.Args[0])
+	flag.PrintDefaults()
+	os.Exit(1)
+}
+
+func byteSizeFromString(arg string) (int64, error) {
+	digits := ""
+	rest := ""
+	last := strings.LastIndexAny(arg, "0123456789")
+	if last >= 0 {
+		digits = arg[:last+1]
+		rest = arg[last+1:]
+	}
+
+	val, err := strconv.ParseInt(digits, 10, 64)
+	if err != nil {
+		return val, err
+	}
+
+	rest = strings.ToLower(strings.TrimSpace(rest))
+
+	var multiplier int64 = 1
+	switch rest {
+	case "":
+		multiplier = 1
+	case "k", "kb":
+		multiplier = 1024
+	case "m", "mb":
+		multiplier = 1024 * 1024
+	case "g", "gb":
+		multiplier = 1024 * 1024 * 1024
+	case "t", "tb":
+		multiplier = 1024 * 1024 * 1024 * 1024
+	default:
+		return 0, fmt.Errorf("Unknown size unit: %s", rest)
+	}
+
+	return val * multiplier, nil
+}
+
+func main() {
+	root := flag.String("r", "/var/lib/docker", "Docker root dir")
+	flDebug := flag.Bool("D", false, "Debug mode")
+
+	flag.Parse()
+
+	if *flDebug {
+		os.Setenv("DEBUG", "1")
+	}
+
+	if flag.NArg() < 1 {
+		usage()
+	}
+
+	args := flag.Args()
+
+	home := path.Join(*root, "devicemapper")
+	devices, err := devmapper.NewDeviceSet(home, false)
+	if err != nil {
+		fmt.Println("Can't initialize device mapper: ", err)
+		os.Exit(1)
+	}
+
+	switch args[0] {
+	case "status":
+		status := devices.Status()
+		fmt.Printf("Pool name: %s\n", status.PoolName)
+		fmt.Printf("Data Loopback file: %s\n", status.DataLoopback)
+		fmt.Printf("Metadata Loopback file: %s\n", status.MetadataLoopback)
+		fmt.Printf("Sector size: %d\n", status.SectorSize)
+		fmt.Printf("Data use: %d of %d (%.1f %%)\n", status.Data.Used, status.Data.Total, 100.0*float64(status.Data.Used)/float64(status.Data.Total))
+		fmt.Printf("Metadata use: %d of %d (%.1f %%)\n", status.Metadata.Used, status.Metadata.Total, 100.0*float64(status.Metadata.Used)/float64(status.Metadata.Total))
+		break
+	case "list":
+		ids := devices.List()
+		sort.Strings(ids)
+		for _, id := range ids {
+			fmt.Println(id)
+		}
+		break
+	case "device":
+		if flag.NArg() < 2 {
+			usage()
+		}
+		status, err := devices.GetDeviceStatus(args[1])
+		if err != nil {
+			fmt.Println("Can't get device info: ", err)
+			os.Exit(1)
+		}
+		fmt.Printf("Id: %d\n", status.DeviceId)
+		fmt.Printf("Size: %d\n", status.Size)
+		fmt.Printf("Transaction Id: %d\n", status.TransactionId)
+		fmt.Printf("Size in Sectors: %d\n", status.SizeInSectors)
+		fmt.Printf("Mapped Sectors: %d\n", status.MappedSectors)
+		fmt.Printf("Highest Mapped Sector: %d\n", status.HighestMappedSector)
+		break
+	case "resize":
+		if flag.NArg() < 2 {
+			usage()
+		}
+
+		size, err := byteSizeFromString(args[1])
+		if err != nil {
+			fmt.Println("Invalid size: ", err)
+			os.Exit(1)
+		}
+
+		err = devices.ResizePool(size)
+		if err != nil {
+			fmt.Println("Error resizing pool: ", err)
+			os.Exit(1)
+		}
+
+		break
+	case "snap":
+		if flag.NArg() < 3 {
+			usage()
+		}
+
+		err := devices.AddDevice(args[1], args[2])
+		if err != nil {
+			fmt.Println("Can't create snap device: ", err)
+			os.Exit(1)
+		}
+		break
+	case "remove":
+		if flag.NArg() < 2 {
+			usage()
+		}
+
+		err := devices.RemoveDevice(args[1])
+		if err != nil {
+			fmt.Println("Can't remove device: ", err)
+			os.Exit(1)
+		}
+		break
+	case "mount":
+		if flag.NArg() < 3 {
+			usage()
+		}
+
+		err := devices.MountDevice(args[1], args[2], false)
+		if err != nil {
+			fmt.Println("Can't mount device: ", err)
+			os.Exit(1)
+		}
+		break
+	default:
+		fmt.Printf("Unknown command %s\n", args[0])
+		usage()
+
+		os.Exit(1)
+	}
+
+	return
+}

+ 12 - 10
docker/docker.go

@@ -25,19 +25,20 @@ func main() {
 	}
 	}
 	// FIXME: Switch d and D ? (to be more sshd like)
 	// FIXME: Switch d and D ? (to be more sshd like)
 	flVersion := flag.Bool("v", false, "Print version information and quit")
 	flVersion := flag.Bool("v", false, "Print version information and quit")
-	flDaemon := flag.Bool("d", false, "Daemon mode")
-	flDebug := flag.Bool("D", false, "Debug mode")
+	flDaemon := flag.Bool("d", false, "Enable daemon mode")
+	flDebug := flag.Bool("D", false, "Enable debug mode")
 	flAutoRestart := flag.Bool("r", true, "Restart previously running containers")
 	flAutoRestart := flag.Bool("r", true, "Restart previously running containers")
-	bridgeName := flag.String("b", "", "Attach containers to a pre-existing network bridge. Use 'none' to disable container networking")
-	pidfile := flag.String("p", "/var/run/docker.pid", "File containing process PID")
-	flRoot := flag.String("g", "/var/lib/docker", "Path to use as the root of the docker runtime.")
-	flEnableCors := flag.Bool("api-enable-cors", false, "Enable CORS requests in the remote api.")
-	flDns := flag.String("dns", "", "Set custom dns servers")
+	bridgeName := flag.String("b", "", "Attach containers to a pre-existing network bridge; use 'none' to disable container networking")
+	pidfile := flag.String("p", "/var/run/docker.pid", "Path to use for daemon PID file")
+	flRoot := flag.String("g", "/var/lib/docker", "Path to use as the root of the docker runtime")
+	flEnableCors := flag.Bool("api-enable-cors", false, "Enable CORS headers in the remote API")
+	flDns := flag.String("dns", "", "Force docker to use specific DNS servers")
 	flHosts := utils.ListOpts{fmt.Sprintf("unix://%s", docker.DEFAULTUNIXSOCKET)}
 	flHosts := utils.ListOpts{fmt.Sprintf("unix://%s", docker.DEFAULTUNIXSOCKET)}
-	flag.Var(&flHosts, "H", "tcp://host:port to bind/connect to or unix://path/to/socket to use")
-	flEnableIptables := flag.Bool("iptables", true, "Disable iptables within docker")
-	flDefaultIp := flag.String("ip", "0.0.0.0", "Default ip address to use when binding a containers ports")
+	flag.Var(&flHosts, "H", "Multiple tcp://host:port or unix://path/to/socket to bind in daemon mode, single connection otherwise")
+	flEnableIptables := flag.Bool("iptables", true, "Disable docker's addition of iptables rules")
+	flDefaultIp := flag.String("ip", "0.0.0.0", "Default IP address to use when binding container ports")
 	flInterContainerComm := flag.Bool("icc", true, "Enable inter-container communication")
 	flInterContainerComm := flag.Bool("icc", true, "Enable inter-container communication")
+	flGraphDriver := flag.String("s", "", "Force the docker runtime to use a specific storage driver")
 
 
 	flag.Parse()
 	flag.Parse()
 
 
@@ -82,6 +83,7 @@ func main() {
 		job.Setenv("BridgeIface", *bridgeName)
 		job.Setenv("BridgeIface", *bridgeName)
 		job.Setenv("DefaultIp", *flDefaultIp)
 		job.Setenv("DefaultIp", *flDefaultIp)
 		job.SetenvBool("InterContainerCommunication", *flInterContainerComm)
 		job.SetenvBool("InterContainerCommunication", *flInterContainerComm)
+		job.Setenv("GraphDriver", *flGraphDriver)
 		if err := job.Run(); err != nil {
 		if err := job.Run(); err != nil {
 			log.Fatal(err)
 			log.Fatal(err)
 		}
 		}

+ 1 - 1
docs/Dockerfile

@@ -9,7 +9,7 @@ run apt-get install -y python-setuptools make
 run easy_install pip
 run easy_install pip
 #from docs/requirements.txt, but here to increase cacheability
 #from docs/requirements.txt, but here to increase cacheability
 run pip install Sphinx==1.1.3
 run pip install Sphinx==1.1.3
-run pip install sphinxcontrib-httpdomain==1.1.8
+run pip install sphinxcontrib-httpdomain==1.1.9
 add . /docs
 add . /docs
 run cd /docs; make docs
 run cd /docs; make docs
 
 

+ 32 - 0
docs/sources/commandline/cli.rst

@@ -18,6 +18,38 @@ To list available commands, either run ``docker`` with no parameters or execute
 
 
     ...
     ...
 
 
+.. _cli_daemon:
+
+``daemon``
+----------
+
+::
+
+    Usage of docker:
+      -D=false: Enable debug mode
+      -H=[unix:///var/run/docker.sock]: Multiple tcp://host:port or unix://path/to/socket to bind in daemon mode, single connection otherwise
+      -api-enable-cors=false: Enable CORS headers in the remote API
+      -b="": Attach containers to a pre-existing network bridge; use 'none' to disable container networking
+      -d=false: Enable daemon mode
+      -dns="": Force docker to use specific DNS servers
+      -g="/var/lib/docker": Path to use as the root of the docker runtime
+      -icc=true: Enable inter-container communication
+      -ip="0.0.0.0": Default IP address to use when binding container ports
+      -iptables=true: Disable docker's addition of iptables rules
+      -p="/var/run/docker.pid": Path to use for daemon PID file
+      -r=true: Restart previously running containers
+      -s="": Force the docker runtime to use a specific storage driver
+      -v=false: Print version information and quit
+
+The docker daemon is the persistent process that manages containers.  Docker uses the same binary for both the 
+daemon and client.  To run the daemon you provide the ``-d`` flag.
+
+To force docker to use devicemapper as the storage driver, use ``docker -d -s devicemapper``
+
+To set the dns server for all docker containers, use ``docker -d -dns 8.8.8.8``
+
+To run the daemon with debug output, use ``docker -d -D``
+
 .. _cli_attach:
 .. _cli_attach:
 
 
 ``attach``
 ``attach``

+ 2 - 13
docs/sources/installation/kernel.rst

@@ -11,10 +11,10 @@ In short, Docker has the following kernel requirements:
 
 
 - Linux version 3.8 or above.
 - Linux version 3.8 or above.
 
 
-- `AUFS support <http://aufs.sourceforge.net/>`_.
-
 - Cgroups and namespaces must be enabled.
 - Cgroups and namespaces must be enabled.
 
 
+*Note: as of 0.7 docker no longer requires aufs. AUFS support is still available as an optional driver.*
+
 The officially supported kernel is the one recommended by the
 The officially supported kernel is the one recommended by the
 :ref:`ubuntu_linux` installation path. It is the one that most developers
 :ref:`ubuntu_linux` installation path. It is the one that most developers
 will use, and the one that receives the most attention from the core
 will use, and the one that receives the most attention from the core
@@ -58,17 +58,6 @@ detects something older than 3.8.
 See issue `#407 <https://github.com/dotcloud/docker/issues/407>`_ for details.
 See issue `#407 <https://github.com/dotcloud/docker/issues/407>`_ for details.
 
 
 
 
-AUFS support
-------------
-
-Docker currently relies on AUFS, an unioning filesystem.
-While AUFS is included in the kernels built by the Debian and Ubuntu
-distributions, is not part of the standard kernel. This means that if
-you decide to roll your own kernel, you will have to patch your
-kernel tree to add AUFS. The process is documented on
-`AUFS webpage <http://aufs.sourceforge.net/>`_.
-
-
 Cgroups and namespaces
 Cgroups and namespaces
 ----------------------
 ----------------------
 
 

+ 6 - 8
docs/sources/installation/ubuntulinux.rst

@@ -14,16 +14,11 @@ Ubuntu Linux
 
 
 .. include:: install_header.inc
 .. include:: install_header.inc
 
 
-Right now, the officially supported distribution are:
+Docker is supported on the following versions of Ubuntu:
 
 
 - :ref:`ubuntu_precise`
 - :ref:`ubuntu_precise`
 - :ref:`ubuntu_raring`
 - :ref:`ubuntu_raring`
 
 
-Docker has the following dependencies
-
-* Linux kernel 3.8 (read more about :ref:`kernel`)
-* AUFS file system support (we are working on BTRFS support as an alternative)
-
 Please read :ref:`ufw`, if you plan to use `UFW (Uncomplicated
 Please read :ref:`ufw`, if you plan to use `UFW (Uncomplicated
 Firewall) <https://help.ubuntu.com/community/UFW>`_
 Firewall) <https://help.ubuntu.com/community/UFW>`_
 
 
@@ -107,10 +102,13 @@ Ubuntu Raring 13.04 (64 bit)
 Dependencies
 Dependencies
 ------------
 ------------
 
 
-**AUFS filesystem support**
+**Optional AUFS filesystem support**
 
 
 Ubuntu Raring already comes with the 3.8 kernel, so we don't need to install it. However, not all systems
 Ubuntu Raring already comes with the 3.8 kernel, so we don't need to install it. However, not all systems
-have AUFS filesystem support enabled, so we need to install it.
+have AUFS filesystem support enabled. AUFS support is optional as of version 0.7, but it's still available as
+a driver and we recommend using it if you can.
+
+To make sure aufs is installed, run the following commands:
 
 
 .. code-block:: bash
 .. code-block:: bash
 
 

+ 62 - 43
graph.go

@@ -3,6 +3,7 @@ package docker
 import (
 import (
 	"fmt"
 	"fmt"
 	"github.com/dotcloud/docker/archive"
 	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/graphdriver"
 	"github.com/dotcloud/docker/utils"
 	"github.com/dotcloud/docker/utils"
 	"io"
 	"io"
 	"io/ioutil"
 	"io/ioutil"
@@ -10,6 +11,7 @@ import (
 	"path"
 	"path"
 	"path/filepath"
 	"path/filepath"
 	"strings"
 	"strings"
+	"syscall"
 	"time"
 	"time"
 )
 )
 
 
@@ -17,11 +19,12 @@ import (
 type Graph struct {
 type Graph struct {
 	Root    string
 	Root    string
 	idIndex *utils.TruncIndex
 	idIndex *utils.TruncIndex
+	driver  graphdriver.Driver
 }
 }
 
 
 // NewGraph instantiates a new graph at the given root path in the filesystem.
 // NewGraph instantiates a new graph at the given root path in the filesystem.
 // `root` will be created if it doesn't exist.
 // `root` will be created if it doesn't exist.
-func NewGraph(root string) (*Graph, error) {
+func NewGraph(root string, driver graphdriver.Driver) (*Graph, error) {
 	abspath, err := filepath.Abs(root)
 	abspath, err := filepath.Abs(root)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
@@ -30,9 +33,11 @@ func NewGraph(root string) (*Graph, error) {
 	if err := os.MkdirAll(root, 0700); err != nil && !os.IsExist(err) {
 	if err := os.MkdirAll(root, 0700); err != nil && !os.IsExist(err) {
 		return nil, err
 		return nil, err
 	}
 	}
+
 	graph := &Graph{
 	graph := &Graph{
 		Root:    abspath,
 		Root:    abspath,
 		idIndex: utils.NewTruncIndex(),
 		idIndex: utils.NewTruncIndex(),
+		driver:  driver,
 	}
 	}
 	if err := graph.restore(); err != nil {
 	if err := graph.restore(); err != nil {
 		return nil, err
 		return nil, err
@@ -47,7 +52,9 @@ func (graph *Graph) restore() error {
 	}
 	}
 	for _, v := range dir {
 	for _, v := range dir {
 		id := v.Name()
 		id := v.Name()
-		graph.idIndex.Add(id)
+		if graph.driver.Exists(id) {
+			graph.idIndex.Add(id)
+		}
 	}
 	}
 	return nil
 	return nil
 }
 }
@@ -78,16 +85,22 @@ func (graph *Graph) Get(name string) (*Image, error) {
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
+	// Check that the filesystem layer exists
+	rootfs, err := graph.driver.Get(img.ID)
+	if err != nil {
+		return nil, fmt.Errorf("Driver %s failed to get image rootfs %s: %s", graph.driver, img.ID, err)
+	}
 	if img.ID != id {
 	if img.ID != id {
 		return nil, fmt.Errorf("Image stored at '%s' has wrong id '%s'", id, img.ID)
 		return nil, fmt.Errorf("Image stored at '%s' has wrong id '%s'", id, img.ID)
 	}
 	}
 	img.graph = graph
 	img.graph = graph
 	if img.Size == 0 {
 	if img.Size == 0 {
-		root, err := img.root()
+		size, err := utils.TreeSize(rootfs)
 		if err != nil {
 		if err != nil {
-			return nil, err
+			return nil, fmt.Errorf("Error computing size of rootfs %s: %s", img.ID, err)
 		}
 		}
-		if err := StoreSize(img, root); err != nil {
+		img.Size = size
+		if err := img.SaveSize(graph.imageRoot(id)); err != nil {
 			return nil, err
 			return nil, err
 		}
 		}
 	}
 	}
@@ -126,19 +139,37 @@ func (graph *Graph) Register(jsonData []byte, layerData archive.Archive, img *Im
 	if graph.Exists(img.ID) {
 	if graph.Exists(img.ID) {
 		return fmt.Errorf("Image %s already exists", img.ID)
 		return fmt.Errorf("Image %s already exists", img.ID)
 	}
 	}
+
+	// Ensure that the image root does not exist on the filesystem
+	// when it is not registered in the graph.
+	// This is common when you switch from one graph driver to another
+	if err := os.RemoveAll(graph.imageRoot(img.ID)); err != nil && !os.IsNotExist(err) {
+		return err
+	}
+
 	tmp, err := graph.Mktemp("")
 	tmp, err := graph.Mktemp("")
 	defer os.RemoveAll(tmp)
 	defer os.RemoveAll(tmp)
 	if err != nil {
 	if err != nil {
 		return fmt.Errorf("Mktemp failed: %s", err)
 		return fmt.Errorf("Mktemp failed: %s", err)
 	}
 	}
-	if err := StoreImage(img, jsonData, layerData, tmp); err != nil {
+
+	// Create root filesystem in the driver
+	if err := graph.driver.Create(img.ID, img.Parent); err != nil {
+		return fmt.Errorf("Driver %s failed to create image rootfs %s: %s", graph.driver, img.ID, err)
+	}
+	// Mount the root filesystem so we can apply the diff/layer
+	rootfs, err := graph.driver.Get(img.ID)
+	if err != nil {
+		return fmt.Errorf("Driver %s failed to get image rootfs %s: %s", graph.driver, img.ID, err)
+	}
+	img.graph = graph
+	if err := StoreImage(img, jsonData, layerData, tmp, rootfs); err != nil {
 		return err
 		return err
 	}
 	}
 	// Commit
 	// Commit
 	if err := os.Rename(tmp, graph.imageRoot(img.ID)); err != nil {
 	if err := os.Rename(tmp, graph.imageRoot(img.ID)); err != nil {
 		return err
 		return err
 	}
 	}
-	img.graph = graph
 	graph.idIndex.Add(img.ID)
 	graph.idIndex.Add(img.ID)
 	return nil
 	return nil
 }
 }
@@ -152,50 +183,33 @@ func (graph *Graph) TempLayerArchive(id string, compression archive.Compression,
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
-	tmp, err := graph.tmp()
+	tmp, err := graph.Mktemp("")
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
-	a, err := image.TarLayer(compression)
+	a, err := image.TarLayer()
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
-	return archive.NewTempArchive(utils.ProgressReader(ioutil.NopCloser(a), 0, output, sf.FormatProgress("", "Buffering to disk", "%v/%v (%v)"), sf, true), tmp.Root)
+	return archive.NewTempArchive(utils.ProgressReader(ioutil.NopCloser(a), 0, output, sf.FormatProgress("", "Buffering to disk", "%v/%v (%v)"), sf, true), tmp)
 }
 }
 
 
 // Mktemp creates a temporary sub-directory inside the graph's filesystem.
 // Mktemp creates a temporary sub-directory inside the graph's filesystem.
 func (graph *Graph) Mktemp(id string) (string, error) {
 func (graph *Graph) Mktemp(id string) (string, error) {
-	if id == "" {
-		id = GenerateID()
-	}
-	tmp, err := graph.tmp()
-	if err != nil {
-		return "", fmt.Errorf("Couldn't create temp: %s", err)
-	}
-	if tmp.Exists(id) {
-		return "", fmt.Errorf("Image %s already exists", id)
+	dir := path.Join(graph.Root, "_tmp", GenerateID())
+	if err := os.MkdirAll(dir, 0700); err != nil {
+		return "", err
 	}
 	}
-	return tmp.imageRoot(id), nil
+	return dir, nil
 }
 }
 
 
-// getDockerInitLayer returns the path of a layer containing a mountpoint suitable
+// setupInitLayer populates a directory with mountpoints suitable
 // for bind-mounting dockerinit into the container. The mountpoint is simply an
 // for bind-mounting dockerinit into the container. The mountpoint is simply an
 // empty file at /.dockerinit
 // empty file at /.dockerinit
 //
 //
 // This extra layer is used by all containers as the top-most ro layer. It protects
 // This extra layer is used by all containers as the top-most ro layer. It protects
 // the container from unwanted side-effects on the rw layer.
 // the container from unwanted side-effects on the rw layer.
-func (graph *Graph) getDockerInitLayer() (string, error) {
-	tmp, err := graph.tmp()
-	if err != nil {
-		return "", err
-	}
-	initLayer := tmp.imageRoot("_dockerinit")
-	if err := os.Mkdir(initLayer, 0755); err != nil && !os.IsExist(err) {
-		// If directory already existed, keep going.
-		// For all other errors, abort.
-		return "", err
-	}
-
+func setupInitLayer(initLayer string) error {
 	for pth, typ := range map[string]string{
 	for pth, typ := range map[string]string{
 		"/dev/pts":         "dir",
 		"/dev/pts":         "dir",
 		"/dev/shm":         "dir",
 		"/dev/shm":         "dir",
@@ -209,36 +223,38 @@ func (graph *Graph) getDockerInitLayer() (string, error) {
 		// "var/run": "dir",
 		// "var/run": "dir",
 		// "var/lock": "dir",
 		// "var/lock": "dir",
 	} {
 	} {
+		parts := strings.Split(pth, "/")
+		prev := "/"
+		for _, p := range parts[1:] {
+			prev = path.Join(prev, p)
+			syscall.Unlink(path.Join(initLayer, prev))
+		}
+
 		if _, err := os.Stat(path.Join(initLayer, pth)); err != nil {
 		if _, err := os.Stat(path.Join(initLayer, pth)); err != nil {
 			if os.IsNotExist(err) {
 			if os.IsNotExist(err) {
 				switch typ {
 				switch typ {
 				case "dir":
 				case "dir":
 					if err := os.MkdirAll(path.Join(initLayer, pth), 0755); err != nil {
 					if err := os.MkdirAll(path.Join(initLayer, pth), 0755); err != nil {
-						return "", err
+						return err
 					}
 					}
 				case "file":
 				case "file":
 					if err := os.MkdirAll(path.Join(initLayer, path.Dir(pth)), 0755); err != nil {
 					if err := os.MkdirAll(path.Join(initLayer, path.Dir(pth)), 0755); err != nil {
-						return "", err
+						return err
 					}
 					}
 					f, err := os.OpenFile(path.Join(initLayer, pth), os.O_CREATE, 0755)
 					f, err := os.OpenFile(path.Join(initLayer, pth), os.O_CREATE, 0755)
 					if err != nil {
 					if err != nil {
-						return "", err
+						return err
 					}
 					}
 					f.Close()
 					f.Close()
 				}
 				}
 			} else {
 			} else {
-				return "", err
+				return err
 			}
 			}
 		}
 		}
 	}
 	}
 
 
 	// Layer is ready to use, if it wasn't before.
 	// Layer is ready to use, if it wasn't before.
-	return initLayer, nil
-}
-
-func (graph *Graph) tmp() (*Graph, error) {
-	// Changed to _tmp from :tmp:, because it messed with ":" separators in aufs branch syntax...
-	return NewGraph(path.Join(graph.Root, "_tmp"))
+	return nil
 }
 }
 
 
 // Check if given error is "not empty".
 // Check if given error is "not empty".
@@ -270,6 +286,9 @@ func (graph *Graph) Delete(name string) error {
 	if err != nil {
 	if err != nil {
 		return err
 		return err
 	}
 	}
+	// Remove rootfs data from the driver
+	graph.driver.Remove(id)
+	// Remove the trashed image directory
 	return os.RemoveAll(tmp)
 	return os.RemoveAll(tmp)
 }
 }
 
 

+ 18 - 8
graph_test.go

@@ -5,6 +5,7 @@ import (
 	"bytes"
 	"bytes"
 	"errors"
 	"errors"
 	"github.com/dotcloud/docker/archive"
 	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/graphdriver"
 	"github.com/dotcloud/docker/utils"
 	"github.com/dotcloud/docker/utils"
 	"io"
 	"io"
 	"io/ioutil"
 	"io/ioutil"
@@ -15,7 +16,7 @@ import (
 
 
 func TestInit(t *testing.T) {
 func TestInit(t *testing.T) {
 	graph := tempGraph(t)
 	graph := tempGraph(t)
-	defer os.RemoveAll(graph.Root)
+	defer nukeGraph(graph)
 	// Root should exist
 	// Root should exist
 	if _, err := os.Stat(graph.Root); err != nil {
 	if _, err := os.Stat(graph.Root); err != nil {
 		t.Fatal(err)
 		t.Fatal(err)
@@ -31,7 +32,7 @@ func TestInit(t *testing.T) {
 // Test that Register can be interrupted cleanly without side effects
 // Test that Register can be interrupted cleanly without side effects
 func TestInterruptedRegister(t *testing.T) {
 func TestInterruptedRegister(t *testing.T) {
 	graph := tempGraph(t)
 	graph := tempGraph(t)
-	defer os.RemoveAll(graph.Root)
+	defer nukeGraph(graph)
 	badArchive, w := io.Pipe() // Use a pipe reader as a fake archive which never yields data
 	badArchive, w := io.Pipe() // Use a pipe reader as a fake archive which never yields data
 	image := &Image{
 	image := &Image{
 		ID:      GenerateID(),
 		ID:      GenerateID(),
@@ -58,7 +59,7 @@ func TestInterruptedRegister(t *testing.T) {
 //       create multiple, check the amount of images and paths, etc..)
 //       create multiple, check the amount of images and paths, etc..)
 func TestGraphCreate(t *testing.T) {
 func TestGraphCreate(t *testing.T) {
 	graph := tempGraph(t)
 	graph := tempGraph(t)
-	defer os.RemoveAll(graph.Root)
+	defer nukeGraph(graph)
 	archive, err := fakeTar()
 	archive, err := fakeTar()
 	if err != nil {
 	if err != nil {
 		t.Fatal(err)
 		t.Fatal(err)
@@ -89,7 +90,7 @@ func TestGraphCreate(t *testing.T) {
 
 
 func TestRegister(t *testing.T) {
 func TestRegister(t *testing.T) {
 	graph := tempGraph(t)
 	graph := tempGraph(t)
-	defer os.RemoveAll(graph.Root)
+	defer nukeGraph(graph)
 	archive, err := fakeTar()
 	archive, err := fakeTar()
 	if err != nil {
 	if err != nil {
 		t.Fatal(err)
 		t.Fatal(err)
@@ -123,7 +124,7 @@ func TestRegister(t *testing.T) {
 // Test that an image can be deleted by its shorthand prefix
 // Test that an image can be deleted by its shorthand prefix
 func TestDeletePrefix(t *testing.T) {
 func TestDeletePrefix(t *testing.T) {
 	graph := tempGraph(t)
 	graph := tempGraph(t)
-	defer os.RemoveAll(graph.Root)
+	defer nukeGraph(graph)
 	img := createTestImage(graph, t)
 	img := createTestImage(graph, t)
 	if err := graph.Delete(utils.TruncateID(img.ID)); err != nil {
 	if err := graph.Delete(utils.TruncateID(img.ID)); err != nil {
 		t.Fatal(err)
 		t.Fatal(err)
@@ -145,7 +146,7 @@ func createTestImage(graph *Graph, t *testing.T) *Image {
 
 
 func TestDelete(t *testing.T) {
 func TestDelete(t *testing.T) {
 	graph := tempGraph(t)
 	graph := tempGraph(t)
-	defer os.RemoveAll(graph.Root)
+	defer nukeGraph(graph)
 	archive, err := fakeTar()
 	archive, err := fakeTar()
 	if err != nil {
 	if err != nil {
 		t.Fatal(err)
 		t.Fatal(err)
@@ -209,7 +210,7 @@ func TestByParent(t *testing.T) {
 	archive3, _ := fakeTar()
 	archive3, _ := fakeTar()
 
 
 	graph := tempGraph(t)
 	graph := tempGraph(t)
-	defer os.RemoveAll(graph.Root)
+	defer nukeGraph(graph)
 	parentImage := &Image{
 	parentImage := &Image{
 		ID:      GenerateID(),
 		ID:      GenerateID(),
 		Comment: "parent",
 		Comment: "parent",
@@ -259,13 +260,22 @@ func tempGraph(t *testing.T) *Graph {
 	if err != nil {
 	if err != nil {
 		t.Fatal(err)
 		t.Fatal(err)
 	}
 	}
-	graph, err := NewGraph(tmp)
+	backend, err := graphdriver.New(tmp)
+	if err != nil {
+		t.Fatal(err)
+	}
+	graph, err := NewGraph(tmp, backend)
 	if err != nil {
 	if err != nil {
 		t.Fatal(err)
 		t.Fatal(err)
 	}
 	}
 	return graph
 	return graph
 }
 }
 
 
+func nukeGraph(graph *Graph) {
+	graph.driver.Cleanup()
+	os.RemoveAll(graph.Root)
+}
+
 func testArchive(t *testing.T) archive.Archive {
 func testArchive(t *testing.T) archive.Archive {
 	archive, err := fakeTar()
 	archive, err := fakeTar()
 	if err != nil {
 	if err != nil {

+ 0 - 0
gograph/MAINTAINERS → graphdb/MAINTAINERS


+ 1 - 1
gograph/gograph.go → graphdb/graphdb.go

@@ -1,4 +1,4 @@
-package gograph
+package graphdb
 
 
 import (
 import (
 	"database/sql"
 	"database/sql"

+ 1 - 1
gograph/gograph_test.go → graphdb/graphdb_test.go

@@ -1,4 +1,4 @@
-package gograph
+package graphdb
 
 
 import (
 import (
 	_ "code.google.com/p/gosqlite/sqlite3"
 	_ "code.google.com/p/gosqlite/sqlite3"

+ 1 - 1
gograph/sort.go → graphdb/sort.go

@@ -1,4 +1,4 @@
-package gograph
+package graphdb
 
 
 import "sort"
 import "sort"
 
 

+ 1 - 1
gograph/sort_test.go → graphdb/sort_test.go

@@ -1,4 +1,4 @@
-package gograph
+package graphdb
 
 
 import (
 import (
 	"testing"
 	"testing"

+ 1 - 1
gograph/utils.go → graphdb/utils.go

@@ -1,4 +1,4 @@
-package gograph
+package graphdb
 
 
 import (
 import (
 	"path"
 	"path"

+ 336 - 0
graphdriver/aufs/aufs.go

@@ -0,0 +1,336 @@
+/*
+
+aufs driver directory structure
+
+.
+├── layers // Metadata of layers; each file lists the parent layer ids for the id
+│   ├── 1
+│   ├── 2
+│   └── 3
+├── diff   // Content of each layer
+│   ├── 1
+│   ├── 2
+│   └── 3
+└── mnt    // Mount points for the rw layers to be mounted
+    ├── 1
+    ├── 2
+    └── 3
+
+*/
+
+package aufs
+
+import (
+	"bufio"
+	"fmt"
+	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/graphdriver"
+	"github.com/dotcloud/docker/utils"
+	"log"
+	"os"
+	"os/exec"
+	"path"
+	"strings"
+)
+
+func init() {
+	graphdriver.Register("aufs", Init)
+}
+
+type Driver struct {
+	root string
+}
+
+// Init returns a new AUFS driver.
+// An error is returned if AUFS is not supported.
+func Init(root string) (graphdriver.Driver, error) {
+	// Try to load the aufs kernel module
+	if err := supportsAufs(); err != nil {
+		return nil, err
+	}
+	paths := []string{
+		"mnt",
+		"diff",
+		"layers",
+	}
+
+	// Create the root aufs driver dir and return
+	// if it already exists
+	// If not, populate the dir structure
+	if err := os.MkdirAll(root, 0755); err != nil {
+		if os.IsExist(err) {
+			return &Driver{root}, nil
+		}
+		return nil, err
+	}
+
+	for _, p := range paths {
+		if err := os.MkdirAll(path.Join(root, p), 0755); err != nil {
+			return nil, err
+		}
+	}
+	return &Driver{root}, nil
+}
+
+// Return a nil error if the kernel supports aufs
+// We cannot modprobe because inside dind modprobe fails
+// to run
+func supportsAufs() error {
+	// We can try to modprobe aufs first before looking at
+	// proc/filesystems for when aufs is supported
+	exec.Command("modprobe", "aufs").Run()
+
+	f, err := os.Open("/proc/filesystems")
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	s := bufio.NewScanner(f)
+	for s.Scan() {
+		if strings.Contains(s.Text(), "aufs") {
+			return nil
+		}
+	}
+	return fmt.Errorf("AUFS was not found in /proc/filesystems")
+}
+
+func (a Driver) rootPath() string {
+	return a.root
+}
+
+func (Driver) String() string {
+	return "aufs"
+}
+
+func (a Driver) Status() [][2]string {
+	ids, _ := loadIds(path.Join(a.rootPath(), "layers"))
+	return [][2]string{
+		{"Root Dir", a.rootPath()},
+		{"Dirs", fmt.Sprintf("%d", len(ids))},
+	}
+}
+
+// Exists returns true if the given id is registered with
+// this driver
+func (a Driver) Exists(id string) bool {
+	if _, err := os.Lstat(path.Join(a.rootPath(), "layers", id)); err != nil {
+		return false
+	}
+	return true
+}
+
+// Three folders are created for each id
+// mnt, layers, and diff
+func (a *Driver) Create(id, parent string) error {
+	if err := a.createDirsFor(id); err != nil {
+		return err
+	}
+	// Write the layers metadata
+	f, err := os.Create(path.Join(a.rootPath(), "layers", id))
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	if parent != "" {
+		ids, err := getParentIds(a.rootPath(), parent)
+		if err != nil {
+			return err
+		}
+
+		if _, err := fmt.Fprintln(f, parent); err != nil {
+			return err
+		}
+		for _, i := range ids {
+			if _, err := fmt.Fprintln(f, i); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+func (a *Driver) createDirsFor(id string) error {
+	paths := []string{
+		"mnt",
+		"diff",
+	}
+
+	for _, p := range paths {
+		if err := os.MkdirAll(path.Join(a.rootPath(), p, id), 0755); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Unmount and remove the dir information
+func (a *Driver) Remove(id string) error {
+	// Make sure the dir is umounted first
+	if err := a.unmount(id); err != nil {
+		return err
+	}
+	tmpDirs := []string{
+		"mnt",
+		"diff",
+	}
+
+	// Remove the dirs atomically
+	for _, p := range tmpDirs {
+		// We need to use a temp dir in the same dir as the driver so Rename
+		// does not fall back to the slow copy if /tmp and the driver dir
+		// are on different devices
+		tmp := path.Join(a.rootPath(), "tmp", p, id)
+		if err := os.MkdirAll(tmp, 0755); err != nil {
+			return err
+		}
+		realPath := path.Join(a.rootPath(), p, id)
+		if err := os.Rename(realPath, tmp); err != nil && !os.IsNotExist(err) {
+			return err
+		}
+		defer os.RemoveAll(tmp)
+	}
+
+	// Remove the layers file for the id
+	if err := os.Remove(path.Join(a.rootPath(), "layers", id)); err != nil && !os.IsNotExist(err) {
+		return err
+	}
+	return nil
+}
+
+// Return the rootfs path for the id
+// This will mount the dir at its given path
+func (a *Driver) Get(id string) (string, error) {
+	ids, err := getParentIds(a.rootPath(), id)
+	if err != nil {
+		if !os.IsNotExist(err) {
+			return "", err
+		}
+		ids = []string{}
+	}
+
+	// If a dir does not have a parent (no layers), do not try to mount;
+	// just return the diff path to the data
+	out := path.Join(a.rootPath(), "diff", id)
+	if len(ids) > 0 {
+		out = path.Join(a.rootPath(), "mnt", id)
+		if err := a.mount(id); err != nil {
+			return "", err
+		}
+	}
+	return out, nil
+}
+
+// Returns an archive of the contents for the id
+func (a *Driver) Diff(id string) (archive.Archive, error) {
+	return archive.TarFilter(path.Join(a.rootPath(), "diff", id), &archive.TarOptions{
+		Recursive:   true,
+		Compression: archive.Uncompressed,
+	})
+}
+
+func (a *Driver) ApplyDiff(id string, diff archive.Archive) error {
+	return archive.Untar(diff, path.Join(a.rootPath(), "diff", id), nil)
+}
+
+// Returns the size of the contents for the id
+func (a *Driver) DiffSize(id string) (int64, error) {
+	return utils.TreeSize(path.Join(a.rootPath(), "diff", id))
+}
+
+func (a *Driver) Changes(id string) ([]archive.Change, error) {
+	layers, err := a.getParentLayerPaths(id)
+	if err != nil {
+		return nil, err
+	}
+	return archive.Changes(layers, path.Join(a.rootPath(), "diff", id))
+}
+
+func (a *Driver) getParentLayerPaths(id string) ([]string, error) {
+	parentIds, err := getParentIds(a.rootPath(), id)
+	if err != nil {
+		return nil, err
+	}
+	if len(parentIds) == 0 {
+		return nil, fmt.Errorf("Dir %s does not have any parent layers", id)
+	}
+	layers := make([]string, len(parentIds))
+
+	// Get the diff paths for all the parent ids
+	for i, p := range parentIds {
+		layers[i] = path.Join(a.rootPath(), "diff", p)
+	}
+	return layers, nil
+}
+
+func (a *Driver) mount(id string) error {
+	// If the id is mounted or we get an error return
+	if mounted, err := a.mounted(id); err != nil || mounted {
+		return err
+	}
+
+	var (
+		target = path.Join(a.rootPath(), "mnt", id)
+		rw     = path.Join(a.rootPath(), "diff", id)
+	)
+
+	layers, err := a.getParentLayerPaths(id)
+	if err != nil {
+		return err
+	}
+
+	if err := a.aufsMount(layers, rw, target); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (a *Driver) unmount(id string) error {
+	if mounted, err := a.mounted(id); err != nil || !mounted {
+		return err
+	}
+	target := path.Join(a.rootPath(), "mnt", id)
+	return Unmount(target)
+}
+
+func (a *Driver) mounted(id string) (bool, error) {
+	target := path.Join(a.rootPath(), "mnt", id)
+	return Mounted(target)
+}
+
+// During cleanup aufs needs to unmount all mountpoints
+func (a *Driver) Cleanup() error {
+	ids, err := loadIds(path.Join(a.rootPath(), "layers"))
+	if err != nil {
+		return err
+	}
+	for _, id := range ids {
+		if err := a.unmount(id); err != nil {
+			utils.Errorf("Unmounting %s: %s", utils.TruncateID(id), err)
+		}
+	}
+	return nil
+}
+
+func (a *Driver) aufsMount(ro []string, rw, target string) error {
+	rwBranch := fmt.Sprintf("%v=rw", rw)
+	roBranches := ""
+	for _, layer := range ro {
+		roBranches += fmt.Sprintf("%v=ro+wh:", layer)
+	}
+	branches := fmt.Sprintf("br:%v:%v,xino=/dev/shm/aufs.xino", rwBranch, roBranches)
+
+	//if error, try to load aufs kernel module
+	if err := mount("none", target, "aufs", 0, branches); err != nil {
+		log.Printf("Kernel does not support AUFS, trying to load the AUFS module with modprobe...")
+		if err := exec.Command("modprobe", "aufs").Run(); err != nil {
+			return fmt.Errorf("Unable to load the AUFS module")
+		}
+		log.Printf("...module loaded.")
+		if err := mount("none", target, "aufs", 0, branches); err != nil {
+			return fmt.Errorf("Unable to mount using aufs %s", err)
+		}
+	}
+	return nil
+}

+ 623 - 0
graphdriver/aufs/aufs_test.go

@@ -0,0 +1,623 @@
+package aufs
+
+import (
+	"github.com/dotcloud/docker/archive"
+	"os"
+	"path"
+	"testing"
+)
+
+var (
+	tmp = path.Join(os.TempDir(), "aufs-tests", "aufs")
+)
+
+func newDriver(t *testing.T) *Driver {
+	if err := os.MkdirAll(tmp, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	d, err := Init(tmp)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return d.(*Driver)
+}
+
+func TestNewDriver(t *testing.T) {
+	if err := os.MkdirAll(tmp, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	d, err := Init(tmp)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(tmp)
+	if d == nil {
+		t.Fatalf("Driver should not be nil")
+	}
+}
+
+func TestAufsString(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if d.String() != "aufs" {
+		t.Fatalf("Expected aufs got %s", d.String())
+	}
+}
+
+func TestCreateDirStructure(t *testing.T) {
+	newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	paths := []string{
+		"mnt",
+		"layers",
+		"diff",
+	}
+
+	for _, p := range paths {
+		if _, err := os.Stat(path.Join(tmp, p)); err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
+// We should be able to create two drivers with the same dir structure
+func TestNewDriverFromExistingDir(t *testing.T) {
+	if err := os.MkdirAll(tmp, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	if _, err := Init(tmp); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := Init(tmp); err != nil {
+		t.Fatal(err)
+	}
+	os.RemoveAll(tmp)
+}
+
+func TestCreateNewDir(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestCreateNewDirStructure(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	paths := []string{
+		"mnt",
+		"diff",
+		"layers",
+	}
+
+	for _, p := range paths {
+		if _, err := os.Stat(path.Join(tmp, p, "1")); err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
+func TestRemoveImage(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := d.Remove("1"); err != nil {
+		t.Fatal(err)
+	}
+
+	paths := []string{
+		"mnt",
+		"diff",
+		"layers",
+	}
+
+	for _, p := range paths {
+		if _, err := os.Stat(path.Join(tmp, p, "1")); err == nil {
+			t.Fatalf("Error should not be nil because dirs with id 1 should be deleted: %s", p)
+		}
+	}
+}
+
+func TestGetWithoutParent(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	diffPath, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	expected := path.Join(tmp, "diff", "1")
+	if diffPath != expected {
+		t.Fatalf("Expected path %s got %s", expected, diffPath)
+	}
+}
+
+func TestCleanupWithNoDirs(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Cleanup(); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestCleanupWithDir(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := d.Cleanup(); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestMountedFalseResponse(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	response, err := d.mounted("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if response != false {
+		t.Fatalf("Expected mounted to be false for dir id 1")
+	}
+}
+
+func TestMountedTrueResponse(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+	defer d.Cleanup()
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+	if err := d.Create("2", "1"); err != nil {
+		t.Fatal(err)
+	}
+
+	_, err := d.Get("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	response, err := d.mounted("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if response != true {
+		t.Fatalf("Expected mounted to be true for dir id 2")
+	}
+}
+
+func TestMountWithParent(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+	if err := d.Create("2", "1"); err != nil {
+		t.Fatal(err)
+	}
+
+	defer func() {
+		if err := d.Cleanup(); err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	mntPath, err := d.Get("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if mntPath == "" {
+		t.Fatal("mntPath should not be empty string")
+	}
+
+	expected := path.Join(tmp, "mnt", "2")
+	if mntPath != expected {
+		t.Fatalf("Expected %s got %s", expected, mntPath)
+	}
+}
+
+func TestRemoveMountedDir(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+	if err := d.Create("2", "1"); err != nil {
+		t.Fatal(err)
+	}
+
+	defer func() {
+		if err := d.Cleanup(); err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	mntPath, err := d.Get("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if mntPath == "" {
+		t.Fatal("mntPath should not be empty string")
+	}
+
+	mounted, err := d.mounted("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if !mounted {
+		t.Fatalf("Dir id 2 should be mounted")
+	}
+
+	if err := d.Remove("2"); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestCreateWithInvalidParent(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", "docker"); err == nil {
+		t.Fatalf("Error should not be nil when the parent does not exist")
+	}
+}
+
+func TestGetDiff(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	diffPath, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Add a file to the diff path with a fixed size
+	size := int64(1024)
+
+	f, err := os.Create(path.Join(diffPath, "test_file"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Truncate(size); err != nil {
+		t.Fatal(err)
+	}
+	f.Close()
+
+	a, err := d.Diff("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if a == nil {
+		t.Fatalf("Archive should not be nil")
+	}
+}
+
+func TestChanges(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+	if err := d.Create("2", "1"); err != nil {
+		t.Fatal(err)
+	}
+
+	defer func() {
+		if err := d.Cleanup(); err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	mntPoint, err := d.Get("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Create a file to save in the mountpoint
+	f, err := os.Create(path.Join(mntPoint, "test.txt"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if _, err := f.WriteString("testline"); err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	changes, err := d.Changes("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(changes) != 1 {
+		t.Fatalf("Dir 2 should have one change from parent got %d", len(changes))
+	}
+	change := changes[0]
+
+	expectedPath := "/test.txt"
+	if change.Path != expectedPath {
+		t.Fatalf("Expected path %s got %s", expectedPath, change.Path)
+	}
+
+	if change.Kind != archive.ChangeAdd {
+		t.Fatalf("Change kind should be ChangeAdd got %s", change.Kind)
+	}
+
+	if err := d.Create("3", "2"); err != nil {
+		t.Fatal(err)
+	}
+	mntPoint, err = d.Get("3")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Create a file to save in the mountpoint
+	f, err = os.Create(path.Join(mntPoint, "test2.txt"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if _, err := f.WriteString("testline"); err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	changes, err = d.Changes("3")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(changes) != 1 {
+		t.Fatalf("Dir 3 should have one change from parent, got %d", len(changes))
+	}
+	change = changes[0]
+
+	expectedPath = "/test2.txt"
+	if change.Path != expectedPath {
+		t.Fatalf("Expected path %s got %s", expectedPath, change.Path)
+	}
+
+	if change.Kind != archive.ChangeAdd {
+		t.Fatalf("Change kind should be ChangeAdd got %s", change.Kind)
+	}
+}
+
+func TestDiffSize(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	diffPath, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Add a file to the diff path with a fixed size
+	size := int64(1024)
+
+	f, err := os.Create(path.Join(diffPath, "test_file"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Truncate(size); err != nil {
+		t.Fatal(err)
+	}
+	s, err := f.Stat()
+	if err != nil {
+		t.Fatal(err)
+	}
+	size = s.Size()
+	if err := f.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	diffSize, err := d.DiffSize("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if diffSize != size {
+		t.Fatalf("Expected size to be %d got %d", size, diffSize)
+	}
+}
+
+func TestChildDiffSize(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+	defer d.Cleanup()
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	diffPath, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Add a file to the diff path with a fixed size
+	size := int64(1024)
+
+	f, err := os.Create(path.Join(diffPath, "test_file"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Truncate(size); err != nil {
+		t.Fatal(err)
+	}
+	s, err := f.Stat()
+	if err != nil {
+		t.Fatal(err)
+	}
+	size = s.Size()
+	if err := f.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	diffSize, err := d.DiffSize("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if diffSize != size {
+		t.Fatalf("Expected size to be %d got %d", size, diffSize)
+	}
+
+	if err := d.Create("2", "1"); err != nil {
+		t.Fatal(err)
+	}
+
+	diffSize, err = d.DiffSize("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+	// The diff size for the child should be zero
+	if diffSize != 0 {
+		t.Fatalf("Expected size to be %d got %d", 0, diffSize)
+	}
+}
+
+func TestExists(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+	defer d.Cleanup()
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	if d.Exists("none") {
+		t.Fatal("id name should not exist in the driver")
+	}
+
+	if !d.Exists("1") {
+		t.Fatal("id 1 should exist in the driver")
+	}
+}
+
+func TestStatus(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+	defer d.Cleanup()
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	status := d.Status()
+	if status == nil || len(status) == 0 {
+		t.Fatal("Status should not be nil or empty")
+	}
+	rootDir := status[0]
+	dirs := status[1]
+	if rootDir[0] != "Root Dir" {
+		t.Fatalf("Expected Root Dir got %s", rootDir[0])
+	}
+	if rootDir[1] != d.rootPath() {
+		t.Fatalf("Expected %s got %s", d.rootPath(), rootDir[1])
+	}
+	if dirs[0] != "Dirs" {
+		t.Fatalf("Expected Dirs got %s", dirs[0])
+	}
+	if dirs[1] != "1" {
+		t.Fatalf("Expected 1 got %s", dirs[1])
+	}
+}
+
+func TestApplyDiff(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+	defer d.Cleanup()
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	diffPath, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Add a file to the diff path with a fixed size
+	size := int64(1024)
+
+	f, err := os.Create(path.Join(diffPath, "test_file"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Truncate(size); err != nil {
+		t.Fatal(err)
+	}
+	f.Close()
+
+	diff, err := d.Diff("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if err := d.Create("2", ""); err != nil {
+		t.Fatal(err)
+	}
+	if err := d.Create("3", "2"); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := d.ApplyDiff("3", diff); err != nil {
+		t.Fatal(err)
+	}
+
+	// Ensure that the file is in the mount point for id 3
+
+	mountPoint, err := d.Get("3")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if _, err := os.Stat(path.Join(mountPoint, "test_file")); err != nil {
+		t.Fatal(err)
+	}
+}

+ 46 - 0
graphdriver/aufs/dirs.go

@@ -0,0 +1,46 @@
+package aufs
+
+import (
+	"bufio"
+	"io/ioutil"
+	"os"
+	"path"
+)
+
+// Return all the ids (names of the layer files) found under the given root
+func loadIds(root string) ([]string, error) {
+	dirs, err := ioutil.ReadDir(root)
+	if err != nil {
+		return nil, err
+	}
+	out := []string{}
+	for _, d := range dirs {
+		if !d.IsDir() {
+			out = append(out, d.Name())
+		}
+	}
+	return out, nil
+}
+
+// Read the layers file for the current id and return all the
+// layers represented by new lines in the file
+//
+// If there are no lines in the file then the id has no parent
+// and an empty slice is returned.
+func getParentIds(root, id string) ([]string, error) {
+	f, err := os.Open(path.Join(root, "layers", id))
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	out := []string{}
+	s := bufio.NewScanner(f)
+
+	for s.Scan() {
+		if t := s.Text(); t != "" {
+			out = append(out, s.Text())
+		}
+	}
+	return out, s.Err()
+}

+ 194 - 0
graphdriver/aufs/migrate.go

@@ -0,0 +1,194 @@
+package aufs
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path"
+)
+
+type metadata struct {
+	ID       string `json:"id"`
+	ParentID string `json:"parent,omitempty"`
+	Image    string `json:"Image,omitempty"`
+
+	parent *metadata
+}
+
+func pathExists(pth string) bool {
+	if _, err := os.Stat(pth); err != nil {
+		return false
+	}
+	return true
+}
+
+// Migrate existing images and containers from docker < 0.7.x
+//
+// The format pre 0.7 is for docker to store the metadata and filesystem
+// content in the same directory.  For the migration to work we need to move Image layer
+// data from /var/lib/docker/graph/<id>/layers to the diff of the registered id.
+//
+// Next we need to migrate the container's rw layer to diff of the driver.  After the
+// contents are migrated we need to register the image and container ids with the
+// driver.
+//
+// For the migration we try to move the folder containing the layer files; if that
+// fails because the data is currently mounted, we fall back to creating a
+// symlink.
+func (a *Driver) Migrate(pth string, setupInit func(p string) error) error {
+	if pathExists(path.Join(pth, "graph")) {
+		if err := a.migrateRepositories(pth); err != nil {
+			return err
+		}
+		if err := a.migrateImages(path.Join(pth, "graph")); err != nil {
+			return err
+		}
+		return a.migrateContainers(path.Join(pth, "containers"), setupInit)
+	}
+	return nil
+}
+
+func (a *Driver) migrateRepositories(pth string) error {
+	name := path.Join(pth, "repositories")
+	if err := os.Rename(name, name+"-aufs"); err != nil && !os.IsNotExist(err) {
+		return err
+	}
+	return nil
+}
+
+func (a *Driver) migrateContainers(pth string, setupInit func(p string) error) error {
+	fis, err := ioutil.ReadDir(pth)
+	if err != nil {
+		return err
+	}
+
+	for _, fi := range fis {
+		if id := fi.Name(); fi.IsDir() && pathExists(path.Join(pth, id, "rw")) {
+			if err := tryRelocate(path.Join(pth, id, "rw"), path.Join(a.rootPath(), "diff", id)); err != nil {
+				return err
+			}
+
+			if !a.Exists(id) {
+
+				metadata, err := loadMetadata(path.Join(pth, id, "config.json"))
+				if err != nil {
+					return err
+				}
+
+				initID := fmt.Sprintf("%s-init", id)
+				if err := a.Create(initID, metadata.Image); err != nil {
+					return err
+				}
+
+				initPath, err := a.Get(initID)
+				if err != nil {
+					return err
+				}
+				// setup init layer
+				if err := setupInit(initPath); err != nil {
+					return err
+				}
+
+				if err := a.Create(id, initID); err != nil {
+					return err
+				}
+			}
+		}
+	}
+	return nil
+}
+
+func (a *Driver) migrateImages(pth string) error {
+	fis, err := ioutil.ReadDir(pth)
+	if err != nil {
+		return err
+	}
+	var (
+		m       = make(map[string]*metadata)
+		current *metadata
+		exists  bool
+	)
+
+	for _, fi := range fis {
+		if id := fi.Name(); fi.IsDir() && pathExists(path.Join(pth, id, "layer")) {
+			if current, exists = m[id]; !exists {
+				current, err = loadMetadata(path.Join(pth, id, "json"))
+				if err != nil {
+					return err
+				}
+				m[id] = current
+			}
+		}
+	}
+
+	for _, v := range m {
+		v.parent = m[v.ParentID]
+	}
+
+	migrated := make(map[string]bool)
+	for _, v := range m {
+		if err := a.migrateImage(v, pth, migrated); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (a *Driver) migrateImage(m *metadata, pth string, migrated map[string]bool) error {
+	if !migrated[m.ID] {
+		if m.parent != nil {
+			a.migrateImage(m.parent, pth, migrated)
+		}
+		if err := tryRelocate(path.Join(pth, m.ID, "layer"), path.Join(a.rootPath(), "diff", m.ID)); err != nil {
+			return err
+		}
+		if !a.Exists(m.ID) {
+			if err := a.Create(m.ID, m.ParentID); err != nil {
+				return err
+			}
+		}
+		migrated[m.ID] = true
+	}
+	return nil
+}
+
+// tryRelocate will try to rename the old path to the new path and, if
+// the operation fails, it will fall back to a symlink
+func tryRelocate(oldPath, newPath string) error {
+	s, err := os.Lstat(newPath)
+	if err != nil && !os.IsNotExist(err) {
+		return err
+	}
+	// If the destination is a symlink then we already tried to relocate once before
+	// and it failed, so we delete it and try to relocate again
+	if s != nil && s.Mode()&os.ModeSymlink == os.ModeSymlink {
+		if err := os.RemoveAll(newPath); err != nil {
+			return err
+		}
+	}
+	if err := os.Rename(oldPath, newPath); err != nil {
+		if sErr := os.Symlink(oldPath, newPath); sErr != nil {
+			return fmt.Errorf("Unable to relocate %s to %s: Rename err %s Symlink err %s", oldPath, newPath, err, sErr)
+		}
+	}
+	return nil
+}
+
+func loadMetadata(pth string) (*metadata, error) {
+	f, err := os.Open(pth)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	var (
+		out = &metadata{}
+		dec = json.NewDecoder(f)
+	)
+
+	if err := dec.Decode(out); err != nil {
+		return nil, err
+	}
+	return out, nil
+}

+ 3 - 19
mount.go → graphdriver/aufs/mount.go

@@ -1,13 +1,11 @@
-package docker
+package aufs
 
 
 import (
 import (
-	"fmt"
 	"github.com/dotcloud/docker/utils"
 	"github.com/dotcloud/docker/utils"
 	"os"
 	"os"
 	"os/exec"
 	"os/exec"
 	"path/filepath"
 	"path/filepath"
 	"syscall"
 	"syscall"
-	"time"
 )
 )
 
 
 func Unmount(target string) error {
 func Unmount(target string) error {
@@ -17,22 +15,7 @@ func Unmount(target string) error {
 	if err := syscall.Unmount(target, 0); err != nil {
 	if err := syscall.Unmount(target, 0); err != nil {
 		return err
 		return err
 	}
 	}
-	// Even though we just unmounted the filesystem, AUFS will prevent deleting the mntpoint
-	// for some time. We'll just keep retrying until it succeeds.
-	for retries := 0; retries < 1000; retries++ {
-		err := os.Remove(target)
-		if err == nil {
-			// rm mntpoint succeeded
-			return nil
-		}
-		if os.IsNotExist(err) {
-			// mntpoint doesn't exist anymore. Success.
-			return nil
-		}
-		// fmt.Printf("(%v) Remove %v returned: %v\n", retries, target, err)
-		time.Sleep(10 * time.Millisecond)
-	}
-	return fmt.Errorf("Umount: Failed to umount %v", target)
+	return nil
 }
 }
 
 
 func Mounted(mountpoint string) (bool, error) {
 func Mounted(mountpoint string) (bool, error) {
@@ -49,5 +32,6 @@ func Mounted(mountpoint string) (bool, error) {
 	}
 	}
 	mntpointSt := mntpoint.Sys().(*syscall.Stat_t)
 	mntpointSt := mntpoint.Sys().(*syscall.Stat_t)
 	parentSt := parent.Sys().(*syscall.Stat_t)
 	parentSt := parent.Sys().(*syscall.Stat_t)
+
 	return mntpointSt.Dev != parentSt.Dev, nil
 	return mntpointSt.Dev != parentSt.Dev, nil
 }
 }

+ 1 - 1
mount_darwin.go → graphdriver/aufs/mount_darwin.go

@@ -1,4 +1,4 @@
-package docker
+package aufs
 
 
 import "errors"
 import "errors"
 
 

+ 1 - 1
mount_linux.go → graphdriver/aufs/mount_linux.go

@@ -1,4 +1,4 @@
-package docker
+package aufs
 
 
 import "syscall"
 import "syscall"
 
 

+ 956 - 0
graphdriver/devmapper/deviceset.go

@@ -0,0 +1,956 @@
+package devmapper
+
+import (
+	"encoding/json"
+	"fmt"
+	"github.com/dotcloud/docker/utils"
+	"io"
+	"io/ioutil"
+	"path"
+	"path/filepath"
+	"strconv"
+	"sync"
+	"time"
+)
+
+var (
+	DefaultDataLoopbackSize     int64  = 100 * 1024 * 1024 * 1024
+	DefaultMetaDataLoopbackSize int64  = 2 * 1024 * 1024 * 1024
+	DefaultBaseFsSize           uint64 = 10 * 1024 * 1024 * 1024
+)
+
+type DevInfo struct {
+	Hash          string     `json:"-"`
+	DeviceId      int        `json:"device_id"`
+	Size          uint64     `json:"size"`
+	TransactionId uint64     `json:"transaction_id"`
+	Initialized   bool       `json:"initialized"`
+	devices       *DeviceSet `json:"-"`
+}
+
+type MetaData struct {
+	Devices map[string]*DevInfo `json:"devices"`
+}
+
+type DeviceSet struct {
+	MetaData
+	sync.Mutex
+	root             string
+	devicePrefix     string
+	TransactionId    uint64
+	NewTransactionId uint64
+	nextFreeDevice   int
+	activeMounts     map[string]int
+}
+
+type DiskUsage struct {
+	Used  uint64
+	Total uint64
+}
+
+type Status struct {
+	PoolName         string
+	DataLoopback     string
+	MetadataLoopback string
+	Data             DiskUsage
+	Metadata         DiskUsage
+	SectorSize       uint64
+}
+
+type DevStatus struct {
+	DeviceId            int
+	Size                uint64
+	TransactionId       uint64
+	SizeInSectors       uint64
+	MappedSectors       uint64
+	HighestMappedSector uint64
+}
+
+func getDevName(name string) string {
+	return "/dev/mapper/" + name
+}
+
+func (info *DevInfo) Name() string {
+	hash := info.Hash
+	if hash == "" {
+		hash = "base"
+	}
+	return fmt.Sprintf("%s-%s", info.devices.devicePrefix, hash)
+}
+
+func (info *DevInfo) DevName() string {
+	return getDevName(info.Name())
+}
+
+func (devices *DeviceSet) loopbackDir() string {
+	return path.Join(devices.root, "devicemapper")
+}
+
+func (devices *DeviceSet) jsonFile() string {
+	return path.Join(devices.loopbackDir(), "json")
+}
+
+func (devices *DeviceSet) getPoolName() string {
+	return devices.devicePrefix + "-pool"
+}
+
+func (devices *DeviceSet) getPoolDevName() string {
+	return getDevName(devices.getPoolName())
+}
+
+func (devices *DeviceSet) hasImage(name string) bool {
+	dirname := devices.loopbackDir()
+	filename := path.Join(dirname, name)
+
+	_, err := osStat(filename)
+	return err == nil
+}
+
+// ensureImage creates a sparse file of <size> bytes at the path
+// <root>/devicemapper/<name>.
+// If the file already exists, it does nothing.
+// Either way it returns the full path.
+func (devices *DeviceSet) ensureImage(name string, size int64) (string, error) {
+	dirname := devices.loopbackDir()
+	filename := path.Join(dirname, name)
+
+	if err := osMkdirAll(dirname, 0700); err != nil && !osIsExist(err) {
+		return "", err
+	}
+
+	if _, err := osStat(filename); err != nil {
+		if !osIsNotExist(err) {
+			return "", err
+		}
+		utils.Debugf("Creating loopback file %s for device-manage use", filename)
+		file, err := osOpenFile(filename, osORdWr|osOCreate, 0600)
+		if err != nil {
+			return "", err
+		}
+		defer file.Close()
+
+		if err = file.Truncate(size); err != nil {
+			return "", err
+		}
+	}
+	return filename, nil
+}
+
+func (devices *DeviceSet) allocateDeviceId() int {
+	// TODO: Add smarter reuse of deleted devices
+	id := devices.nextFreeDevice
+	devices.nextFreeDevice = devices.nextFreeDevice + 1
+	return id
+}
+
+func (devices *DeviceSet) allocateTransactionId() uint64 {
+	devices.NewTransactionId = devices.NewTransactionId + 1
+	return devices.NewTransactionId
+}
+
+func (devices *DeviceSet) saveMetadata() error {
+	jsonData, err := json.Marshal(devices.MetaData)
+	if err != nil {
+		return fmt.Errorf("Error encoding metadata to json: %s", err)
+	}
+	tmpFile, err := ioutil.TempFile(filepath.Dir(devices.jsonFile()), ".json")
+	if err != nil {
+		return fmt.Errorf("Error creating metadata file: %s", err)
+	}
+
+	n, err := tmpFile.Write(jsonData)
+	if err != nil {
+		return fmt.Errorf("Error writing metadata to %s: %s", tmpFile.Name(), err)
+	}
+	if n < len(jsonData) {
+		return io.ErrShortWrite
+	}
+	if err := tmpFile.Sync(); err != nil {
+		return fmt.Errorf("Error syncing metadata file %s: %s", tmpFile.Name(), err)
+	}
+	if err := tmpFile.Close(); err != nil {
+		return fmt.Errorf("Error closing metadata file %s: %s", tmpFile.Name(), err)
+	}
+	if err := osRename(tmpFile.Name(), devices.jsonFile()); err != nil {
+		return fmt.Errorf("Error committing metadata file: %s", err)
+	}
+
+	if devices.NewTransactionId != devices.TransactionId {
+		if err = setTransactionId(devices.getPoolDevName(), devices.TransactionId, devices.NewTransactionId); err != nil {
+			return fmt.Errorf("Error setting devmapper transaction ID: %s", err)
+		}
+		devices.TransactionId = devices.NewTransactionId
+	}
+	return nil
+}
+
+func (devices *DeviceSet) registerDevice(id int, hash string, size uint64) (*DevInfo, error) {
+	utils.Debugf("registerDevice(%v, %v)", id, hash)
+	info := &DevInfo{
+		Hash:          hash,
+		DeviceId:      id,
+		Size:          size,
+		TransactionId: devices.allocateTransactionId(),
+		Initialized:   false,
+		devices:       devices,
+	}
+
+	devices.Devices[hash] = info
+	if err := devices.saveMetadata(); err != nil {
+		// Try to remove unused device
+		delete(devices.Devices, hash)
+		return nil, err
+	}
+
+	return info, nil
+}
+
+func (devices *DeviceSet) activateDeviceIfNeeded(hash string) error {
+	utils.Debugf("activateDeviceIfNeeded(%v)", hash)
+	info := devices.Devices[hash]
+	if info == nil {
+		return fmt.Errorf("Unknown device %s", hash)
+	}
+
+	if devinfo, _ := getInfo(info.Name()); devinfo != nil && devinfo.Exists != 0 {
+		return nil
+	}
+
+	return activateDevice(devices.getPoolDevName(), info.Name(), info.DeviceId, info.Size)
+}
+
+func (devices *DeviceSet) createFilesystem(info *DevInfo) error {
+	devname := info.DevName()
+
+	err := execRun("mkfs.ext4", "-E", "discard,lazy_itable_init=0,lazy_journal_init=0", devname)
+	if err != nil {
+		err = execRun("mkfs.ext4", "-E", "discard,lazy_itable_init=0", devname)
+	}
+	if err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+	return nil
+}
+
+func (devices *DeviceSet) loadMetaData() error {
+	utils.Debugf("loadMetadata()")
+	defer utils.Debugf("loadMetadata END")
+	_, _, _, params, err := getStatus(devices.getPoolName())
+	if err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+
+	if _, err := fmt.Sscanf(params, "%d", &devices.TransactionId); err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+	devices.NewTransactionId = devices.TransactionId
+
+	jsonData, err := ioutil.ReadFile(devices.jsonFile())
+	if err != nil && !osIsNotExist(err) {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+
+	devices.MetaData.Devices = make(map[string]*DevInfo)
+	if jsonData != nil {
+		if err := json.Unmarshal(jsonData, &devices.MetaData); err != nil {
+			utils.Debugf("\n--->Err: %s\n", err)
+			return err
+		}
+	}
+
+	for hash, d := range devices.Devices {
+		d.Hash = hash
+		d.devices = devices
+
+		if d.DeviceId >= devices.nextFreeDevice {
+			devices.nextFreeDevice = d.DeviceId + 1
+		}
+
+		// If the transaction id is larger than the actual one we lost the device due to some crash
+		if d.TransactionId > devices.TransactionId {
+			utils.Debugf("Removing lost device %s with id %d", hash, d.TransactionId)
+			delete(devices.Devices, hash)
+		}
+	}
+	return nil
+}
+
+func (devices *DeviceSet) setupBaseImage() error {
+	oldInfo := devices.Devices[""]
+	if oldInfo != nil && oldInfo.Initialized {
+		return nil
+	}
+
+	if oldInfo != nil && !oldInfo.Initialized {
+		utils.Debugf("Removing uninitialized base image")
+		if err := devices.removeDevice(""); err != nil {
+			utils.Debugf("\n--->Err: %s\n", err)
+			return err
+		}
+	}
+
+	utils.Debugf("Initializing base device-manager snapshot")
+
+	id := devices.allocateDeviceId()
+
+	// Create initial device
+	if err := createDevice(devices.getPoolDevName(), id); err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+
+	utils.Debugf("Registering base device (id %v) with FS size %v", id, DefaultBaseFsSize)
+	info, err := devices.registerDevice(id, "", DefaultBaseFsSize)
+	if err != nil {
+		_ = deleteDevice(devices.getPoolDevName(), id)
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+
+	utils.Debugf("Creating filesystem on base device-manager snapshot")
+
+	if err = devices.activateDeviceIfNeeded(""); err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+
+	if err := devices.createFilesystem(info); err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+
+	info.Initialized = true
+	if err = devices.saveMetadata(); err != nil {
+		info.Initialized = false
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+
+	return nil
+}
+
+func setCloseOnExec(name string) {
+	if fileInfos, _ := ioutil.ReadDir("/proc/self/fd"); fileInfos != nil {
+		for _, i := range fileInfos {
+			link, _ := osReadlink(filepath.Join("/proc/self/fd", i.Name()))
+			if link == name {
+				fd, err := strconv.Atoi(i.Name())
+				if err == nil {
+					sysCloseOnExec(fd)
+				}
+			}
+		}
+	}
+}
+
+func (devices *DeviceSet) log(level int, file string, line int, dmError int, message string) {
+	if level >= 7 {
+		return // Ignore _LOG_DEBUG
+	}
+
+	utils.Debugf("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message)
+}
+
+func major(device uint64) uint64 {
+	return (device >> 8) & 0xfff
+}
+
+func minor(device uint64) uint64 {
+	return (device & 0xff) | ((device >> 12) & 0xfff00)
+}
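+
+// Worked example (hypothetical value): for a stat Dev of 0xfd01, a typical
+// device-mapper node at 253:1, major(0xfd01) = (0xfd01>>8)&0xfff = 253 and
+// minor(0xfd01) = (0xfd01&0xff) | ((0xfd01>>12)&0xfff00) = 1. This follows the
+// kernel's extended dev_t encoding, where the 12 major bits sit between the low
+// and high parts of the 20 minor bits.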
+
+func (devices *DeviceSet) ResizePool(size int64) error {
+	dirname := devices.loopbackDir()
+	datafilename := path.Join(dirname, "data")
+	metadatafilename := path.Join(dirname, "metadata")
+
+	datafile, err := osOpenFile(datafilename, osORdWr, 0)
+	if datafile == nil {
+		return err
+	}
+	defer datafile.Close()
+
+	fi, err := datafile.Stat()
+	if fi == nil {
+		return err
+	}
+
+	if fi.Size() > size {
+		return fmt.Errorf("Can't shrink file")
+	}
+
+	dataloopback := FindLoopDeviceFor(&osFile{File: datafile})
+	if dataloopback == nil {
+		return fmt.Errorf("Unable to find loopback mount for: %s", datafilename)
+	}
+	defer dataloopback.Close()
+
+	metadatafile, err := osOpenFile(metadatafilename, osORdWr, 0)
+	if metadatafile == nil {
+		return err
+	}
+	defer metadatafile.Close()
+
+	metadataloopback := FindLoopDeviceFor(&osFile{File: metadatafile})
+	if metadataloopback == nil {
+		return fmt.Errorf("Unable to find loopback mount for: %s", metadatafilename)
+	}
+	defer metadataloopback.Close()
+
+	// Grow loopback file
+	if err := datafile.Truncate(size); err != nil {
+		return fmt.Errorf("Unable to grow loopback file: %s", err)
+	}
+
+	// Reload size for loopback device
+	if err := LoopbackSetCapacity(dataloopback); err != nil {
+		return fmt.Errorf("Unable to update loopback capacity: %s", err)
+	}
+
+	// Suspend the pool
+	if err := suspendDevice(devices.getPoolName()); err != nil {
+		return fmt.Errorf("Unable to suspend pool: %s", err)
+	}
+
+	// Reload with the new block sizes
+	if err := reloadPool(devices.getPoolName(), dataloopback, metadataloopback); err != nil {
+		return fmt.Errorf("Unable to reload pool: %s", err)
+	}
+
+	// Resume the pool
+	if err := resumeDevice(devices.getPoolName()); err != nil {
+		return fmt.Errorf("Unable to resume pool: %s", err)
+	}
+
+	return nil
+}
+
+func (devices *DeviceSet) initDevmapper(doInit bool) error {
+	logInit(devices)
+
+	// Make sure the sparse images exist in <root>/devicemapper/data and
+	// <root>/devicemapper/metadata
+
+	hasData := devices.hasImage("data")
+	hasMetadata := devices.hasImage("metadata")
+
+	if !doInit && !hasData {
+		return fmt.Errorf("Looback data file not found %s")
+	}
+
+	if !doInit && !hasMetadata {
+		return fmt.Errorf("Looback metadata file not found %s")
+	}
+
+	createdLoopback := !hasData || !hasMetadata
+	data, err := devices.ensureImage("data", DefaultDataLoopbackSize)
+	if err != nil {
+		utils.Debugf("Error device ensureImage (data): %s\n", err)
+		return err
+	}
+	metadata, err := devices.ensureImage("metadata", DefaultMetaDataLoopbackSize)
+	if err != nil {
+		utils.Debugf("Error device ensureImage (metadata): %s\n", err)
+		return err
+	}
+
+	// Set the device prefix from the device id and inode of the docker root dir
+
+	st, err := osStat(devices.root)
+	if err != nil {
+		return fmt.Errorf("Error looking up dir %s: %s", devices.root, err)
+	}
+	sysSt := toSysStatT(st.Sys())
+	// "reg-" stands for "regular file".
+	// In the future we might use "dev-" for "device file", etc.
+	// docker-maj,min[-inode] stands for:
+	//	- Managed by docker
+	//	- The target of this device is at major <maj> and minor <min>
+	//	- If <inode> is defined, use that file inside the device as a loopback image. Otherwise use the device itself.
+	devices.devicePrefix = fmt.Sprintf("docker-%d:%d-%d", major(sysSt.Dev), minor(sysSt.Dev), sysSt.Ino)
+	utils.Debugf("Generated prefix: %s", devices.devicePrefix)
+
+	// Check for the existence of the device <prefix>-pool
+	utils.Debugf("Checking for existence of the pool '%s'", devices.getPoolName())
+	info, err := getInfo(devices.getPoolName())
+	if info == nil {
+		utils.Debugf("Error device getInfo: %s", err)
+		return err
+	}
+
+	// It seems libdevmapper opens this without O_CLOEXEC, and go exec will not close files
+	// that are not close-on-exec, and lxc-start will die if it inherits any unexpected files,
+	// so we add this bad hack to make sure the control fd is marked close-on-exec here.
+	setCloseOnExec("/dev/mapper/control")
+
+	// If the pool doesn't exist, create it
+	if info.Exists == 0 {
+		utils.Debugf("Pool doesn't exist. Creating it.")
+
+		dataFile, err := AttachLoopDevice(data)
+		if err != nil {
+			utils.Debugf("\n--->Err: %s\n", err)
+			return err
+		}
+		defer dataFile.Close()
+
+		metadataFile, err := AttachLoopDevice(metadata)
+		if err != nil {
+			utils.Debugf("\n--->Err: %s\n", err)
+			return err
+		}
+		defer metadataFile.Close()
+
+		if err := createPool(devices.getPoolName(), dataFile, metadataFile); err != nil {
+			utils.Debugf("\n--->Err: %s\n", err)
+			return err
+		}
+	}
+
+	// If we didn't just create the data or metadata image, we need to
+	// load the metadata from the existing file.
+	if !createdLoopback {
+		if err = devices.loadMetaData(); err != nil {
+			utils.Debugf("\n--->Err: %s\n", err)
+			return err
+		}
+	}
+
+	// Setup the base image
+	if doInit {
+		if err := devices.setupBaseImage(); err != nil {
+			utils.Debugf("Error device setupBaseImage: %s\n", err)
+			return err
+		}
+	}
+
+	return nil
+}
+
+func (devices *DeviceSet) AddDevice(hash, baseHash string) error {
+	devices.Lock()
+	defer devices.Unlock()
+
+	if devices.Devices[hash] != nil {
+		return fmt.Errorf("hash %s already exists", hash)
+	}
+
+	baseInfo := devices.Devices[baseHash]
+	if baseInfo == nil {
+		return fmt.Errorf("Error adding device for '%s': can't find device for parent '%s'", hash, baseHash)
+	}
+
+	deviceId := devices.allocateDeviceId()
+
+	if err := devices.createSnapDevice(devices.getPoolDevName(), deviceId, baseInfo.Name(), baseInfo.DeviceId); err != nil {
+		utils.Debugf("Error creating snap device: %s\n", err)
+		return err
+	}
+
+	if _, err := devices.registerDevice(deviceId, hash, baseInfo.Size); err != nil {
+		deleteDevice(devices.getPoolDevName(), deviceId)
+		utils.Debugf("Error registering device: %s\n", err)
+		return err
+	}
+	return nil
+}
+
+func (devices *DeviceSet) removeDevice(hash string) error {
+	info := devices.Devices[hash]
+	if info == nil {
+		return fmt.Errorf("hash %s doesn't exists", hash)
+	}
+
+	devinfo, _ := getInfo(info.Name())
+	if devinfo != nil && devinfo.Exists != 0 {
+		if err := removeDevice(info.Name()); err != nil {
+			utils.Debugf("Error removing device: %s\n", err)
+			return err
+		}
+	}
+
+	if info.Initialized {
+		info.Initialized = false
+		if err := devices.saveMetadata(); err != nil {
+			utils.Debugf("Error saving meta data: %s\n", err)
+			return err
+		}
+	}
+
+	if err := deleteDevice(devices.getPoolDevName(), info.DeviceId); err != nil {
+		utils.Debugf("Error deleting device: %s\n", err)
+		return err
+	}
+
+	devices.allocateTransactionId()
+	delete(devices.Devices, info.Hash)
+
+	if err := devices.saveMetadata(); err != nil {
+		devices.Devices[info.Hash] = info
+		utils.Debugf("Error saving meta data: %s\n", err)
+		return err
+	}
+
+	return nil
+}
+
+func (devices *DeviceSet) RemoveDevice(hash string) error {
+	devices.Lock()
+	defer devices.Unlock()
+
+	return devices.removeDevice(hash)
+}
+
+func (devices *DeviceSet) deactivateDevice(hash string) error {
+	utils.Debugf("[devmapper] deactivateDevice(%s)", hash)
+	defer utils.Debugf("[devmapper] deactivateDevice END")
+	var devname string
+	// FIXME: shouldn't we just register the pool into devices?
+	devname, err := devices.byHash(hash)
+	if err != nil {
+		return err
+	}
+	devinfo, err := getInfo(devname)
+	if err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+	if devinfo.Exists != 0 {
+		if err := removeDevice(devname); err != nil {
+			utils.Debugf("\n--->Err: %s\n", err)
+			return err
+		}
+		if err := devices.waitRemove(hash); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// waitRemove blocks until either:
+// a) the device registered at <device_set_prefix>-<hash> is removed,
+// or b) the 1 second timeout expires.
+func (devices *DeviceSet) waitRemove(hash string) error {
+	utils.Debugf("[deviceset %s] waitRemove(%s)", devices.devicePrefix, hash)
+	defer utils.Debugf("[deviceset %s] waitRemove END", devices.devicePrefix, hash)
+	devname, err := devices.byHash(hash)
+	if err != nil {
+		return err
+	}
+	i := 0
+	for ; i < 1000; i += 1 {
+		devinfo, err := getInfo(devname)
+		if err != nil {
+			// If there is an error we assume the device doesn't exist.
+			// The error might actually be something else, but we can't differentiate.
+			return nil
+		}
+		if i%100 == 0 {
+			utils.Debugf("Waiting for removal of %s: exists=%d", devname, devinfo.Exists)
+		}
+		if devinfo.Exists == 0 {
+			break
+		}
+
+		time.Sleep(1 * time.Millisecond)
+	}
+	if i == 1000 {
+		return fmt.Errorf("Timeout while waiting for device %s to be removed", devname)
+	}
+	return nil
+}
+
+// waitClose blocks until either:
+// a) the device registered at <device_set_prefix>-<hash> is closed,
+// or b) the 1 second timeout expires.
+func (devices *DeviceSet) waitClose(hash string) error {
+	devname, err := devices.byHash(hash)
+	if err != nil {
+		return err
+	}
+	i := 0
+	for ; i < 1000; i += 1 {
+		devinfo, err := getInfo(devname)
+		if err != nil {
+			return err
+		}
+		if i%100 == 0 {
+			utils.Debugf("Waiting for unmount of %s: opencount=%d", devname, devinfo.OpenCount)
+		}
+		if devinfo.OpenCount == 0 {
+			break
+		}
+		time.Sleep(1 * time.Millisecond)
+	}
+	if i == 1000 {
+		return fmt.Errorf("Timeout while waiting for device %s to close", devname)
+	}
+	return nil
+}
+
+// byHash is a hack to allow looking up the deviceset's pool by the hash "pool".
+// FIXME: it seems probably cleaner to register the pool in devices.Devices,
+// but I am afraid of arcane implications deep in the devicemapper code,
+// so this will do.
+func (devices *DeviceSet) byHash(hash string) (devname string, err error) {
+	if hash == "pool" {
+		return devices.getPoolDevName(), nil
+	}
+	info := devices.Devices[hash]
+	if info == nil {
+		return "", fmt.Errorf("hash %s doesn't exists", hash)
+	}
+	return info.Name(), nil
+}
+
+func (devices *DeviceSet) Shutdown() error {
+	devices.Lock()
+	defer devices.Unlock()
+
+	utils.Debugf("[deviceset %s] shutdown()", devices.devicePrefix)
+	utils.Debugf("[devmapper] Shutting down DeviceSet: %s", devices.root)
+	defer utils.Debugf("[deviceset %s] shutdown END", devices.devicePrefix)
+
+	for path, count := range devices.activeMounts {
+		for i := count; i > 0; i-- {
+			if err := sysUnmount(path, 0); err != nil {
+				utils.Debugf("Shutdown unmounting %s, error: %s\n", path, err)
+			}
+		}
+		delete(devices.activeMounts, path)
+	}
+
+	for _, d := range devices.Devices {
+		if err := devices.waitClose(d.Hash); err != nil {
+			utils.Errorf("Warning: error waiting for device %s to unmount: %s\n", d.Hash, err)
+		}
+		if err := devices.deactivateDevice(d.Hash); err != nil {
+			utils.Debugf("Shutdown deactivate %s , error: %s\n", d.Hash, err)
+		}
+	}
+
+	pool := devices.getPoolDevName()
+	if devinfo, err := getInfo(pool); err == nil && devinfo.Exists != 0 {
+		if err := devices.deactivateDevice("pool"); err != nil {
+			utils.Debugf("Shutdown deactivate %s , error: %s\n", pool, err)
+		}
+	}
+
+	return nil
+}
+
+func (devices *DeviceSet) MountDevice(hash, path string, readOnly bool) error {
+	devices.Lock()
+	defer devices.Unlock()
+
+	if err := devices.activateDeviceIfNeeded(hash); err != nil {
+		return fmt.Errorf("Error activating devmapper device for '%s': %s", hash, err)
+	}
+
+	info := devices.Devices[hash]
+
+	var flags uintptr = sysMsMgcVal
+
+	if readOnly {
+		flags = flags | sysMsRdOnly
+	}
+
+	err := sysMount(info.DevName(), path, "ext4", flags, "discard")
+	if err != nil && err == sysEInval {
+		err = sysMount(info.DevName(), path, "ext4", flags, "")
+	}
+	if err != nil {
+		return fmt.Errorf("Error mounting '%s' on '%s': %s", info.DevName(), path, err)
+	}
+
+	count := devices.activeMounts[path]
+	devices.activeMounts[path] = count + 1
+
+	return devices.setInitialized(hash)
+}
+
+func (devices *DeviceSet) UnmountDevice(hash, path string, deactivate bool) error {
+	utils.Debugf("[devmapper] UnmountDevice(hash=%s path=%s)", hash, path)
+	defer utils.Debugf("[devmapper] UnmountDevice END")
+	devices.Lock()
+	defer devices.Unlock()
+
+	utils.Debugf("[devmapper] Unmount(%s)", path)
+	if err := sysUnmount(path, 0); err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+	utils.Debugf("[devmapper] Unmount done")
+	// Wait for the unmount to be effective,
+	// by watching the value of Info.OpenCount for the device
+	if err := devices.waitClose(hash); err != nil {
+		return err
+	}
+
+	if count := devices.activeMounts[path]; count > 1 {
+		devices.activeMounts[path] = count - 1
+	} else {
+		delete(devices.activeMounts, path)
+	}
+
+	if deactivate {
+		devices.deactivateDevice(hash)
+	}
+
+	return nil
+}
+
+func (devices *DeviceSet) HasDevice(hash string) bool {
+	devices.Lock()
+	defer devices.Unlock()
+
+	return devices.Devices[hash] != nil
+}
+
+func (devices *DeviceSet) HasInitializedDevice(hash string) bool {
+	devices.Lock()
+	defer devices.Unlock()
+
+	info := devices.Devices[hash]
+	return info != nil && info.Initialized
+}
+
+func (devices *DeviceSet) HasActivatedDevice(hash string) bool {
+	devices.Lock()
+	defer devices.Unlock()
+
+	info := devices.Devices[hash]
+	if info == nil {
+		return false
+	}
+	devinfo, _ := getInfo(info.Name())
+	return devinfo != nil && devinfo.Exists != 0
+}
+
+func (devices *DeviceSet) setInitialized(hash string) error {
+	info := devices.Devices[hash]
+	if info == nil {
+		return fmt.Errorf("Unknown device %s", hash)
+	}
+
+	info.Initialized = true
+	if err := devices.saveMetadata(); err != nil {
+		info.Initialized = false
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+
+	return nil
+}
+
+func (devices *DeviceSet) List() []string {
+	devices.Lock()
+	defer devices.Unlock()
+
+	ids := make([]string, len(devices.Devices))
+	i := 0
+	for k := range devices.Devices {
+		ids[i] = k
+		i++
+	}
+	return ids
+}
+
+func (devices *DeviceSet) deviceStatus(devName string) (sizeInSectors, mappedSectors, highestMappedSector uint64, err error) {
+	var params string
+	_, sizeInSectors, _, params, err = getStatus(devName)
+	if err != nil {
+		return
+	}
+	_, err = fmt.Sscanf(params, "%d %d", &mappedSectors, &highestMappedSector)
+	return
+}
+
+func (devices *DeviceSet) GetDeviceStatus(hash string) (*DevStatus, error) {
+	devices.Lock()
+	defer devices.Unlock()
+
+	info := devices.Devices[hash]
+	if info == nil {
+		return nil, fmt.Errorf("No device %s", hash)
+	}
+
+	status := &DevStatus{
+		DeviceId:      info.DeviceId,
+		Size:          info.Size,
+		TransactionId: info.TransactionId,
+	}
+
+	if err := devices.activateDeviceIfNeeded(hash); err != nil {
+		return nil, fmt.Errorf("Error activating devmapper device for '%s': %s", hash, err)
+	}
+
+	if sizeInSectors, mappedSectors, highestMappedSector, err := devices.deviceStatus(info.DevName()); err != nil {
+		return nil, err
+	} else {
+		status.SizeInSectors = sizeInSectors
+		status.MappedSectors = mappedSectors
+		status.HighestMappedSector = highestMappedSector
+	}
+
+	return status, nil
+}
+
+func (devices *DeviceSet) poolStatus() (totalSizeInSectors, transactionId, dataUsed, dataTotal, metadataUsed, metadataTotal uint64, err error) {
+	var params string
+	if _, totalSizeInSectors, _, params, err = getStatus(devices.getPoolName()); err == nil {
+		_, err = fmt.Sscanf(params, "%d %d/%d %d/%d", &transactionId, &metadataUsed, &metadataTotal, &dataUsed, &dataTotal)
+	}
+	return
+}
+
+func (devices *DeviceSet) Status() *Status {
+	devices.Lock()
+	defer devices.Unlock()
+
+	status := &Status{}
+
+	status.PoolName = devices.getPoolName()
+	status.DataLoopback = path.Join(devices.loopbackDir(), "data")
+	status.MetadataLoopback = path.Join(devices.loopbackDir(), "metadata")
+
+	totalSizeInSectors, _, dataUsed, dataTotal, metadataUsed, metadataTotal, err := devices.poolStatus()
+	if err == nil {
+		// Convert from blocks to bytes
+		blockSizeInSectors := totalSizeInSectors / dataTotal
+
+		status.Data.Used = dataUsed * blockSizeInSectors * 512
+		status.Data.Total = dataTotal * blockSizeInSectors * 512
+
+		// metadata blocks are always 4k
+		status.Metadata.Used = metadataUsed * 4096
+		status.Metadata.Total = metadataTotal * 4096
+
+		status.SectorSize = blockSizeInSectors * 512
+	}
+
+	return status
+}
+
+func NewDeviceSet(root string, doInit bool) (*DeviceSet, error) {
+	SetDevDir("/dev")
+
+	devices := &DeviceSet{
+		root:         root,
+		MetaData:     MetaData{Devices: make(map[string]*DevInfo)},
+		activeMounts: make(map[string]int),
+	}
+
+	if err := devices.initDevmapper(doInit); err != nil {
+		return nil, err
+	}
+
+	return devices, nil
+}
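
The functions above compose into a small lifecycle: NewDeviceSet provisions the pool (and, with doInit, the base ext4 image), AddDevice takes a thin snapshot of a parent, MountDevice/UnmountDevice expose it as a filesystem, and Shutdown tears everything down. A minimal sketch of that flow, with hypothetical paths and layer names (root privileges, a working libdevmapper, and an existing mountpoint assumed):

	package main

	import (
		"log"

		"github.com/dotcloud/docker/graphdriver/devmapper"
	)

	func main() {
		// doInit=true creates the sparse data/metadata files, the thin pool and the base image.
		devices, err := devmapper.NewDeviceSet("/var/lib/docker/devicemapper-example", true)
		if err != nil {
			log.Fatal(err)
		}
		defer devices.Shutdown()

		// "" is the hash of the base image; every layer is a thin snapshot of its parent.
		if err := devices.AddDevice("layer-1", ""); err != nil {
			log.Fatal(err)
		}
		// The mountpoint must already exist.
		if err := devices.MountDevice("layer-1", "/mnt/layer-1", false); err != nil {
			log.Fatal(err)
		}
		defer devices.UnmountDevice("layer-1", "/mnt/layer-1", true)
	}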

+ 576 - 0
graphdriver/devmapper/devmapper.go

@@ -0,0 +1,576 @@
+package devmapper
+
+import (
+	"errors"
+	"fmt"
+	"github.com/dotcloud/docker/utils"
+	"runtime"
+)
+
+type DevmapperLogger interface {
+	log(level int, file string, line int, dmError int, message string)
+}
+
+const (
+	DeviceCreate TaskType = iota
+	DeviceReload
+	DeviceRemove
+	DeviceRemoveAll
+	DeviceSuspend
+	DeviceResume
+	DeviceInfo
+	DeviceDeps
+	DeviceRename
+	DeviceVersion
+	DeviceStatus
+	DeviceTable
+	DeviceWaitevent
+	DeviceList
+	DeviceClear
+	DeviceMknodes
+	DeviceListVersions
+	DeviceTargetMsg
+	DeviceSetGeometry
+)
+
+const (
+	AddNodeOnResume AddNodeType = iota
+	AddNodeOnCreate
+)
+
+var (
+	ErrTaskRun                = errors.New("dm_task_run failed")
+	ErrTaskSetName            = errors.New("dm_task_set_name failed")
+	ErrTaskSetMessage         = errors.New("dm_task_set_message failed")
+	ErrTaskSetAddNode         = errors.New("dm_task_set_add_node failed")
+	ErrTaskSetRo              = errors.New("dm_task_set_ro failed")
+	ErrTaskAddTarget          = errors.New("dm_task_add_target failed")
+	ErrTaskSetSector          = errors.New("dm_task_set_sector failed")
+	ErrTaskGetInfo            = errors.New("dm_task_get_info failed")
+	ErrTaskSetCookie          = errors.New("dm_task_set_cookie failed")
+	ErrNilCookie              = errors.New("cookie ptr can't be nil")
+	ErrAttachLoopbackDevice   = errors.New("loopback mounting failed")
+	ErrGetBlockSize           = errors.New("Can't get block size")
+	ErrUdevWait               = errors.New("wait on udev cookie failed")
+	ErrSetDevDir              = errors.New("dm_set_dev_dir failed")
+	ErrGetLibraryVersion      = errors.New("dm_get_library_version failed")
+	ErrCreateRemoveTask       = errors.New("Can't create task of type DeviceRemove")
+	ErrRunRemoveDevice        = errors.New("running removeDevice failed")
+	ErrInvalidAddNode         = errors.New("Invalid AddNode type")
+	ErrGetLoopbackBackingFile = errors.New("Unable to get loopback backing file")
+	ErrLoopbackSetCapacity    = errors.New("Unable to set loopback capacity")
+)
+
+type (
+	Task struct {
+		unmanaged *CDmTask
+	}
+	Info struct {
+		Exists        int
+		Suspended     int
+		LiveTable     int
+		InactiveTable int
+		OpenCount     int32
+		EventNr       uint32
+		Major         uint32
+		Minor         uint32
+		ReadOnly      int
+		TargetCount   int32
+	}
+	TaskType    int
+	AddNodeType int
+)
+
+func (t *Task) destroy() {
+	if t != nil {
+		DmTaskDestroy(t.unmanaged)
+		runtime.SetFinalizer(t, nil)
+	}
+}
+
+func TaskCreate(tasktype TaskType) *Task {
+	Ctask := DmTaskCreate(int(tasktype))
+	if Ctask == nil {
+		return nil
+	}
+	task := &Task{unmanaged: Ctask}
+	runtime.SetFinalizer(task, (*Task).destroy)
+	return task
+}
+
+func (t *Task) Run() error {
+	if res := DmTaskRun(t.unmanaged); res != 1 {
+		return ErrTaskRun
+	}
+	return nil
+}
+
+func (t *Task) SetName(name string) error {
+	if res := DmTaskSetName(t.unmanaged, name); res != 1 {
+		return ErrTaskSetName
+	}
+	return nil
+}
+
+func (t *Task) SetMessage(message string) error {
+	if res := DmTaskSetMessage(t.unmanaged, message); res != 1 {
+		return ErrTaskSetMessage
+	}
+	return nil
+}
+
+func (t *Task) SetSector(sector uint64) error {
+	if res := DmTaskSetSector(t.unmanaged, sector); res != 1 {
+		return ErrTaskSetSector
+	}
+	return nil
+}
+
+func (t *Task) SetCookie(cookie *uint, flags uint16) error {
+	if cookie == nil {
+		return ErrNilCookie
+	}
+	if res := DmTaskSetCookie(t.unmanaged, cookie, flags); res != 1 {
+		return ErrTaskSetCookie
+	}
+	return nil
+}
+
+func (t *Task) SetAddNode(addNode AddNodeType) error {
+	if addNode != AddNodeOnResume && addNode != AddNodeOnCreate {
+		return ErrInvalidAddNode
+	}
+	if res := DmTaskSetAddNode(t.unmanaged, addNode); res != 1 {
+		return ErrTaskSetAddNode
+	}
+	return nil
+}
+
+func (t *Task) SetRo() error {
+	if res := DmTaskSetRo(t.unmanaged); res != 1 {
+		return ErrTaskSetRo
+	}
+	return nil
+}
+
+func (t *Task) AddTarget(start, size uint64, ttype, params string) error {
+	if res := DmTaskAddTarget(t.unmanaged, start, size,
+		ttype, params); res != 1 {
+		return ErrTaskAddTarget
+	}
+	return nil
+}
+
+func (t *Task) GetInfo() (*Info, error) {
+	info := &Info{}
+	if res := DmTaskGetInfo(t.unmanaged, info); res != 1 {
+		return nil, ErrTaskGetInfo
+	}
+	return info, nil
+}
+
+func (t *Task) GetNextTarget(next uintptr) (nextPtr uintptr, start uint64,
+	length uint64, targetType string, params string) {
+
+	return DmGetNextTarget(t.unmanaged, next, &start, &length,
+			&targetType, &params),
+		start, length, targetType, params
+}
+
+func AttachLoopDevice(filename string) (*osFile, error) {
+	var fd int
+	res := DmAttachLoopDevice(filename, &fd)
+	if res == "" {
+		return nil, ErrAttachLoopbackDevice
+	}
+	return &osFile{File: osNewFile(uintptr(fd), res)}, nil
+}
+
+func getLoopbackBackingFile(file *osFile) (uint64, uint64, error) {
+	dev, inode, err := DmGetLoopbackBackingFile(file.Fd())
+	if err != 0 {
+		return 0, 0, ErrGetLoopbackBackingFile
+	}
+	return dev, inode, nil
+}
+
+func LoopbackSetCapacity(file *osFile) error {
+	if err := DmLoopbackSetCapacity(file.Fd()); err != 0 {
+		return ErrLoopbackSetCapacity
+	}
+	return nil
+}
+
+func FindLoopDeviceFor(file *osFile) *osFile {
+	stat, err := file.Stat()
+	if err != nil {
+		return nil
+	}
+	targetInode := stat.Sys().(*sysStatT).Ino
+	targetDevice := stat.Sys().(*sysStatT).Dev
+
+	for i := 0; true; i++ {
+		path := fmt.Sprintf("/dev/loop%d", i)
+
+		file, err := osOpenFile(path, osORdWr, 0)
+		if err != nil {
+			if osIsNotExist(err) {
+				return nil
+			}
+
+			// Ignore any other error and keep scanning; we only stop at the
+			// first loop device path that does not exist.
+			continue
+		}
+
+		dev, inode, err := getLoopbackBackingFile(&osFile{File: file})
+		if err == nil && dev == targetDevice && inode == targetInode {
+			return &osFile{File: file}
+		}
+
+		file.Close()
+	}
+
+	return nil
+}
+
+func UdevWait(cookie uint) error {
+	if res := DmUdevWait(cookie); res != 1 {
+		utils.Debugf("Failed to wait on udev cookie %d", cookie)
+		return ErrUdevWait
+	}
+	return nil
+}
+
+func LogInitVerbose(level int) {
+	DmLogInitVerbose(level)
+}
+
+var dmLogger DevmapperLogger = nil
+
+func logInit(logger DevmapperLogger) {
+	dmLogger = logger
+	LogWithErrnoInit()
+}
+
+func SetDevDir(dir string) error {
+	if res := DmSetDevDir(dir); res != 1 {
+		utils.Debugf("Error dm_set_dev_dir")
+		return ErrSetDevDir
+	}
+	return nil
+}
+
+func GetLibraryVersion() (string, error) {
+	var version string
+	if res := DmGetLibraryVersion(&version); res != 1 {
+		return "", ErrGetLibraryVersion
+	}
+	return version, nil
+}
+
+// Useful helper for cleanup
+func RemoveDevice(name string) error {
+	task := TaskCreate(DeviceRemove)
+	if task == nil {
+		return ErrCreateRemoveTask
+	}
+	if err := task.SetName(name); err != nil {
+		utils.Debugf("Can't set task name %s", name)
+		return err
+	}
+	if err := task.Run(); err != nil {
+		return ErrRunRemoveDevice
+	}
+	return nil
+}
+
+func GetBlockDeviceSize(file *osFile) (uint64, error) {
+	size, errno := DmGetBlockSize(file.Fd())
+	if size == -1 || errno != 0 {
+		return 0, ErrGetBlockSize
+	}
+	return uint64(size), nil
+}
+
+// This is the programmatic example of "dmsetup create"
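+// Roughly equivalent to (illustrative device names, assuming /dev/loop0 backs the data
+// file and /dev/loop1 the metadata file):
+//	dmsetup create <poolName> --table "0 <data_size_in_sectors> thin-pool /dev/loop1 /dev/loop0 128 32768"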
+func createPool(poolName string, dataFile, metadataFile *osFile) error {
+	task, err := createTask(DeviceCreate, poolName)
+	if task == nil {
+		return err
+	}
+
+	size, err := GetBlockDeviceSize(dataFile)
+	if err != nil {
+		return fmt.Errorf("Can't get data size")
+	}
+
+	params := metadataFile.Name() + " " + dataFile.Name() + " 128 32768"
+	if err := task.AddTarget(0, size/512, "thin-pool", params); err != nil {
+		return fmt.Errorf("Can't add target")
+	}
+
+	var cookie uint = 0
+	if err := task.SetCookie(&cookie, 0); err != nil {
+		return fmt.Errorf("Can't set cookie")
+	}
+
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running DeviceCreate (createPool)")
+	}
+
+	UdevWait(cookie)
+
+	return nil
+}
+
+func reloadPool(poolName string, dataFile, metadataFile *osFile) error {
+	task, err := createTask(DeviceReload, poolName)
+	if task == nil {
+		return err
+	}
+
+	size, err := GetBlockDeviceSize(dataFile)
+	if err != nil {
+		return fmt.Errorf("Can't get data size")
+	}
+
+	params := metadataFile.Name() + " " + dataFile.Name() + " 128 32768"
+	if err := task.AddTarget(0, size/512, "thin-pool", params); err != nil {
+		return fmt.Errorf("Can't add target")
+	}
+
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running DeviceCreate")
+	}
+
+	return nil
+}
+
+func createTask(t TaskType, name string) (*Task, error) {
+	task := TaskCreate(t)
+	if task == nil {
+		return nil, fmt.Errorf("Can't create task of type %d", int(t))
+	}
+	if err := task.SetName(name); err != nil {
+		return nil, fmt.Errorf("Can't set task name %s", name)
+	}
+	return task, nil
+}
+
+func getInfo(name string) (*Info, error) {
+	task, err := createTask(DeviceInfo, name)
+	if task == nil {
+		return nil, err
+	}
+	if err := task.Run(); err != nil {
+		return nil, err
+	}
+	return task.GetInfo()
+}
+
+func getStatus(name string) (uint64, uint64, string, string, error) {
+	task, err := createTask(DeviceStatus, name)
+	if task == nil {
+		utils.Debugf("getStatus: Error createTask: %s", err)
+		return 0, 0, "", "", err
+	}
+	if err := task.Run(); err != nil {
+		utils.Debugf("getStatus: Error Run: %s", err)
+		return 0, 0, "", "", err
+	}
+
+	devinfo, err := task.GetInfo()
+	if err != nil {
+		utils.Debugf("getStatus: Error GetInfo: %s", err)
+		return 0, 0, "", "", err
+	}
+	if devinfo.Exists == 0 {
+		utils.Debugf("getStatus: Non existing device %s", name)
+		return 0, 0, "", "", fmt.Errorf("Non existing device %s", name)
+	}
+
+	_, start, length, targetType, params := task.GetNextTarget(0)
+	return start, length, targetType, params, nil
+}
+
+func setTransactionId(poolName string, oldId uint64, newId uint64) error {
+	task, err := createTask(DeviceTargetMsg, poolName)
+	if task == nil {
+		return err
+	}
+
+	if err := task.SetSector(0); err != nil {
+		return fmt.Errorf("Can't set sector")
+	}
+
+	if err := task.SetMessage(fmt.Sprintf("set_transaction_id %d %d", oldId, newId)); err != nil {
+		return fmt.Errorf("Can't set message")
+	}
+
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running setTransactionId")
+	}
+	return nil
+}
+
+func suspendDevice(name string) error {
+	task, err := createTask(DeviceSuspend, name)
+	if task == nil {
+		return err
+	}
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running DeviceSuspend")
+	}
+	return nil
+}
+
+func resumeDevice(name string) error {
+	task, err := createTask(DeviceResume, name)
+	if task == nil {
+		return err
+	}
+
+	var cookie uint = 0
+	if err := task.SetCookie(&cookie, 0); err != nil {
+		return fmt.Errorf("Can't set cookie")
+	}
+
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running DeviceSuspend")
+	}
+
+	UdevWait(cookie)
+
+	return nil
+}
+
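+// createDevice allocates a fresh thin device in the pool by sending a target message,
+// roughly the CLI's: dmsetup message <poolName> 0 "create_thin <deviceId>"
+// (shown for illustration only; the code below goes through libdevmapper directly).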
+func createDevice(poolName string, deviceId int) error {
+	utils.Debugf("[devmapper] createDevice(poolName=%v, deviceId=%v)", poolName, deviceId)
+	task, err := createTask(DeviceTargetMsg, poolName)
+	if task == nil {
+		return err
+	}
+
+	if err := task.SetSector(0); err != nil {
+		return fmt.Errorf("Can't set sector")
+	}
+
+	if err := task.SetMessage(fmt.Sprintf("create_thin %d", deviceId)); err != nil {
+		return fmt.Errorf("Can't set message")
+	}
+
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running createDevice")
+	}
+	return nil
+}
+
+func deleteDevice(poolName string, deviceId int) error {
+	task, err := createTask(DeviceTargetMsg, poolName)
+	if task == nil {
+		return err
+	}
+
+	if err := task.SetSector(0); err != nil {
+		return fmt.Errorf("Can't set sector")
+	}
+
+	if err := task.SetMessage(fmt.Sprintf("delete %d", deviceId)); err != nil {
+		return fmt.Errorf("Can't set message")
+	}
+
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running deleteDevice")
+	}
+	return nil
+}
+
+func removeDevice(name string) error {
+	utils.Debugf("[devmapper] removeDevice START")
+	defer utils.Debugf("[devmapper] removeDevice END")
+	task, err := createTask(DeviceRemove, name)
+	if task == nil {
+		return err
+	}
+	if err = task.Run(); err != nil {
+		return fmt.Errorf("Error running removeDevice")
+	}
+	return nil
+}
+
+func activateDevice(poolName string, name string, deviceId int, size uint64) error {
+	task, err := createTask(DeviceCreate, name)
+	if task == nil {
+		return err
+	}
+
+	params := fmt.Sprintf("%s %d", poolName, deviceId)
+	if err := task.AddTarget(0, size/512, "thin", params); err != nil {
+		return fmt.Errorf("Can't add target")
+	}
+	if err := task.SetAddNode(AddNodeOnCreate); err != nil {
+		return fmt.Errorf("Can't add node")
+	}
+
+	var cookie uint = 0
+	if err := task.SetCookie(&cookie, 0); err != nil {
+		return fmt.Errorf("Can't set cookie")
+	}
+
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running DeviceCreate (activateDevice)")
+	}
+
+	UdevWait(cookie)
+
+	return nil
+}
+
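+// createSnapDevice takes a thin snapshot of baseName by sending
+// "create_snap <deviceId> <baseDeviceId>" to the pool (roughly:
+// dmsetup message <poolName> 0 "create_snap <deviceId> <baseDeviceId>").
+// If the origin is currently active it is suspended first, since the thin-pool
+// expects a quiesced origin for a consistent snapshot, and resumed on every exit path.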
+func (devices *DeviceSet) createSnapDevice(poolName string, deviceId int, baseName string, baseDeviceId int) error {
+	devinfo, _ := getInfo(baseName)
+	doSuspend := devinfo != nil && devinfo.Exists != 0
+
+	if doSuspend {
+		if err := suspendDevice(baseName); err != nil {
+			return err
+		}
+	}
+
+	task, err := createTask(DeviceTargetMsg, poolName)
+	if task == nil {
+		if doSuspend {
+			resumeDevice(baseName)
+		}
+		return err
+	}
+
+	if err := task.SetSector(0); err != nil {
+		if doSuspend {
+			resumeDevice(baseName)
+		}
+		return fmt.Errorf("Can't set sector")
+	}
+
+	if err := task.SetMessage(fmt.Sprintf("create_snap %d %d", deviceId, baseDeviceId)); err != nil {
+		if doSuspend {
+			resumeDevice(baseName)
+		}
+		return fmt.Errorf("Can't set message")
+	}
+
+	if err := task.Run(); err != nil {
+		if doSuspend {
+			resumeDevice(baseName)
+		}
+		return fmt.Errorf("Error running DeviceCreate (createSnapDevice)")
+	}
+
+	if doSuspend {
+		if err := resumeDevice(baseName); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}

+ 106 - 0
graphdriver/devmapper/devmapper_doc.go

@@ -0,0 +1,106 @@
+package devmapper
+
+// Definition of struct dm_task and sub structures (from lvm2)
+//
+// struct dm_ioctl {
+// 	/*
+// 	 * The version number is made up of three parts:
+// 	 * major - no backward or forward compatibility,
+// 	 * minor - only backwards compatible,
+// 	 * patch - both backwards and forwards compatible.
+// 	 *
+// 	 * All clients of the ioctl interface should fill in the
+// 	 * version number of the interface that they were
+// 	 * compiled with.
+// 	 *
+// 	 * All recognised ioctl commands (ie. those that don't
+// 	 * return -ENOTTY) fill out this field, even if the
+// 	 * command failed.
+// 	 */
+// 	uint32_t version[3];	/* in/out */
+// 	uint32_t data_size;	/* total size of data passed in
+// 				 * including this struct */
+
+// 	uint32_t data_start;	/* offset to start of data
+// 				 * relative to start of this struct */
+
+// 	uint32_t target_count;	/* in/out */
+// 	int32_t open_count;	/* out */
+// 	uint32_t flags;		/* in/out */
+
+// 	/*
+// 	 * event_nr holds either the event number (input and output) or the
+// 	 * udev cookie value (input only).
+// 	 * The DM_DEV_WAIT ioctl takes an event number as input.
+// 	 * The DM_SUSPEND, DM_DEV_REMOVE and DM_DEV_RENAME ioctls
+// 	 * use the field as a cookie to return in the DM_COOKIE
+// 	 * variable with the uevents they issue.
+// 	 * For output, the ioctls return the event number, not the cookie.
+// 	 */
+// 	uint32_t event_nr;      	/* in/out */
+// 	uint32_t padding;
+
+// 	uint64_t dev;		/* in/out */
+
+// 	char name[DM_NAME_LEN];	/* device name */
+// 	char uuid[DM_UUID_LEN];	/* unique identifier for
+// 				 * the block device */
+// 	char data[7];		/* padding or data */
+// };
+
+// struct target {
+// 	uint64_t start;
+// 	uint64_t length;
+// 	char *type;
+// 	char *params;
+
+// 	struct target *next;
+// };
+
+// typedef enum {
+// 	DM_ADD_NODE_ON_RESUME, /* add /dev/mapper node with dmsetup resume */
+// 	DM_ADD_NODE_ON_CREATE  /* add /dev/mapper node with dmsetup create */
+// } dm_add_node_t;
+
+// struct dm_task {
+// 	int type;
+// 	char *dev_name;
+// 	char *mangled_dev_name;
+
+// 	struct target *head, *tail;
+
+// 	int read_only;
+// 	uint32_t event_nr;
+// 	int major;
+// 	int minor;
+// 	int allow_default_major_fallback;
+// 	uid_t uid;
+// 	gid_t gid;
+// 	mode_t mode;
+// 	uint32_t read_ahead;
+// 	uint32_t read_ahead_flags;
+// 	union {
+// 		struct dm_ioctl *v4;
+// 	} dmi;
+// 	char *newname;
+// 	char *message;
+// 	char *geometry;
+// 	uint64_t sector;
+// 	int no_flush;
+// 	int no_open_count;
+// 	int skip_lockfs;
+// 	int query_inactive_table;
+// 	int suppress_identical_reload;
+// 	dm_add_node_t add_node;
+// 	uint64_t existing_table_size;
+// 	int cookie_set;
+// 	int new_uuid;
+// 	int secure_data;
+// 	int retry_remove;
+// 	int enable_checks;
+// 	int expected_errno;
+
+// 	char *uuid;
+// 	char *mangled_uuid;
+// };
+//

+ 13 - 0
graphdriver/devmapper/devmapper_log.go

@@ -0,0 +1,13 @@
+package devmapper
+
+import "C"
+
+// Due to the way cgo works this has to be in a separate file: the cgo preamble in
+// devmapper_wrapper.go contains definitions, and a file whose preamble has definitions
+// cannot also use "//export".
+
+//export DevmapperLogCallback
+func DevmapperLogCallback(level C.int, file *C.char, line C.int, dm_errno_or_class C.int, message *C.char) {
+	if dmLogger != nil {
+		dmLogger.log(int(level), C.GoString(file), int(line), int(dm_errno_or_class), C.GoString(message))
+	}
+}
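
The logging chain is: libdevmapper invokes the C shim log_cb (installed by log_with_errno_init in devmapper_wrapper.go), which calls the exported DevmapperLogCallback above, which forwards to the package-level dmLogger set by logInit. A sketch of an alternative logger, assuming it lives inside the devmapper package (the log method is unexported, so it cannot be implemented from another package) and that "log" is imported:

	// stderrLogger is a hypothetical DevmapperLogger that forwards libdevmapper
	// messages to the standard library logger instead of utils.Debugf.
	type stderrLogger struct{}

	func (stderrLogger) log(level int, file string, line int, dmError int, message string) {
		log.Printf("libdevmapper[%d] %s:%d (%d): %s", level, file, line, dmError, message)
	}

	// logInit(stderrLogger{}) would install it in place of the DeviceSet itself.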

+ 285 - 0
graphdriver/devmapper/devmapper_test.go

@@ -0,0 +1,285 @@
+package devmapper
+
+import (
+	"testing"
+)
+
+func TestTaskCreate(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	// Test success
+	taskCreate(t, DeviceInfo)
+
+	// Test Failure
+	DmTaskCreate = dmTaskCreateFail
+	defer func() { DmTaskCreate = dmTaskCreateFct }()
+	if task := TaskCreate(-1); task != nil {
+		t.Fatalf("An error should have occured while creating an invalid task.")
+	}
+}
+
+func TestTaskRun(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	// Perform the RUN
+	if err := task.Run(); err != nil {
+		t.Fatal(err)
+	}
+	// Make sure we don't have error with GetInfo
+	if _, err := task.GetInfo(); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	DmTaskRun = dmTaskRunFail
+	defer func() { DmTaskRun = dmTaskRunFct }()
+
+	task = taskCreate(t, DeviceInfo)
+	// Perform the RUN
+	if err := task.Run(); err != ErrTaskRun {
+		t.Fatalf("An error should have occured while running task.")
+	}
+	// Make sure GetInfo also fails
+	if _, err := task.GetInfo(); err != ErrTaskGetInfo {
+		t.Fatalf("GetInfo should fail if task.Run() failed.")
+	}
+}
+
+func TestTaskSetName(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	if err := task.SetName("test"); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	DmTaskSetName = dmTaskSetNameFail
+	defer func() { DmTaskSetName = dmTaskSetNameFct }()
+
+	if err := task.SetName("test"); err != ErrTaskSetName {
+		t.Fatalf("An error should have occured while runnign SetName.")
+	}
+}
+
+func TestTaskSetMessage(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	if err := task.SetMessage("test"); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	DmTaskSetMessage = dmTaskSetMessageFail
+	defer func() { DmTaskSetMessage = dmTaskSetMessageFct }()
+
+	if err := task.SetMessage("test"); err != ErrTaskSetMessage {
+		t.Fatalf("An error should have occured while runnign SetMessage.")
+	}
+}
+
+func TestTaskSetSector(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	if err := task.SetSector(128); err != nil {
+		t.Fatal(err)
+	}
+
+	DmTaskSetSector = dmTaskSetSectorFail
+	defer func() { DmTaskSetSector = dmTaskSetSectorFct }()
+
+	// Test failure
+	if err := task.SetSector(0); err != ErrTaskSetSector {
+		t.Fatalf("An error should have occured while running SetSector.")
+	}
+}
+
+func TestTaskSetCookie(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	var (
+		cookie uint = 0
+		task        = taskCreate(t, DeviceInfo)
+	)
+
+	// Test success
+	if err := task.SetCookie(&cookie, 0); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	if err := task.SetCookie(nil, 0); err != ErrNilCookie {
+		t.Fatalf("An error should have occured while running SetCookie with nil cookie.")
+	}
+
+	DmTaskSetCookie = dmTaskSetCookieFail
+	defer func() { DmTaskSetCookie = dmTaskSetCookieFct }()
+
+	if err := task.SetCookie(&cookie, 0); err != ErrTaskSetCookie {
+		t.Fatalf("An error should have occured while running SetCookie.")
+	}
+}
+
+func TestTaskSetAddNode(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	if err := task.SetAddNode(0); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	if err := task.SetAddNode(-1); err != ErrInvalidAddNode {
+		t.Fatalf("An error should have occured running SetAddNode with wrong node.")
+	}
+
+	DmTaskSetAddNode = dmTaskSetAddNodeFail
+	defer func() { DmTaskSetAddNode = dmTaskSetAddNodeFct }()
+
+	if err := task.SetAddNode(0); err != ErrTaskSetAddNode {
+		t.Fatalf("An error should have occured running SetAddNode.")
+	}
+}
+
+func TestTaskSetRo(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	if err := task.SetRo(); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	DmTaskSetRo = dmTaskSetRoFail
+	defer func() { DmTaskSetRo = dmTaskSetRoFct }()
+
+	if err := task.SetRo(); err != ErrTaskSetRo {
+		t.Fatalf("An error should have occured running SetRo.")
+	}
+}
+
+func TestTaskAddTarget(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	if err := task.AddTarget(0, 128, "thinp", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	DmTaskAddTarget = dmTaskAddTargetFail
+	defer func() { DmTaskAddTarget = dmTaskAddTargetFct }()
+
+	if err := task.AddTarget(0, 128, "thinp", ""); err != ErrTaskAddTarget {
+		t.Fatalf("An error should have occured running AddTarget.")
+	}
+}
+
+// func TestTaskGetInfo(t *testing.T) {
+// 	task := taskCreate(t, DeviceInfo)
+
+// 	// Test success
+// 	if _, err := task.GetInfo(); err != nil {
+// 		t.Fatal(err)
+// 	}
+
+// 	// Test failure
+// 	DmTaskGetInfo = dmTaskGetInfoFail
+// 	defer func() { DmTaskGetInfo = dmTaskGetInfoFct }()
+
+// 	if _, err := task.GetInfo(); err != ErrTaskGetInfo {
+// 		t.Fatalf("An error should have occured running GetInfo.")
+// 	}
+// }
+
+// func TestTaskGetNextTarget(t *testing.T) {
+// 	task := taskCreate(t, DeviceInfo)
+
+// 	if next, _, _, _, _ := task.GetNextTarget(0); next == 0 {
+// 		t.Fatalf("The next target should not be 0.")
+// 	}
+// }
+
+/// Utils
+func taskCreate(t *testing.T, taskType TaskType) *Task {
+	task := TaskCreate(taskType)
+	if task == nil {
+		t.Fatalf("Error creating task")
+	}
+	return task
+}
+
+/// Failure function replacement
+func dmTaskCreateFail(t int) *CDmTask {
+	return nil
+}
+
+func dmTaskRunFail(task *CDmTask) int {
+	return -1
+}
+
+func dmTaskSetNameFail(task *CDmTask, name string) int {
+	return -1
+}
+
+func dmTaskSetMessageFail(task *CDmTask, message string) int {
+	return -1
+}
+
+func dmTaskSetSectorFail(task *CDmTask, sector uint64) int {
+	return -1
+}
+
+func dmTaskSetCookieFail(task *CDmTask, cookie *uint, flags uint16) int {
+	return -1
+}
+
+func dmTaskSetAddNodeFail(task *CDmTask, addNode AddNodeType) int {
+	return -1
+}
+
+func dmTaskSetRoFail(task *CDmTask) int {
+	return -1
+}
+
+func dmTaskAddTargetFail(task *CDmTask,
+	start, size uint64, ttype, params string) int {
+	return -1
+}
+
+func dmTaskGetInfoFail(task *CDmTask, info *Info) int {
+	return -1
+}
+
+func dmGetNextTargetFail(task *CDmTask, next uintptr, start, length *uint64,
+	target, params *string) uintptr {
+	return 0
+}
+
+func dmAttachLoopDeviceFail(filename string, fd *int) string {
+	return ""
+}
+
+func sysGetBlockSizeFail(fd uintptr, size *uint64) sysErrno {
+	return 1
+}
+
+func dmUdevWaitFail(cookie uint) int {
+	return -1
+}
+
+func dmSetDevDirFail(dir string) int {
+	return -1
+}
+
+func dmGetLibraryVersionFail(version *string) int {
+	return -1
+}
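
All of these tests rely on the same pattern: every libdevmapper entry point is a package-level function variable (declared in devmapper_wrapper.go), so a test can swap in a failing stub and restore the real binding when it is done. Distilled into a standalone, hypothetical test using the stubs defined above:

	func TestRunFailurePropagates(t *testing.T) {
		// Replace the real cgo call with the failing stub...
		DmTaskRun = dmTaskRunFail
		// ...and put the real implementation back once the test finishes.
		defer func() { DmTaskRun = dmTaskRunFct }()

		task := taskCreate(t, DeviceInfo)
		if err := task.Run(); err != ErrTaskRun {
			t.Fatalf("expected ErrTaskRun, got %v", err)
		}
	}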

+ 340 - 0
graphdriver/devmapper/devmapper_wrapper.go

@@ -0,0 +1,340 @@
+package devmapper
+
+/*
+#cgo LDFLAGS: -L. -ldevmapper
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <libdevmapper.h>
+#include <linux/loop.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <errno.h>
+
+#ifndef LOOP_CTL_GET_FREE
+#define LOOP_CTL_GET_FREE       0x4C82
+#endif
+
+// FIXME: this could easily be rewritten in go
+char*			attach_loop_device(const char *filename, int *loop_fd_out)
+{
+  struct loop_info64	loopinfo = {0};
+  struct stat		st;
+  char			buf[64];
+  int			i, loop_fd, fd, start_index;
+  char*			loopname;
+
+
+  *loop_fd_out = -1;
+
+  start_index = 0;
+  fd = open("/dev/loop-control", O_RDONLY);
+  if (fd >= 0) {
+    start_index = ioctl(fd, LOOP_CTL_GET_FREE);
+    close(fd);
+
+    if (start_index < 0)
+      start_index = 0;
+  }
+
+  fd = open(filename, O_RDWR);
+  if (fd < 0) {
+    perror("open");
+    return NULL;
+  }
+
+  loop_fd = -1;
+  for (i = start_index ; loop_fd < 0 ; i++ ) {
+    if (sprintf(buf, "/dev/loop%d", i) < 0) {
+	close(fd);
+	return NULL;
+    }
+
+    if (stat(buf, &st)) {
+      if (errno == ENOENT) {
+        fprintf(stderr, "[error] There are no more loopback devices available.\n");
+      } else {
+        fprintf(stderr, "[error] Unknown error trying to stat the loopback device %s (errno: %d).\n", buf, errno);
+      }
+      close(fd);
+      return NULL;
+    }
+
+    if (!S_ISBLK(st.st_mode)) {
+      fprintf(stderr, "[error] Loopback device %s is not a block device.\n", buf);
+      close(fd);
+      return NULL;
+    }
+
+    loop_fd = open(buf, O_RDWR);
+    if (loop_fd < 0 && errno == ENOENT) {
+      fprintf(stderr, "[error] The loopback device %s does not exists.\n", buf);
+      close(fd);
+      return NULL;
+    } else if (loop_fd < 0) {
+	fprintf(stderr, "[error] Unkown error openning the loopback device %s. (errno: %d)\n", buf, errno);
+	continue;
+    }
+
+    if (ioctl(loop_fd, LOOP_SET_FD, (void *)(size_t)fd) < 0) {
+      int errsv = errno;
+      close(loop_fd);
+      loop_fd = -1;
+      if (errsv != EBUSY) {
+        close(fd);
+        fprintf(stderr, "cannot set up loopback device %s: %s", buf, strerror(errsv));
+        return NULL;
+      }
+      continue;
+    }
+
+    close(fd);
+
+    strncpy((char*)loopinfo.lo_file_name, buf, LO_NAME_SIZE);
+    loopinfo.lo_offset = 0;
+    loopinfo.lo_flags = LO_FLAGS_AUTOCLEAR;
+
+    if (ioctl(loop_fd, LOOP_SET_STATUS64, &loopinfo) < 0) {
+      perror("ioctl LOOP_SET_STATUS64");
+      if (ioctl(loop_fd, LOOP_CLR_FD, 0) < 0) {
+        perror("ioctl LOOP_CLR_FD");
+      }
+      close(loop_fd);
+      fprintf (stderr, "cannot set up loopback device info");
+      return (NULL);
+    }
+
+    loopname = strdup(buf);
+    if (loopname == NULL) {
+      close(loop_fd);
+      return (NULL);
+    }
+
+    *loop_fd_out = loop_fd;
+    return (loopname);
+  }
+
+  return (NULL);
+}
+
+extern void DevmapperLogCallback(int level, char *file, int line, int dm_errno_or_class, char *str);
+
+static void	log_cb(int level, const char *file, int line,
+		       int dm_errno_or_class, const char *f, ...)
+{
+  char buffer[256];
+  va_list ap;
+
+  va_start(ap, f);
+  vsnprintf(buffer, 256, f, ap);
+  va_end(ap);
+
+  DevmapperLogCallback(level, (char *)file, line, dm_errno_or_class, buffer);
+}
+
+static void	log_with_errno_init()
+{
+  dm_log_with_errno_init(log_cb);
+}
+
+*/
+import "C"
+
+import (
+	"unsafe"
+)
+
+type (
+	CDmTask C.struct_dm_task
+)
+
+var (
+	DmAttachLoopDevice       = dmAttachLoopDeviceFct
+	DmGetBlockSize           = dmGetBlockSizeFct
+	DmGetLibraryVersion      = dmGetLibraryVersionFct
+	DmGetNextTarget          = dmGetNextTargetFct
+	DmLogInitVerbose         = dmLogInitVerboseFct
+	DmSetDevDir              = dmSetDevDirFct
+	DmTaskAddTarget          = dmTaskAddTargetFct
+	DmTaskCreate             = dmTaskCreateFct
+	DmTaskDestroy            = dmTaskDestroyFct
+	DmTaskGetInfo            = dmTaskGetInfoFct
+	DmTaskRun                = dmTaskRunFct
+	DmTaskSetAddNode         = dmTaskSetAddNodeFct
+	DmTaskSetCookie          = dmTaskSetCookieFct
+	DmTaskSetMessage         = dmTaskSetMessageFct
+	DmTaskSetName            = dmTaskSetNameFct
+	DmTaskSetRo              = dmTaskSetRoFct
+	DmTaskSetSector          = dmTaskSetSectorFct
+	DmUdevWait               = dmUdevWaitFct
+	GetBlockSize             = getBlockSizeFct
+	LogWithErrnoInit         = logWithErrnoInitFct
+	DmGetLoopbackBackingFile = dmGetLoopbackBackingFileFct
+	DmLoopbackSetCapacity    = dmLoopbackSetCapacityFct
+)
+
+func free(p *C.char) {
+	C.free(unsafe.Pointer(p))
+}
+
+func dmTaskDestroyFct(task *CDmTask) {
+	C.dm_task_destroy((*C.struct_dm_task)(task))
+}
+
+func dmTaskCreateFct(taskType int) *CDmTask {
+	return (*CDmTask)(C.dm_task_create(C.int(taskType)))
+}
+
+func dmTaskRunFct(task *CDmTask) int {
+	return int(C.dm_task_run((*C.struct_dm_task)(task)))
+}
+
+func dmTaskSetNameFct(task *CDmTask, name string) int {
+	Cname := C.CString(name)
+	defer free(Cname)
+
+	return int(C.dm_task_set_name((*C.struct_dm_task)(task),
+		Cname))
+}
+
+func dmTaskSetMessageFct(task *CDmTask, message string) int {
+	Cmessage := C.CString(message)
+	defer free(Cmessage)
+
+	return int(C.dm_task_set_message((*C.struct_dm_task)(task),
+		Cmessage))
+}
+
+func dmTaskSetSectorFct(task *CDmTask, sector uint64) int {
+	return int(C.dm_task_set_sector((*C.struct_dm_task)(task),
+		C.uint64_t(sector)))
+}
+
+func dmTaskSetCookieFct(task *CDmTask, cookie *uint, flags uint16) int {
+	cCookie := C.uint32_t(*cookie)
+	defer func() {
+		*cookie = uint(cCookie)
+	}()
+	return int(C.dm_task_set_cookie((*C.struct_dm_task)(task), &cCookie,
+		C.uint16_t(flags)))
+}
+
+func dmTaskSetAddNodeFct(task *CDmTask, addNode AddNodeType) int {
+	return int(C.dm_task_set_add_node((*C.struct_dm_task)(task),
+		C.dm_add_node_t(addNode)))
+}
+
+func dmTaskSetRoFct(task *CDmTask) int {
+	return int(C.dm_task_set_ro((*C.struct_dm_task)(task)))
+}
+
+func dmTaskAddTargetFct(task *CDmTask,
+	start, size uint64, ttype, params string) int {
+
+	Cttype := C.CString(ttype)
+	defer free(Cttype)
+
+	Cparams := C.CString(params)
+	defer free(Cparams)
+
+	return int(C.dm_task_add_target((*C.struct_dm_task)(task),
+		C.uint64_t(start), C.uint64_t(size), Cttype, Cparams))
+}
+
+func dmGetLoopbackBackingFileFct(fd uintptr) (uint64, uint64, sysErrno) {
+	var lo64 C.struct_loop_info64
+	_, _, err := sysSyscall(sysSysIoctl, fd, C.LOOP_GET_STATUS64,
+		uintptr(unsafe.Pointer(&lo64)))
+	return uint64(lo64.lo_device), uint64(lo64.lo_inode), sysErrno(err)
+}
+
+func dmLoopbackSetCapacityFct(fd uintptr) sysErrno {
+	_, _, err := sysSyscall(sysSysIoctl, fd, C.LOOP_SET_CAPACITY, 0)
+	return sysErrno(err)
+}
+
+func dmGetBlockSizeFct(fd uintptr) (int64, sysErrno) {
+	var size int64
+	_, _, err := sysSyscall(sysSysIoctl, fd, C.BLKGETSIZE64, uintptr(unsafe.Pointer(&size)))
+	return size, sysErrno(err)
+}
+
+func dmTaskGetInfoFct(task *CDmTask, info *Info) int {
+	Cinfo := C.struct_dm_info{}
+	defer func() {
+		info.Exists = int(Cinfo.exists)
+		info.Suspended = int(Cinfo.suspended)
+		info.LiveTable = int(Cinfo.live_table)
+		info.InactiveTable = int(Cinfo.inactive_table)
+		info.OpenCount = int32(Cinfo.open_count)
+		info.EventNr = uint32(Cinfo.event_nr)
+		info.Major = uint32(Cinfo.major)
+		info.Minor = uint32(Cinfo.minor)
+		info.ReadOnly = int(Cinfo.read_only)
+		info.TargetCount = int32(Cinfo.target_count)
+	}()
+	return int(C.dm_task_get_info((*C.struct_dm_task)(task), &Cinfo))
+}
+
+func dmGetNextTargetFct(task *CDmTask, next uintptr, start, length *uint64, target, params *string) uintptr {
+	var (
+		Cstart, Clength      C.uint64_t
+		CtargetType, Cparams *C.char
+	)
+	defer func() {
+		*start = uint64(Cstart)
+		*length = uint64(Clength)
+		*target = C.GoString(CtargetType)
+		*params = C.GoString(Cparams)
+	}()
+
+	nextp := C.dm_get_next_target((*C.struct_dm_task)(task),
+		unsafe.Pointer(next), &Cstart, &Clength, &CtargetType, &Cparams)
+	return uintptr(nextp)
+}
+
+func dmAttachLoopDeviceFct(filename string, fd *int) string {
+	cFilename := C.CString(filename)
+	defer free(cFilename)
+
+	var cFd C.int
+	defer func() {
+		*fd = int(cFd)
+	}()
+
+	ret := C.attach_loop_device(cFilename, &cFd)
+	defer free(ret)
+	return C.GoString(ret)
+}
+
+func getBlockSizeFct(fd uintptr, size *uint64) sysErrno {
+	_, _, err := sysSyscall(sysSysIoctl, fd, C.BLKGETSIZE64, uintptr(unsafe.Pointer(size)))
+	return sysErrno(err)
+}
+
+func dmUdevWaitFct(cookie uint) int {
+	return int(C.dm_udev_wait(C.uint32_t(cookie)))
+}
+
+func dmLogInitVerboseFct(level int) {
+	C.dm_log_init_verbose(C.int(level))
+}
+
+func logWithErrnoInitFct() {
+	C.log_with_errno_init()
+}
+
+func dmSetDevDirFct(dir string) int {
+	Cdir := C.CString(dir)
+	defer free(Cdir)
+
+	return int(C.dm_set_dev_dir(Cdir))
+}
+
+func dmGetLibraryVersionFct(version *string) int {
+	buffer := C.CString(string(make([]byte, 128)))
+	defer free(buffer)
+	defer func() {
+		*version = C.GoString(buffer)
+	}()
+	return int(C.dm_get_library_version(buffer, 128))
+}

+ 126 - 0
graphdriver/devmapper/driver.go

@@ -0,0 +1,126 @@
+package devmapper
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/graphdriver"
+	"io/ioutil"
+	"path"
+)
+
+func init() {
+	graphdriver.Register("devicemapper", Init)
+}
+
+// Placeholder interfaces, to be replaced
+// at integration.
+
+// End of placeholder interfaces.
+
+type Driver struct {
+	*DeviceSet
+	home string
+}
+
+var Init = func(home string) (graphdriver.Driver, error) {
+	deviceSet, err := NewDeviceSet(home, true)
+	if err != nil {
+		return nil, err
+	}
+	d := &Driver{
+		DeviceSet: deviceSet,
+		home:      home,
+	}
+	return d, nil
+}
+
+func (d *Driver) String() string {
+	return "devicemapper"
+}
+
+func (d *Driver) Status() [][2]string {
+	s := d.DeviceSet.Status()
+
+	status := [][2]string{
+		{"Pool Name", s.PoolName},
+		{"Data file", s.DataLoopback},
+		{"Metadata file", s.MetadataLoopback},
+		{"Data Space Used", fmt.Sprintf("%.1f Mb", float64(s.Data.Used)/(1024*1024))},
+		{"Data Space Total", fmt.Sprintf("%.1f Mb", float64(s.Data.Total)/(1024*1024))},
+		{"Metadata Space Used", fmt.Sprintf("%.1f Mb", float64(s.Metadata.Used)/(1024*1024))},
+		{"Metadata Space Total", fmt.Sprintf("%.1f Mb", float64(s.Metadata.Total)/(1024*1024))},
+	}
+	return status
+}
+
+func (d *Driver) Cleanup() error {
+	return d.DeviceSet.Shutdown()
+}
+
+func (d *Driver) Create(id, parent string) error {
+	if err := d.DeviceSet.AddDevice(id, parent); err != nil {
+		return err
+	}
+
+	mp := path.Join(d.home, "mnt", id)
+	if err := d.mount(id, mp); err != nil {
+		return err
+	}
+
+	if err := osMkdirAll(path.Join(mp, "rootfs"), 0755); err != nil && !osIsExist(err) {
+		return err
+	}
+
+	// Create an "id" file with the container/image id in it to help reconscruct this in case
+	// of later problems
+	if err := ioutil.WriteFile(path.Join(mp, "id"), []byte(id), 0600); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func (d *Driver) Remove(id string) error {
+	mp := path.Join(d.home, "mnt", id)
+	if err := d.unmount(id, mp); err != nil {
+		return err
+	}
+	return d.DeviceSet.RemoveDevice(id)
+}
+
+func (d *Driver) Get(id string) (string, error) {
+	mp := path.Join(d.home, "mnt", id)
+	if err := d.mount(id, mp); err != nil {
+		return "", err
+	}
+	return path.Join(mp, "rootfs"), nil
+}
+
+func (d *Driver) mount(id, mountPoint string) error {
+	// Create the target directories if they don't exist
+	if err := osMkdirAll(mountPoint, 0755); err != nil && !osIsExist(err) {
+		return err
+	}
+	// If mountpoint is already mounted, do nothing
+	if mounted, err := Mounted(mountPoint); err != nil {
+		return fmt.Errorf("Error checking mountpoint: %s", err)
+	} else if mounted {
+		return nil
+	}
+	// Mount the device
+	return d.DeviceSet.MountDevice(id, mountPoint, false)
+}
+
+func (d *Driver) unmount(id, mountPoint string) error {
+	// If mountpoint is not mounted, do nothing
+	if mounted, err := Mounted(mountPoint); err != nil {
+		return fmt.Errorf("Error checking mountpoint: %s", err)
+	} else if !mounted {
+		return nil
+	}
+	// Unmount the device
+	return d.DeviceSet.UnmountDevice(id, mountPoint, true)
+}
+
+func (d *Driver) Exists(id string) bool {
+	return d.Devices[id] != nil
+}
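
A sketch of how the driver above can be exercised directly (normally it is picked up through the graphdriver registry via the Register call in init; the home directory and layer ids here are purely illustrative):

	package main

	import (
		"fmt"
		"log"

		"github.com/dotcloud/docker/graphdriver/devmapper"
	)

	func main() {
		d, err := devmapper.Init("/var/lib/docker/devicemapper-example")
		if err != nil {
			log.Fatal(err)
		}
		driver := d.(*devmapper.Driver)
		defer driver.Cleanup()

		// An empty parent snapshots the base image; a non-empty parent snapshots that layer.
		if err := driver.Create("base-layer", ""); err != nil {
			log.Fatal(err)
		}
		if err := driver.Create("child-layer", "base-layer"); err != nil {
			log.Fatal(err)
		}

		rootfs, err := driver.Get("child-layer")
		if err != nil {
			log.Fatal(err)
		}
		fmt.Println("child layer mounted at", rootfs)
	}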

+ 872 - 0
graphdriver/devmapper/driver_test.go

@@ -0,0 +1,872 @@
+package devmapper
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/graphdriver"
+	"io/ioutil"
+	"path"
+	"runtime"
+	"strings"
+	"syscall"
+	"testing"
+)
+
+func init() {
+	// Reduce the size of the base fs and loopback for the tests
+	DefaultDataLoopbackSize = 300 * 1024 * 1024
+	DefaultMetaDataLoopbackSize = 200 * 1024 * 1024
+	DefaultBaseFsSize = 300 * 1024 * 1024
+}
+
+// denyAllDevmapper mocks all calls to libdevmapper in the unit tests, and denies them by default
+func denyAllDevmapper() {
+	// Hijack all calls to libdevmapper with default panics.
+	// Authorized calls are selectively hijacked in each test.
+	DmTaskCreate = func(t int) *CDmTask {
+		panic("DmTaskCreate: this method should not be called here")
+	}
+	DmTaskRun = func(task *CDmTask) int {
+		panic("DmTaskRun: this method should not be called here")
+	}
+	DmTaskSetName = func(task *CDmTask, name string) int {
+		panic("DmTaskSetName: this method should not be called here")
+	}
+	DmTaskSetMessage = func(task *CDmTask, message string) int {
+		panic("DmTaskSetMessage: this method should not be called here")
+	}
+	DmTaskSetSector = func(task *CDmTask, sector uint64) int {
+		panic("DmTaskSetSector: this method should not be called here")
+	}
+	DmTaskSetCookie = func(task *CDmTask, cookie *uint, flags uint16) int {
+		panic("DmTaskSetCookie: this method should not be called here")
+	}
+	DmTaskSetAddNode = func(task *CDmTask, addNode AddNodeType) int {
+		panic("DmTaskSetAddNode: this method should not be called here")
+	}
+	DmTaskSetRo = func(task *CDmTask) int {
+		panic("DmTaskSetRo: this method should not be called here")
+	}
+	DmTaskAddTarget = func(task *CDmTask, start, size uint64, ttype, params string) int {
+		panic("DmTaskAddTarget: this method should not be called here")
+	}
+	DmTaskGetInfo = func(task *CDmTask, info *Info) int {
+		panic("DmTaskGetInfo: this method should not be called here")
+	}
+	DmGetNextTarget = func(task *CDmTask, next uintptr, start, length *uint64, target, params *string) uintptr {
+		panic("DmGetNextTarget: this method should not be called here")
+	}
+	DmAttachLoopDevice = func(filename string, fd *int) string {
+		panic("DmAttachLoopDevice: this method should not be called here")
+	}
+	DmGetBlockSize = func(fd uintptr) (int64, sysErrno) {
+		panic("DmGetBlockSize: this method should not be called here")
+	}
+	DmUdevWait = func(cookie uint) int {
+		panic("DmUdevWait: this method should not be called here")
+	}
+	DmSetDevDir = func(dir string) int {
+		panic("DmSetDevDir: this method should not be called here")
+	}
+	DmGetLibraryVersion = func(version *string) int {
+		panic("DmGetLibraryVersion: this method should not be called here")
+	}
+	DmLogInitVerbose = func(level int) {
+		panic("DmLogInitVerbose: this method should not be called here")
+	}
+	DmTaskDestroy = func(task *CDmTask) {
+		panic("DmTaskDestroy: this method should not be called here")
+	}
+	GetBlockSize = func(fd uintptr, size *uint64) sysErrno {
+		panic("GetBlockSize: this method should not be called here")
+	}
+	LogWithErrnoInit = func() {
+		panic("LogWithErrnoInit: this method should not be called here")
+	}
+}
+
+func denyAllSyscall() {
+	sysMount = func(source, target, fstype string, flags uintptr, data string) (err error) {
+		panic("sysMount: this method should not be called here")
+	}
+	sysUnmount = func(target string, flags int) (err error) {
+		panic("sysUnmount: this method should not be called here")
+	}
+	sysCloseOnExec = func(fd int) {
+		panic("sysCloseOnExec: this method should not be called here")
+	}
+	sysSyscall = func(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
+		panic("sysSyscall: this method should not be called here")
+	}
+	// Not a syscall, but forbidding it here anyway
+	Mounted = func(mnt string) (bool, error) {
+		panic("devmapper.Mounted: this method should not be called here")
+	}
+	// osOpenFile = os.OpenFile
+	// osNewFile = os.NewFile
+	// osCreate = os.Create
+	// osStat = os.Stat
+	// osIsNotExist = os.IsNotExist
+	// osIsExist = os.IsExist
+	// osMkdirAll = os.MkdirAll
+	// osRemoveAll = os.RemoveAll
+	// osRename = os.Rename
+	// osReadlink = os.Readlink
+
+	// execRun = func(name string, args ...string) error {
+	// 	return exec.Command(name, args...).Run()
+	// }
+}
+
+func mkTestDirectory(t *testing.T) string {
+	dir, err := ioutil.TempDir("", "docker-test-devmapper-")
+	if err != nil {
+		t.Fatal(err)
+	}
+	return dir
+}
+
+func newDriver(t *testing.T) *Driver {
+	home := mkTestDirectory(t)
+	d, err := Init(home)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return d.(*Driver)
+}
+
+func cleanup(d *Driver) {
+	d.Cleanup()
+	osRemoveAll(d.home)
+}
+
+type Set map[string]bool
+
+func (r Set) Assert(t *testing.T, names ...string) {
+	for _, key := range names {
+		if _, exists := r[key]; !exists {
+			t.Fatalf("Key not set: %s", key)
+		}
+		delete(r, key)
+	}
+	if len(r) != 0 {
+		t.Fatalf("Unexpected keys: %v", r)
+	}
+}
+
+func TestInit(t *testing.T) {
+	var (
+		calls           = make(Set)
+		devicesAttached = make(Set)
+		taskMessages    = make(Set)
+		taskTypes       = make(Set)
+		home            = mkTestDirectory(t)
+	)
+	defer osRemoveAll(home)
+
+	func() {
+		denyAllDevmapper()
+		DmSetDevDir = func(dir string) int {
+			calls["DmSetDevDir"] = true
+			expectedDir := "/dev"
+			if dir != expectedDir {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmSetDevDir(%v)\nReceived: DmSetDevDir(%v)\n", expectedDir, dir)
+			}
+			return 0
+		}
+		LogWithErrnoInit = func() {
+			calls["DmLogWithErrnoInit"] = true
+		}
+		var task1 CDmTask
+		DmTaskCreate = func(taskType int) *CDmTask {
+			calls["DmTaskCreate"] = true
+			taskTypes[fmt.Sprintf("%d", taskType)] = true
+			return &task1
+		}
+		DmTaskSetName = func(task *CDmTask, name string) int {
+			calls["DmTaskSetName"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskSetName(%v)\nReceived: DmTaskSetName(%v)\n", expectedTask, task)
+			}
+			// FIXME: use Set.AssertRegexp()
+			if !strings.HasPrefix(name, "docker-") && !strings.HasPrefix(name, "/dev/mapper/docker-") ||
+				!strings.HasSuffix(name, "-pool") && !strings.HasSuffix(name, "-base") {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskSetName(%v)\nReceived: DmTaskSetName(%v)\n", "docker-...-pool", name)
+			}
+			return 1
+		}
+		DmTaskRun = func(task *CDmTask) int {
+			calls["DmTaskRun"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskRun(%v)\nReceived: DmTaskRun(%v)\n", expectedTask, task)
+			}
+			return 1
+		}
+		DmTaskGetInfo = func(task *CDmTask, info *Info) int {
+			calls["DmTaskGetInfo"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskGetInfo(%v)\nReceived: DmTaskGetInfo(%v)\n", expectedTask, task)
+			}
+			// This will crash if info is not dereferenceable
+			info.Exists = 0
+			return 1
+		}
+		DmTaskSetSector = func(task *CDmTask, sector uint64) int {
+			calls["DmTaskSetSector"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskSetSector(%v)\nReceived: DmTaskSetSector(%v)\n", expectedTask, task)
+			}
+			if expectedSector := uint64(0); sector != expectedSector {
+				t.Fatalf("Wrong libdevmapper call to DmTaskSetSector\nExpected: %v\nReceived: %v\n", expectedSector, sector)
+			}
+			return 1
+		}
+		DmTaskSetMessage = func(task *CDmTask, message string) int {
+			calls["DmTaskSetMessage"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskSetSector(%v)\nReceived: DmTaskSetSector(%v)\n", expectedTask, task)
+			}
+			taskMessages[message] = true
+			return 1
+		}
+		var (
+			fakeDataLoop       = "/dev/loop42"
+			fakeMetadataLoop   = "/dev/loop43"
+			fakeDataLoopFd     = 42
+			fakeMetadataLoopFd = 43
+		)
+		var attachCount int
+		DmAttachLoopDevice = func(filename string, fd *int) string {
+			calls["DmAttachLoopDevice"] = true
+			if _, exists := devicesAttached[filename]; exists {
+				t.Fatalf("Already attached %s", filename)
+			}
+			devicesAttached[filename] = true
+			// This will crash if fd is not dereferenceable
+			if attachCount == 0 {
+				attachCount++
+				*fd = fakeDataLoopFd
+				return fakeDataLoop
+			} else {
+				*fd = fakeMetadataLoopFd
+				return fakeMetadataLoop
+			}
+		}
+		DmTaskDestroy = func(task *CDmTask) {
+			calls["DmTaskDestroy"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskDestroy(%v)\nReceived: DmTaskDestroy(%v)\n", expectedTask, task)
+			}
+		}
+		fakeBlockSize := int64(4242 * 512)
+		DmGetBlockSize = func(fd uintptr) (int64, sysErrno) {
+			calls["DmGetBlockSize"] = true
+			if expectedFd := uintptr(42); fd != expectedFd {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmGetBlockSize(%v)\nReceived: DmGetBlockSize(%v)\n", expectedFd, fd)
+			}
+			return fakeBlockSize, 0
+		}
+		DmTaskAddTarget = func(task *CDmTask, start, size uint64, ttype, params string) int {
+			calls["DmTaskSetTarget"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskDestroy(%v)\nReceived: DmTaskDestroy(%v)\n", expectedTask, task)
+			}
+			if start != 0 {
+				t.Fatalf("Wrong start: %d != %d", start, 0)
+			}
+			if ttype != "thin" && ttype != "thin-pool" {
+				t.Fatalf("Wrong ttype: %s", ttype)
+			}
+			// Quick smoke test
+			if params == "" {
+				t.Fatalf("Params should not be empty")
+			}
+			return 1
+		}
+		fakeCookie := uint(4321)
+		DmTaskSetCookie = func(task *CDmTask, cookie *uint, flags uint16) int {
+			calls["DmTaskSetCookie"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskDestroy(%v)\nReceived: DmTaskDestroy(%v)\n", expectedTask, task)
+			}
+			if flags != 0 {
+				t.Fatalf("Cookie flags should be 0 (not %x)", flags)
+			}
+			*cookie = fakeCookie
+			return 1
+		}
+		DmUdevWait = func(cookie uint) int {
+			calls["DmUdevWait"] = true
+			if cookie != fakeCookie {
+				t.Fatalf("Wrong cookie: %d != %d", cookie, fakeCookie)
+			}
+			return 1
+		}
+		DmTaskSetAddNode = func(task *CDmTask, addNode AddNodeType) int {
+			if addNode != AddNodeOnCreate {
+				t.Fatalf("Wrong AddNoteType: %v (expected %v)", addNode, AddNodeOnCreate)
+			}
+			calls["DmTaskSetAddNode"] = true
+			return 1
+		}
+		execRun = func(name string, args ...string) error {
+			calls["execRun"] = true
+			if name != "mkfs.ext4" {
+				t.Fatalf("Expected %s to be executed, not %s", "mkfs.ext4", name)
+			}
+			return nil
+		}
+		driver, err := Init(home)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer func() {
+			if err := driver.Cleanup(); err != nil {
+				t.Fatal(err)
+			}
+		}()
+	}()
+	// Put all tests in a function to make sure the garbage collection will
+	// occur.
+
+	// Call GC to cleanup runtime.Finalizers
+	runtime.GC()
+
+	calls.Assert(t,
+		"DmSetDevDir",
+		"DmLogWithErrnoInit",
+		"DmTaskSetName",
+		"DmTaskRun",
+		"DmTaskGetInfo",
+		"DmAttachLoopDevice",
+		"DmTaskDestroy",
+		"execRun",
+		"DmTaskCreate",
+		"DmGetBlockSize",
+		"DmTaskSetTarget",
+		"DmTaskSetCookie",
+		"DmUdevWait",
+		"DmTaskSetSector",
+		"DmTaskSetMessage",
+		"DmTaskSetAddNode",
+	)
+	devicesAttached.Assert(t, path.Join(home, "devicemapper", "data"), path.Join(home, "devicemapper", "metadata"))
+	taskTypes.Assert(t, "0", "6", "17")
+	taskMessages.Assert(t, "create_thin 0", "set_transaction_id 0 1")
+}
+
+func fakeInit() func(home string) (graphdriver.Driver, error) {
+	oldInit := Init
+	Init = func(home string) (graphdriver.Driver, error) {
+		return &Driver{
+			home: home,
+		}, nil
+	}
+	return oldInit
+}
+
+func restoreInit(init func(home string) (graphdriver.Driver, error)) {
+	Init = init
+}
+
+func mockAllDevmapper(calls Set) {
+	DmSetDevDir = func(dir string) int {
+		calls["DmSetDevDir"] = true
+		return 0
+	}
+	LogWithErrnoInit = func() {
+		calls["DmLogWithErrnoInit"] = true
+	}
+	DmTaskCreate = func(taskType int) *CDmTask {
+		calls["DmTaskCreate"] = true
+		return &CDmTask{}
+	}
+	DmTaskSetName = func(task *CDmTask, name string) int {
+		calls["DmTaskSetName"] = true
+		return 1
+	}
+	DmTaskRun = func(task *CDmTask) int {
+		calls["DmTaskRun"] = true
+		return 1
+	}
+	DmTaskGetInfo = func(task *CDmTask, info *Info) int {
+		calls["DmTaskGetInfo"] = true
+		return 1
+	}
+	DmTaskSetSector = func(task *CDmTask, sector uint64) int {
+		calls["DmTaskSetSector"] = true
+		return 1
+	}
+	DmTaskSetMessage = func(task *CDmTask, message string) int {
+		calls["DmTaskSetMessage"] = true
+		return 1
+	}
+	DmAttachLoopDevice = func(filename string, fd *int) string {
+		calls["DmAttachLoopDevice"] = true
+		return "/dev/loop42"
+	}
+	DmTaskDestroy = func(task *CDmTask) {
+		calls["DmTaskDestroy"] = true
+	}
+	DmGetBlockSize = func(fd uintptr) (int64, sysErrno) {
+		calls["DmGetBlockSize"] = true
+		return int64(4242 * 512), 0
+	}
+	DmTaskAddTarget = func(task *CDmTask, start, size uint64, ttype, params string) int {
+		calls["DmTaskSetTarget"] = true
+		return 1
+	}
+	DmTaskSetCookie = func(task *CDmTask, cookie *uint, flags uint16) int {
+		calls["DmTaskSetCookie"] = true
+		return 1
+	}
+	DmUdevWait = func(cookie uint) int {
+		calls["DmUdevWait"] = true
+		return 1
+	}
+	DmTaskSetAddNode = func(task *CDmTask, addNode AddNodeType) int {
+		calls["DmTaskSetAddNode"] = true
+		return 1
+	}
+	execRun = func(name string, args ...string) error {
+		calls["execRun"] = true
+		return nil
+	}
+}
+
+func TestDriverName(t *testing.T) {
+	denyAllDevmapper()
+	defer denyAllDevmapper()
+
+	oldInit := fakeInit()
+	defer restoreInit(oldInit)
+
+	d := newDriver(t)
+	if d.String() != "devicemapper" {
+		t.Fatalf("Expected driver name to be devicemapper got %s", d.String())
+	}
+}
+
+func TestDriverCreate(t *testing.T) {
+	denyAllDevmapper()
+	denyAllSyscall()
+	defer denyAllSyscall()
+	defer denyAllDevmapper()
+
+	calls := make(Set)
+	mockAllDevmapper(calls)
+
+	sysMount = func(source, target, fstype string, flags uintptr, data string) (err error) {
+		calls["sysMount"] = true
+		// FIXME: compare the exact source and target strings (inodes + devname)
+		if expectedSource := "/dev/mapper/docker-"; !strings.HasPrefix(source, expectedSource) {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedSource, source)
+		}
+		if expectedTarget := "/tmp/docker-test-devmapper-"; !strings.HasPrefix(target, expectedTarget) {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedTarget, target)
+		}
+		if expectedFstype := "ext4"; fstype != expectedFstype {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedFstype, fstype)
+		}
+		if expectedFlags := uintptr(3236757504); flags != expectedFlags {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedFlags, flags)
+		}
+		return nil
+	}
+
+	Mounted = func(mnt string) (bool, error) {
+		calls["Mounted"] = true
+		if !strings.HasPrefix(mnt, "/tmp/docker-test-devmapper-") || !strings.HasSuffix(mnt, "/mnt/1") {
+			t.Fatalf("Wrong mounted call\nExpected: Mounted(%v)\nReceived: Mounted(%v)\n", "/tmp/docker-test-devmapper-.../mnt/1", mnt)
+		}
+		return false, nil
+	}
+
+	func() {
+		d := newDriver(t)
+
+		calls.Assert(t,
+			"DmSetDevDir",
+			"DmLogWithErrnoInit",
+			"DmTaskSetName",
+			"DmTaskRun",
+			"DmTaskGetInfo",
+			"DmAttachLoopDevice",
+			"execRun",
+			"DmTaskCreate",
+			"DmGetBlockSize",
+			"DmTaskSetTarget",
+			"DmTaskSetCookie",
+			"DmUdevWait",
+			"DmTaskSetSector",
+			"DmTaskSetMessage",
+			"DmTaskSetAddNode",
+		)
+
+		if err := d.Create("1", ""); err != nil {
+			t.Fatal(err)
+		}
+		calls.Assert(t,
+			"DmTaskCreate",
+			"DmTaskGetInfo",
+			"sysMount",
+			"Mounted",
+			"DmTaskRun",
+			"DmTaskSetTarget",
+			"DmTaskSetSector",
+			"DmTaskSetCookie",
+			"DmUdevWait",
+			"DmTaskSetName",
+			"DmTaskSetMessage",
+			"DmTaskSetAddNode",
+		)
+
+	}()
+
+	runtime.GC()
+
+	calls.Assert(t,
+		"DmTaskDestroy",
+	)
+}
+
+func TestDriverRemove(t *testing.T) {
+	denyAllDevmapper()
+	denyAllSyscall()
+	defer denyAllSyscall()
+	defer denyAllDevmapper()
+
+	calls := make(Set)
+	mockAllDevmapper(calls)
+
+	sysMount = func(source, target, fstype string, flags uintptr, data string) (err error) {
+		calls["sysMount"] = true
+		// FIXME: compare the exact source and target strings (inodes + devname)
+		if expectedSource := "/dev/mapper/docker-"; !strings.HasPrefix(source, expectedSource) {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedSource, source)
+		}
+		if expectedTarget := "/tmp/docker-test-devmapper-"; !strings.HasPrefix(target, expectedTarget) {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedTarget, target)
+		}
+		if expectedFstype := "ext4"; fstype != expectedFstype {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedFstype, fstype)
+		}
+		if expectedFlags := uintptr(3236757504); flags != expectedFlags {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedFlags, flags)
+		}
+		return nil
+	}
+	sysUnmount = func(target string, flags int) (err error) {
+		calls["sysUnmount"] = true
+		// FIXME: compare the exact source and target strings (inodes + devname)
+		if expectedTarget := "/tmp/docker-test-devmapper-"; !strings.HasPrefix(target, expectedTarget) {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedTarget, target)
+		}
+		if expectedFlags := 0; flags != expectedFlags {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedFlags, flags)
+		}
+		return nil
+	}
+	Mounted = func(mnt string) (bool, error) {
+		calls["Mounted"] = true
+		return false, nil
+	}
+
+	func() {
+		d := newDriver(t)
+
+		calls.Assert(t,
+			"DmSetDevDir",
+			"DmLogWithErrnoInit",
+			"DmTaskSetName",
+			"DmTaskRun",
+			"DmTaskGetInfo",
+			"DmAttachLoopDevice",
+			"execRun",
+			"DmTaskCreate",
+			"DmGetBlockSize",
+			"DmTaskSetTarget",
+			"DmTaskSetCookie",
+			"DmUdevWait",
+			"DmTaskSetSector",
+			"DmTaskSetMessage",
+			"DmTaskSetAddNode",
+		)
+
+		if err := d.Create("1", ""); err != nil {
+			t.Fatal(err)
+		}
+
+		calls.Assert(t,
+			"DmTaskCreate",
+			"DmTaskGetInfo",
+			"sysMount",
+			"Mounted",
+			"DmTaskRun",
+			"DmTaskSetTarget",
+			"DmTaskSetSector",
+			"DmTaskSetCookie",
+			"DmUdevWait",
+			"DmTaskSetName",
+			"DmTaskSetMessage",
+			"DmTaskSetAddNode",
+		)
+
+		Mounted = func(mnt string) (bool, error) {
+			calls["Mounted"] = true
+			return true, nil
+		}
+
+		if err := d.Remove("1"); err != nil {
+			t.Fatal(err)
+		}
+
+		calls.Assert(t,
+			"DmTaskRun",
+			"DmTaskSetSector",
+			"DmTaskSetName",
+			"DmTaskSetMessage",
+			"DmTaskCreate",
+			"DmTaskGetInfo",
+			"Mounted",
+			"sysUnmount",
+		)
+	}()
+	runtime.GC()
+
+	calls.Assert(t,
+		"DmTaskDestroy",
+	)
+}
+
+func TestCleanup(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	t.Skip("Unimplemented")
+	d := newDriver(t)
+	defer osRemoveAll(d.home)
+
+	mountPoints := make([]string, 2)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+	// Mount the id
+	p, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	mountPoints[0] = p
+
+	if err := d.Create("2", "1"); err != nil {
+		t.Fatal(err)
+	}
+
+	p, err = d.Get("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+	mountPoints[1] = p
+
+	// Ensure that all the mount points are currently mounted
+	for _, p := range mountPoints {
+		if mounted, err := Mounted(p); err != nil {
+			t.Fatal(err)
+		} else if !mounted {
+			t.Fatalf("Expected %s to be mounted", p)
+		}
+	}
+
+	// Ensure that devices are active
+	for _, p := range []string{"1", "2"} {
+		if !d.HasActivatedDevice(p) {
+			t.Fatalf("Expected %s to have an active device", p)
+		}
+	}
+
+	if err := d.Cleanup(); err != nil {
+		t.Fatal(err)
+	}
+
+	// Ensure that all the mount points are no longer mounted
+	for _, p := range mountPoints {
+		if mounted, err := Mounted(p); err != nil {
+			t.Fatal(err)
+		} else if mounted {
+			t.Fatalf("Expected %s to not be mounted", p)
+		}
+	}
+
+	// Ensure that devices are no longer activated
+	for _, p := range []string{"1", "2"} {
+		if d.HasActivatedDevice(p) {
+			t.Fatalf("Expected %s not be an active device", p)
+		}
+	}
+}
+
+func TestNotMounted(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	t.Skip("Not implemented")
+	d := newDriver(t)
+	defer cleanup(d)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	mounted, err := Mounted(path.Join(d.home, "mnt", "1"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if mounted {
+		t.Fatal("Id 1 should not be mounted")
+	}
+}
+
+func TestMounted(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	d := newDriver(t)
+	defer cleanup(d)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := d.Get("1"); err != nil {
+		t.Fatal(err)
+	}
+
+	mounted, err := Mounted(path.Join(d.home, "mnt", "1"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !mounted {
+		t.Fatal("Id 1 should be mounted")
+	}
+}
+
+func TestInitCleanedDriver(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	d := newDriver(t)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := d.Get("1"); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := d.Cleanup(); err != nil {
+		t.Fatal(err)
+	}
+
+	driver, err := Init(d.home)
+	if err != nil {
+		t.Fatal(err)
+	}
+	d = driver.(*Driver)
+	defer cleanup(d)
+
+	if _, err := d.Get("1"); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestMountMountedDriver(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	d := newDriver(t)
+	defer cleanup(d)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	// Perform get on same id to ensure that it will
+	// not be mounted twice
+	if _, err := d.Get("1"); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := d.Get("1"); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestGetReturnsValidDevice(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	d := newDriver(t)
+	defer cleanup(d)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	if !d.HasDevice("1") {
+		t.Fatalf("Expected id 1 to be in device set")
+	}
+
+	if _, err := d.Get("1"); err != nil {
+		t.Fatal(err)
+	}
+
+	if !d.HasActivatedDevice("1") {
+		t.Fatalf("Expected id 1 to be activated")
+	}
+
+	if !d.HasInitializedDevice("1") {
+		t.Fatalf("Expected id 1 to be initialized")
+	}
+}
+
+func TestDriverGetSize(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	t.Skipf("Size is currently not implemented")
+
+	d := newDriver(t)
+	defer cleanup(d)
+
+	if err := d.Create("1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	mountPoint, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	size := int64(1024)
+
+	f, err := osCreate(path.Join(mountPoint, "test_file"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Truncate(size); err != nil {
+		t.Fatal(err)
+	}
+	f.Close()
+
+	// diffSize, err := d.DiffSize("1")
+	// if err != nil {
+	// 	t.Fatal(err)
+	// }
+	// if diffSize != size {
+	// 	t.Fatalf("Expected size %d got %d", size, diffSize)
+	// }
+}
+
+func assertMap(t *testing.T, m map[string]bool, keys ...string) {
+	for _, key := range keys {
+		if _, exists := m[key]; !exists {
+			t.Fatalf("Key not set: %s", key)
+		}
+		delete(m, key)
+	}
+	if len(m) != 0 {
+		t.Fatalf("Unexpected keys: %v", m)
+	}
+}
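A note on how these tests hang together: every libdevmapper and syscall entry point is a package-level function variable (see sys.go later in this diff), so each test swaps in fakes that record their invocation in a Set and panic on anything unexpected; the runtime.GC() calls force any finalizer-driven cleanup to run so that "DmTaskDestroy" can be asserted afterwards (the finalizer detail is an assumption based on those GC calls, not something visible in this hunk). A minimal sketch of the seam pattern, with made-up names:

// Production code calls through a variable rather than the real syscall.
var statfs = func(path string) (uint64, error) { return 0, nil }

func freeSpace(path string) (uint64, error) { return statfs(path) }

func TestFreeSpaceUsesFake(t *testing.T) {
	calls := make(Set)
	statfs = func(path string) (uint64, error) {
		calls["statfs"] = true
		return 42, nil
	}
	if free, _ := freeSpace("/"); free != 42 {
		t.Fatalf("expected 42, got %d", free)
	}
	calls.Assert(t, "statfs")
}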

+ 25 - 0
graphdriver/devmapper/mount.go

@@ -0,0 +1,25 @@
+package devmapper
+
+import (
+	"path/filepath"
+)
+
+// FIXME: this is copy-pasted from the aufs driver.
+// It should be moved into the core.
+
+var Mounted = func(mountpoint string) (bool, error) {
+	mntpoint, err := osStat(mountpoint)
+	if err != nil {
+		if osIsNotExist(err) {
+			return false, nil
+		}
+		return false, err
+	}
+	parent, err := osStat(filepath.Join(mountpoint, ".."))
+	if err != nil {
+		return false, err
+	}
+	mntpointSt := toSysStatT(mntpoint.Sys())
+	parentSt := toSysStatT(parent.Sys())
+	return mntpointSt.Dev != parentSt.Dev, nil
+}

+ 50 - 0
graphdriver/devmapper/sys.go

@@ -0,0 +1,50 @@
+package devmapper
+
+import (
+	"os"
+	"os/exec"
+	"syscall"
+)
+
+type (
+	sysStatT syscall.Stat_t
+	sysErrno syscall.Errno
+
+	osFile struct{ *os.File }
+)
+
+var (
+	sysMount       = syscall.Mount
+	sysUnmount     = syscall.Unmount
+	sysCloseOnExec = syscall.CloseOnExec
+	sysSyscall     = syscall.Syscall
+
+	osOpenFile   = os.OpenFile
+	osNewFile    = os.NewFile
+	osCreate     = os.Create
+	osStat       = os.Stat
+	osIsNotExist = os.IsNotExist
+	osIsExist    = os.IsExist
+	osMkdirAll   = os.MkdirAll
+	osRemoveAll  = os.RemoveAll
+	osRename     = os.Rename
+	osReadlink   = os.Readlink
+
+	execRun = func(name string, args ...string) error {
+		return exec.Command(name, args...).Run()
+	}
+)
+
+const (
+	sysMsMgcVal = syscall.MS_MGC_VAL
+	sysMsRdOnly = syscall.MS_RDONLY
+	sysEInval   = syscall.EINVAL
+	sysSysIoctl = syscall.SYS_IOCTL
+
+	osORdWr   = os.O_RDWR
+	osOCreate = os.O_CREATE
+)
+
+func toSysStatT(i interface{}) *sysStatT {
+	return (*sysStatT)(i.(*syscall.Stat_t))
+}

+ 90 - 0
graphdriver/driver.go

@@ -0,0 +1,90 @@
+package graphdriver
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/utils"
+	"os"
+	"path"
+)
+
+type InitFunc func(root string) (Driver, error)
+
+type Driver interface {
+	String() string
+
+	Create(id, parent string) error
+	Remove(id string) error
+
+	Get(id string) (dir string, err error)
+	Exists(id string) bool
+
+	Status() [][2]string
+
+	Cleanup() error
+}
+
+type Differ interface {
+	Diff(id string) (archive.Archive, error)
+	Changes(id string) ([]archive.Change, error)
+	ApplyDiff(id string, diff archive.Archive) error
+	DiffSize(id string) (bytes int64, err error)
+}
+
+var (
+	DefaultDriver string
+	// All registered drivers
+	drivers map[string]InitFunc
+	// Slice of drivers that should be tried, in priority order
+	priority = []string{
+		"aufs",
+		"devicemapper",
+		"vfs",
+	}
+)
+
+func init() {
+	drivers = make(map[string]InitFunc)
+}
+
+func Register(name string, initFunc InitFunc) error {
+	if _, exists := drivers[name]; exists {
+		return fmt.Errorf("Name already registered %s", name)
+	}
+	drivers[name] = initFunc
+
+	return nil
+}
+
+func GetDriver(name, home string) (Driver, error) {
+	if initFunc, exists := drivers[name]; exists {
+		return initFunc(path.Join(home, name))
+	}
+	return nil, fmt.Errorf("No such driver: %s", name)
+}
+
+func New(root string) (driver Driver, err error) {
+	for _, name := range []string{os.Getenv("DOCKER_DRIVER"), DefaultDriver} {
+		if name != "" {
+			return GetDriver(name, root)
+		}
+	}
+
+	// Check for priority drivers first
+	for _, name := range priority {
+		if driver, err = GetDriver(name, root); err != nil {
+			utils.Debugf("Error loading driver %s: %s", name, err)
+			continue
+		}
+		return driver, nil
+	}
+
+	// Check all registered drivers if no priority driver is found
+	for _, initFunc := range drivers {
+		if driver, err = initFunc(root); err != nil {
+			continue
+		}
+		return driver, nil
+	}
+	return nil, err
+}
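For anyone adding another backend later: a driver registers an InitFunc from its init() and implements the Driver interface above; New then honours DOCKER_DRIVER / DefaultDriver before walking the aufs, devicemapper, vfs priority list. A bare-bones, hypothetical backend (the package name and no-op behaviour are made up for illustration):

package mydriver

import "github.com/dotcloud/docker/graphdriver"

type Driver struct{ home string }

func init() { graphdriver.Register("mydriver", Init) }

func Init(home string) (graphdriver.Driver, error) { return &Driver{home: home}, nil }

func (d *Driver) String() string                 { return "mydriver" }
func (d *Driver) Create(id, parent string) error { return nil }
func (d *Driver) Remove(id string) error         { return nil }
func (d *Driver) Get(id string) (string, error)  { return d.home, nil }
func (d *Driver) Exists(id string) bool          { return false }
func (d *Driver) Status() [][2]string            { return nil }
func (d *Driver) Cleanup() error                 { return nil }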

+ 91 - 0
graphdriver/vfs/driver.go

@@ -0,0 +1,91 @@
+package vfs
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/graphdriver"
+	"os"
+	"os/exec"
+	"path"
+)
+
+func init() {
+	graphdriver.Register("vfs", Init)
+}
+
+func Init(home string) (graphdriver.Driver, error) {
+	d := &Driver{
+		home: home,
+	}
+	return d, nil
+}
+
+type Driver struct {
+	home string
+}
+
+func (d *Driver) String() string {
+	return "vfs"
+}
+
+func (d *Driver) Status() [][2]string {
+	return nil
+}
+
+func (d *Driver) Cleanup() error {
+	return nil
+}
+
+func copyDir(src, dst string) error {
+	cmd := exec.Command("cp", "-aT", "--reflink=auto", src, dst)
+	if err := cmd.Run(); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (d *Driver) Create(id string, parent string) error {
+	dir := d.dir(id)
+	if err := os.MkdirAll(path.Dir(dir), 0700); err != nil {
+		return err
+	}
+	if err := os.Mkdir(dir, 0700); err != nil {
+		return err
+	}
+	if parent == "" {
+		return nil
+	}
+	parentDir, err := d.Get(parent)
+	if err != nil {
+		return fmt.Errorf("%s: %s", parent, err)
+	}
+	if err := copyDir(parentDir, dir); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (d *Driver) dir(id string) string {
+	return path.Join(d.home, "dir", path.Base(id))
+}
+
+func (d *Driver) Remove(id string) error {
+	if _, err := os.Stat(d.dir(id)); err != nil {
+		return err
+	}
+	return os.RemoveAll(d.dir(id))
+}
+
+func (d *Driver) Get(id string) (string, error) {
+	dir := d.dir(id)
+	if st, err := os.Stat(dir); err != nil {
+		return "", err
+	} else if !st.IsDir() {
+		return "", fmt.Errorf("%s: not a directory", dir)
+	}
+	return dir, nil
+}
+
+func (d *Driver) Exists(id string) bool {
+	_, err := os.Stat(d.dir(id))
+	return err == nil
+}
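The vfs backend trades space for simplicity: Create copies the entire parent tree (cp --reflink=auto stays cheap on copy-on-write filesystems and degrades to a full copy elsewhere), Get hands back a plain directory, and there is nothing to mount or clean up, which is why runtime.go below uses it for volumes. A short usage sketch with made-up ids and home path:

func exampleVfs() error {
	d, err := Init("/var/lib/docker/vfs") // hypothetical home directory
	if err != nil {
		return err
	}
	if err := d.Create("base", ""); err != nil { // empty layer
		return err
	}
	if err := d.Create("child", "base"); err != nil { // full copy of "base"
		return err
	}
	_, err = d.Get("child") // plain directory, nothing to mount
	return err
}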

+ 89 - 163
image.go

@@ -6,17 +6,14 @@ import (
 	"encoding/json"
 	"encoding/json"
 	"fmt"
 	"fmt"
 	"github.com/dotcloud/docker/archive"
 	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/graphdriver"
 	"github.com/dotcloud/docker/utils"
 	"github.com/dotcloud/docker/utils"
 	"io"
 	"io"
 	"io/ioutil"
 	"io/ioutil"
-	"log"
 	"os"
 	"os"
-	"os/exec"
 	"path"
 	"path"
-	"path/filepath"
 	"strconv"
 	"strconv"
 	"strings"
 	"strings"
-	"syscall"
 	"time"
 	"time"
 )
 )
 
 
@@ -62,39 +59,56 @@ func LoadImage(root string) (*Image, error) {
 		img.Size = int64(size)
 		img.Size = int64(size)
 	}
 	}
 
 
-	// Check that the filesystem layer exists
-	if stat, err := os.Stat(layerPath(root)); err != nil {
-		if os.IsNotExist(err) {
-			return nil, fmt.Errorf("Couldn't load image %s: no filesystem layer", img.ID)
-		}
-		return nil, err
-	} else if !stat.IsDir() {
-		return nil, fmt.Errorf("Couldn't load image %s: %s is not a directory", img.ID, layerPath(root))
-	}
 	return img, nil
 	return img, nil
 }
 }
 
 
-func StoreImage(img *Image, jsonData []byte, layerData archive.Archive, root string) error {
-	// Check that root doesn't already exist
-	if _, err := os.Stat(root); err == nil {
-		return fmt.Errorf("Image %s already exists", img.ID)
-	} else if !os.IsNotExist(err) {
-		return err
-	}
+func StoreImage(img *Image, jsonData []byte, layerData archive.Archive, root, layer string) error {
 	// Store the layer
 	// Store the layer
-	layer := layerPath(root)
+	var (
+		size   int64
+		err    error
+		driver = img.graph.driver
+	)
 	if err := os.MkdirAll(layer, 0755); err != nil {
 	if err := os.MkdirAll(layer, 0755); err != nil {
 		return err
 		return err
 	}
 	}
 
 
 	// If layerData is not nil, unpack it into the new layer
 	// If layerData is not nil, unpack it into the new layer
 	if layerData != nil {
 	if layerData != nil {
-		start := time.Now()
-		utils.Debugf("Start untar layer")
-		if err := archive.Untar(layerData, layer); err != nil {
-			return err
+		if differ, ok := driver.(graphdriver.Differ); ok {
+			if err := differ.ApplyDiff(img.ID, layerData); err != nil {
+				return err
+			}
+
+			if size, err = differ.DiffSize(img.ID); err != nil {
+				return err
+			}
+		} else {
+			start := time.Now()
+			utils.Debugf("Start untar layer")
+			if err := archive.ApplyLayer(layer, layerData); err != nil {
+				return err
+			}
+			utils.Debugf("Untar time: %vs", time.Now().Sub(start).Seconds())
+
+			if img.Parent == "" {
+				if size, err = utils.TreeSize(layer); err != nil {
+					return err
+				}
+			} else {
+				parent, err := driver.Get(img.Parent)
+				if err != nil {
+					return err
+				}
+				changes, err := archive.ChangesDirs(layer, parent)
+				if err != nil {
+					return err
+				}
+				size = archive.ChangesSize(layer, changes)
+			}
 		}
 		}
-		utils.Debugf("Untar time: %vs", time.Now().Sub(start).Seconds())
 	}
 	}
 
 
 	// If raw json is provided, then use it
 	// If raw json is provided, then use it
@@ -102,117 +116,60 @@ func StoreImage(img *Image, jsonData []byte, layerData archive.Archive, root str
 		return ioutil.WriteFile(jsonPath(root), jsonData, 0600)
 		return ioutil.WriteFile(jsonPath(root), jsonData, 0600)
 	}
 	}
 	// Otherwise, marshal the image
 	// Otherwise, marshal the image
-	jsonData, err := json.Marshal(img)
-	if err != nil {
+	if jsonData, err = json.Marshal(img); err != nil {
 		return err
 		return err
 	}
 	}
 	if err := ioutil.WriteFile(jsonPath(root), jsonData, 0600); err != nil {
 	if err := ioutil.WriteFile(jsonPath(root), jsonData, 0600); err != nil {
 		return err
 		return err
 	}
 	}
 
 
-	return StoreSize(img, root)
-}
-
-func StoreSize(img *Image, root string) error {
-	layer := layerPath(root)
-	data := make(map[uint64]bool)
-
-	var totalSize int64
-	filepath.Walk(layer, func(path string, fileInfo os.FileInfo, err error) error {
-		size := fileInfo.Size()
-		if size == 0 {
-			return nil
-		}
-
-		inode := fileInfo.Sys().(*syscall.Stat_t).Ino
-		if _, entryExists := data[inode]; entryExists {
-			return nil
-		}
-		data[inode] = false
-
-		totalSize += size
-		return nil
-	})
-	img.Size = totalSize
-
-	if err := ioutil.WriteFile(path.Join(root, "layersize"), []byte(strconv.Itoa(int(totalSize))), 0600); err != nil {
-		return nil
+	img.Size = size
+	if err := img.SaveSize(root); err != nil {
+		return err
 	}
 	}
 
 
 	return nil
 	return nil
 }
 }
 
 
-func layerPath(root string) string {
-	return path.Join(root, "layer")
+// SaveSize stores the current `size` value of `img` in the directory `root`.
+func (img *Image) SaveSize(root string) error {
+	if err := ioutil.WriteFile(path.Join(root, "layersize"), []byte(strconv.Itoa(int(img.Size))), 0600); err != nil {
+		return fmt.Errorf("Error storing image size in %s/layersize: %s", root, err)
+	}
+	return nil
 }
 }
 
 
 func jsonPath(root string) string {
 func jsonPath(root string) string {
 	return path.Join(root, "json")
 	return path.Join(root, "json")
 }
 }
 
 
-func MountAUFS(ro []string, rw string, target string) error {
-	// FIXME: Now mount the layers
-	rwBranch := fmt.Sprintf("%v=rw", rw)
-	roBranches := ""
-	for _, layer := range ro {
-		roBranches += fmt.Sprintf("%v=ro+wh:", layer)
-	}
-	branches := fmt.Sprintf("br:%v:%v", rwBranch, roBranches)
-
-	branches += ",xino=/dev/shm/aufs.xino"
-
-	//if error, try to load aufs kernel module
-	if err := mount("none", target, "aufs", 0, branches); err != nil {
-		log.Printf("Kernel does not support AUFS, trying to load the AUFS module with modprobe...")
-		if err := exec.Command("modprobe", "aufs").Run(); err != nil {
-			return fmt.Errorf("Unable to load the AUFS module")
-		}
-		log.Printf("...module loaded.")
-		if err := mount("none", target, "aufs", 0, branches); err != nil {
-			return fmt.Errorf("Unable to mount using aufs")
-		}
-	}
-	return nil
-}
-
 // TarLayer returns a tar archive of the image's filesystem layer.
 // TarLayer returns a tar archive of the image's filesystem layer.
-func (img *Image) TarLayer(compression archive.Compression) (archive.Archive, error) {
-	layerPath, err := img.layer()
-	if err != nil {
-		return nil, err
-	}
-	return archive.Tar(layerPath, compression)
-}
-
-func (img *Image) Mount(root, rw string) error {
-	if mounted, err := Mounted(root); err != nil {
-		return err
-	} else if mounted {
-		return fmt.Errorf("%s is already mounted", root)
-	}
-	layers, err := img.layers()
-	if err != nil {
-		return err
-	}
-	// Create the target directories if they don't exist
-	if err := os.Mkdir(root, 0755); err != nil && !os.IsExist(err) {
-		return err
-	}
-	if err := os.Mkdir(rw, 0755); err != nil && !os.IsExist(err) {
-		return err
+func (img *Image) TarLayer() (archive.Archive, error) {
+	if img.graph == nil {
+		return nil, fmt.Errorf("Can't load storage driver for unregistered image %s", img.ID)
 	}
 	}
-	if err := MountAUFS(layers, rw, root); err != nil {
-		return err
+	driver := img.graph.driver
+	if differ, ok := driver.(graphdriver.Differ); ok {
+		return differ.Diff(img.ID)
 	}
 	}
-	return nil
-}
 
 
-func (img *Image) Changes(rw string) ([]Change, error) {
-	layers, err := img.layers()
+	imgFs, err := driver.Get(img.ID)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
-	return Changes(layers, rw)
+	if img.Parent == "" {
+		return archive.Tar(imgFs, archive.Uncompressed)
+	} else {
+		parentFs, err := driver.Get(img.Parent)
+		if err != nil {
+			return nil, err
+		}
+		changes, err := archive.ChangesDirs(imgFs, parentFs)
+		if err != nil {
+			return nil, err
+		}
+		return archive.ExportChanges(imgFs, changes)
+	}
 }
 }
 
 
 func ValidateID(id string) error {
 func ValidateID(id string) error {
@@ -250,40 +207,6 @@ func (img *Image) History() ([]*Image, error) {
 	return parents, nil
 	return parents, nil
 }
 }
 
 
-// layers returns all the filesystem layers needed to mount an image
-// FIXME: @shykes refactor this function with the new error handling
-//        (I'll do it if I have time tonight, I focus on the rest)
-func (img *Image) layers() ([]string, error) {
-	var (
-		list []string
-		e    error
-	)
-	if err := img.WalkHistory(
-		func(img *Image) (err error) {
-			if layer, err := img.layer(); err != nil {
-				e = err
-			} else if layer != "" {
-				list = append(list, layer)
-			}
-			return err
-		},
-	); err != nil {
-		return nil, err
-	} else if e != nil { // Did an error occur inside the handler?
-		return nil, e
-	}
-	if len(list) == 0 {
-		return nil, fmt.Errorf("No layer found for image %s\n", img.ID)
-	}
-
-	// Inject the dockerinit layer (empty place-holder for mount-binding dockerinit)
-	dockerinitLayer, err := img.getDockerInitLayer()
-	if err != nil {
-		return nil, err
-	}
-	return append([]string{dockerinitLayer}, list...), nil
-}
-
 func (img *Image) WalkHistory(handler func(*Image) error) (err error) {
 func (img *Image) WalkHistory(handler func(*Image) error) (err error) {
 	currentImg := img
 	currentImg := img
 	for currentImg != nil {
 	for currentImg != nil {
@@ -310,13 +233,6 @@ func (img *Image) GetParent() (*Image, error) {
 	return img.graph.Get(img.Parent)
 	return img.graph.Get(img.Parent)
 }
 }
 
 
-func (img *Image) getDockerInitLayer() (string, error) {
-	if img.graph == nil {
-		return "", fmt.Errorf("Can't lookup dockerinit layer of unregistered image")
-	}
-	return img.graph.getDockerInitLayer()
-}
-
 func (img *Image) root() (string, error) {
 func (img *Image) root() (string, error) {
 	if img.graph == nil {
 	if img.graph == nil {
 		return "", fmt.Errorf("Can't lookup root of unregistered image")
 		return "", fmt.Errorf("Can't lookup root of unregistered image")
@@ -324,15 +240,6 @@ func (img *Image) root() (string, error) {
 	return img.graph.imageRoot(img.ID), nil
 	return img.graph.imageRoot(img.ID), nil
 }
 }
 
 
-// Return the path of an image's layer
-func (img *Image) layer() (string, error) {
-	root, err := img.root()
-	if err != nil {
-		return "", err
-	}
-	return layerPath(root), nil
-}
-
 func (img *Image) getParentsSize(size int64) int64 {
 func (img *Image) getParentsSize(size int64) int64 {
 	parentImage, err := img.GetParent()
 	parentImage, err := img.GetParent()
 	if err != nil || parentImage == nil {
 	if err != nil || parentImage == nil {
@@ -342,6 +249,25 @@ func (img *Image) getParentsSize(size int64) int64 {
 	return parentImage.getParentsSize(size)
 	return parentImage.getParentsSize(size)
 }
 }
 
 
+// Depth returns the depth of the image in its chain
+// of parents, counting the image itself
+func (img *Image) Depth() (int, error) {
+	var (
+		count  = 0
+		parent = img
+		err    error
+	)
+
+	for parent != nil {
+		count++
+		parent, err = parent.GetParent()
+		if err != nil {
+			return -1, err
+		}
+	}
+	return count, nil
+}
+
 // Build an Image object from raw json data
 // Build an Image object from raw json data
 func NewImgJSON(src []byte) (*Image, error) {
 func NewImgJSON(src []byte) (*Image, error) {
 	ret := &Image{}
 	ret := &Image{}

+ 4 - 5
integration/commands_test.go

@@ -840,13 +840,12 @@ func TestImagesTree(t *testing.T) {
 			t.Fatal(err)
 			t.Fatal(err)
 		}
 		}
 		cmdOutput := string(cmdOutputBytes)
 		cmdOutput := string(cmdOutputBytes)
-
 		regexpStrings := []string{
 		regexpStrings := []string{
 			fmt.Sprintf("└─%s Size: (\\d+.\\d+ MB) \\(virtual \\d+.\\d+ MB\\) Tags: %s:latest", unitTestImageIDShort, unitTestImageName),
 			fmt.Sprintf("└─%s Size: (\\d+.\\d+ MB) \\(virtual \\d+.\\d+ MB\\) Tags: %s:latest", unitTestImageIDShort, unitTestImageName),
-			"(?m)^  └─[0-9a-f]+",
-			"(?m)^    └─[0-9a-f]+",
-			"(?m)^      └─[0-9a-f]+",
-			fmt.Sprintf("        └─%s Size: \\d+ B \\(virtual \\d+.\\d+ MB\\) Tags: test:latest", utils.TruncateID(image.ID)),
+			"(?m)   └─[0-9a-f]+.*",
+			"(?m)    └─[0-9a-f]+.*",
+			"(?m)      └─[0-9a-f]+.*",
+			fmt.Sprintf("(?m)^        └─%s Size: \\d+.\\d+ MB \\(virtual \\d+.\\d+ MB\\) Tags: test:latest", utils.TruncateID(image.ID)),
 		}
 		}
 
 
 		compiledRegexps := []*regexp.Regexp{}
 		compiledRegexps := []*regexp.Regexp{}

+ 1 - 1
integration/container_test.go

@@ -172,7 +172,7 @@ func TestDiff(t *testing.T) {
 	// Commit the container
 	// Commit the container
 	img, err := runtime.Commit(container1, "", "", "unit test commited image - diff", "", nil)
 	img, err := runtime.Commit(container1, "", "", "unit test commited image - diff", "", nil)
 	if err != nil {
 	if err != nil {
-		t.Error(err)
+		t.Fatal(err)
 	}
 	}
 
 
 	// Create a new container from the commited image
 	// Create a new container from the commited image

+ 13 - 11
integration/graph_test.go

@@ -2,6 +2,7 @@ package docker
 
 
 import (
 import (
 	"github.com/dotcloud/docker"
 	"github.com/dotcloud/docker"
+	"github.com/dotcloud/docker/graphdriver"
 	"io/ioutil"
 	"io/ioutil"
 	"os"
 	"os"
 	"path"
 	"path"
@@ -9,8 +10,10 @@ import (
 )
 )
 
 
 func TestMount(t *testing.T) {
 func TestMount(t *testing.T) {
-	graph := tempGraph(t)
+	graph, driver := tempGraph(t)
 	defer os.RemoveAll(graph.Root)
 	defer os.RemoveAll(graph.Root)
+	defer driver.Cleanup()
+
 	archive, err := fakeTar()
 	archive, err := fakeTar()
 	if err != nil {
 	if err != nil {
 		t.Fatal(err)
 		t.Fatal(err)
@@ -32,26 +35,25 @@ func TestMount(t *testing.T) {
 	if err := os.MkdirAll(rw, 0700); err != nil {
 	if err := os.MkdirAll(rw, 0700); err != nil {
 		t.Fatal(err)
 		t.Fatal(err)
 	}
 	}
-	if err := image.Mount(rootfs, rw); err != nil {
+
+	if _, err := driver.Get(image.ID); err != nil {
 		t.Fatal(err)
 		t.Fatal(err)
 	}
 	}
-	// FIXME: test for mount contents
-	defer func() {
-		if err := docker.Unmount(rootfs); err != nil {
-			t.Error(err)
-		}
-	}()
 }
 }
 
 
 //FIXME: duplicate
 //FIXME: duplicate
-func tempGraph(t *testing.T) *docker.Graph {
+func tempGraph(t *testing.T) (*docker.Graph, graphdriver.Driver) {
 	tmp, err := ioutil.TempDir("", "docker-graph-")
 	tmp, err := ioutil.TempDir("", "docker-graph-")
 	if err != nil {
 	if err != nil {
 		t.Fatal(err)
 		t.Fatal(err)
 	}
 	}
-	graph, err := docker.NewGraph(tmp)
+	driver, err := graphdriver.New(tmp)
+	if err != nil {
+		t.Fatal(err)
+	}
+	graph, err := docker.NewGraph(tmp, driver)
 	if err != nil {
 	if err != nil {
 		t.Fatal(err)
 		t.Fatal(err)
 	}
 	}
-	return graph
+	return graph, driver
 }
 }

+ 159 - 20
runtime.go

@@ -5,7 +5,12 @@ import (
 	"container/list"
 	"container/list"
 	"database/sql"
 	"database/sql"
 	"fmt"
 	"fmt"
-	"github.com/dotcloud/docker/gograph"
+	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/graphdb"
+	"github.com/dotcloud/docker/graphdriver"
+	"github.com/dotcloud/docker/graphdriver/aufs"
+	_ "github.com/dotcloud/docker/graphdriver/devmapper"
+	_ "github.com/dotcloud/docker/graphdriver/vfs"
 	"github.com/dotcloud/docker/utils"
 	"github.com/dotcloud/docker/utils"
 	"io"
 	"io"
 	"io/ioutil"
 	"io/ioutil"
@@ -19,6 +24,9 @@ import (
 	"time"
 	"time"
 )
 )
 
 
+// Set the max depth to the aufs restriction
+const MaxImageDepth = 42
+
 var defaultDns = []string{"8.8.8.8", "8.8.4.4"}
 var defaultDns = []string{"8.8.8.8", "8.8.4.4"}
 
 
 type Capabilities struct {
 type Capabilities struct {
@@ -39,7 +47,8 @@ type Runtime struct {
 	volumes        *Graph
 	volumes        *Graph
 	srv            *Server
 	srv            *Server
 	config         *DaemonConfig
 	config         *DaemonConfig
-	containerGraph *gograph.Database
+	containerGraph *graphdb.Database
+	driver         graphdriver.Driver
 }
 }
 
 
 // List returns an array of all containers registered in the runtime.
 // List returns an array of all containers registered in the runtime.
@@ -118,6 +127,13 @@ func (runtime *Runtime) Register(container *Container) error {
 		return err
 		return err
 	}
 	}
 
 
+	// Get the root filesystem from the driver
+	rootfs, err := runtime.driver.Get(container.ID)
+	if err != nil {
+		return fmt.Errorf("Error getting container filesystem %s from driver %s: %s", container.ID, runtime.driver, err)
+	}
+	container.rootfs = rootfs
+
 	container.runtime = runtime
 	container.runtime = runtime
 
 
 	// Attach to stdout and stderr
 	// Attach to stdout and stderr
@@ -216,12 +232,8 @@ func (runtime *Runtime) Destroy(container *Container) error {
 		return err
 		return err
 	}
 	}
 
 
-	if mounted, err := container.Mounted(); err != nil {
-		return err
-	} else if mounted {
-		if err := container.Unmount(); err != nil {
-			return fmt.Errorf("Unable to unmount container %v: %v", container.ID, err)
-		}
+	if err := runtime.driver.Remove(container.ID); err != nil {
+		return fmt.Errorf("Driver %s failed to remove root filesystem %s: %s", runtime.driver, container.ID, err)
 	}
 	}
 
 
 	if _, err := runtime.containerGraph.Purge(container.ID); err != nil {
 	if _, err := runtime.containerGraph.Purge(container.ID); err != nil {
@@ -247,6 +259,7 @@ func (runtime *Runtime) restore() error {
 		return err
 		return err
 	}
 	}
 	containers := make(map[string]*Container)
 	containers := make(map[string]*Container)
+	currentDriver := runtime.driver.String()
 
 
 	for i, v := range dir {
 	for i, v := range dir {
 		id := v.Name()
 		id := v.Name()
@@ -258,8 +271,14 @@ func (runtime *Runtime) restore() error {
 			utils.Errorf("Failed to load container %v: %v", id, err)
 			utils.Errorf("Failed to load container %v: %v", id, err)
 			continue
 			continue
 		}
 		}
-		utils.Debugf("Loaded container %v", container.ID)
-		containers[container.ID] = container
+
+		// Ignore the container if it does not support the current driver being used by the graph
+		if container.Driver == "" && currentDriver == "aufs" || container.Driver == currentDriver {
+			utils.Debugf("Loaded container %v", container.ID)
+			containers[container.ID] = container
+		} else {
+			utils.Debugf("Cannot load container %s because it was created with another graph driver.", container.ID)
+		}
 	}
 	}
 
 
 	register := func(container *Container) {
 	register := func(container *Container) {
@@ -344,6 +363,17 @@ func (runtime *Runtime) Create(config *Config, name string) (*Container, []strin
 		return nil, nil, err
 		return nil, nil, err
 	}
 	}
 
 
+	// We add 2 to the image depth because the container's rw and
+	// init layers also count toward the aufs restriction
+	depth, err := img.Depth()
+	if err != nil {
+		return nil, nil, err
+	}
+
+	if depth+2 >= MaxImageDepth {
+		return nil, nil, fmt.Errorf("Cannot create container with more than %d parents", MaxImageDepth)
+	}
+
 	checkDeprecatedExpose := func(config *Config) bool {
 	checkDeprecatedExpose := func(config *Config) bool {
 		if config != nil {
 		if config != nil {
 			if config.PortSpecs != nil {
 			if config.PortSpecs != nil {
@@ -431,6 +461,7 @@ func (runtime *Runtime) Create(config *Config, name string) (*Container, []strin
 		// FIXME: do we need to store this in the container?
 		// FIXME: do we need to store this in the container?
 		SysInitPath: sysInitPath,
 		SysInitPath: sysInitPath,
 		Name:        name,
 		Name:        name,
+		Driver:      runtime.driver.String(),
 	}
 	}
 	container.root = runtime.containerRoot(container.ID)
 	container.root = runtime.containerRoot(container.ID)
 	// Step 1: create the container directory.
 	// Step 1: create the container directory.
@@ -439,6 +470,21 @@ func (runtime *Runtime) Create(config *Config, name string) (*Container, []strin
 		return nil, nil, err
 		return nil, nil, err
 	}
 	}
 
 
+	initID := fmt.Sprintf("%s-init", container.ID)
+	if err := runtime.driver.Create(initID, img.ID); err != nil {
+		return nil, nil, err
+	}
+	initPath, err := runtime.driver.Get(initID)
+	if err != nil {
+		return nil, nil, err
+	}
+	if err := setupInitLayer(initPath); err != nil {
+		return nil, nil, err
+	}
+
+	if err := runtime.driver.Create(container.ID, initID); err != nil {
+		return nil, nil, err
+	}
 	resolvConf, err := utils.GetResolvConf()
 	resolvConf, err := utils.GetResolvConf()
 	if err != nil {
 	if err != nil {
 		return nil, nil, err
 		return nil, nil, err
@@ -549,7 +595,7 @@ func (runtime *Runtime) Children(name string) (map[string]*Container, error) {
 	}
 	}
 	children := make(map[string]*Container)
 	children := make(map[string]*Container)
 
 
-	err = runtime.containerGraph.Walk(name, func(p string, e *gograph.Entity) error {
+	err = runtime.containerGraph.Walk(name, func(p string, e *graphdb.Entity) error {
 		c := runtime.Get(e.ID())
 		c := runtime.Get(e.ID())
 		if c == nil {
 		if c == nil {
 			return fmt.Errorf("Could not get container for name %s and id %s", e.ID(), p)
 			return fmt.Errorf("Could not get container for name %s and id %s", e.ID(), p)
@@ -584,24 +630,48 @@ func NewRuntime(config *DaemonConfig) (*Runtime, error) {
 }
 }
 
 
 func NewRuntimeFromDirectory(config *DaemonConfig) (*Runtime, error) {
 func NewRuntimeFromDirectory(config *DaemonConfig) (*Runtime, error) {
+
+	// Set the default driver
+	graphdriver.DefaultDriver = config.GraphDriver
+
+	// Load storage driver
+	driver, err := graphdriver.New(config.Root)
+	if err != nil {
+		return nil, err
+	}
+	utils.Debugf("Using graph driver %s", driver)
+
 	runtimeRepo := path.Join(config.Root, "containers")
 	runtimeRepo := path.Join(config.Root, "containers")
 
 
 	if err := os.MkdirAll(runtimeRepo, 0700); err != nil && !os.IsExist(err) {
 	if err := os.MkdirAll(runtimeRepo, 0700); err != nil && !os.IsExist(err) {
 		return nil, err
 		return nil, err
 	}
 	}
 
 
+	if ad, ok := driver.(*aufs.Driver); ok {
+		if err := ad.Migrate(config.Root, setupInitLayer); err != nil {
+			return nil, err
+		}
+	}
+
 	if err := linkLxcStart(config.Root); err != nil {
 	if err := linkLxcStart(config.Root); err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
-	g, err := NewGraph(path.Join(config.Root, "graph"))
+	g, err := NewGraph(path.Join(config.Root, "graph"), driver)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
-	volumes, err := NewGraph(path.Join(config.Root, "volumes"))
+
+	// We don't want to use a complex driver like aufs or devmapper
+	// for volumes, just a plain filesystem
+	volumesDriver, err := graphdriver.GetDriver("vfs", config.Root)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
-	repositories, err := NewTagStore(path.Join(config.Root, "repositories"), g)
+	volumes, err := NewGraph(path.Join(config.Root, "volumes"), volumesDriver)
+	if err != nil {
+		return nil, err
+	}
+	repositories, err := NewTagStore(path.Join(config.Root, "repositories-"+driver.String()), g)
 	if err != nil {
 	if err != nil {
 		return nil, fmt.Errorf("Couldn't create Tag store: %s", err)
 		return nil, fmt.Errorf("Couldn't create Tag store: %s", err)
 	}
 	}
@@ -613,20 +683,20 @@ func NewRuntimeFromDirectory(config *DaemonConfig) (*Runtime, error) {
 		return nil, err
 		return nil, err
 	}
 	}
 
 
-	gographPath := path.Join(config.Root, "linkgraph.db")
+	graphdbPath := path.Join(config.Root, "linkgraph.db")
 	initDatabase := false
 	initDatabase := false
-	if _, err := os.Stat(gographPath); err != nil {
+	if _, err := os.Stat(graphdbPath); err != nil {
 		if os.IsNotExist(err) {
 		if os.IsNotExist(err) {
 			initDatabase = true
 			initDatabase = true
 		} else {
 		} else {
 			return nil, err
 			return nil, err
 		}
 		}
 	}
 	}
-	conn, err := sql.Open("sqlite3", gographPath)
+	conn, err := sql.Open("sqlite3", graphdbPath)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
-	graph, err := gograph.NewDatabase(conn, initDatabase)
+	graph, err := graphdb.NewDatabase(conn, initDatabase)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
@@ -642,6 +712,7 @@ func NewRuntimeFromDirectory(config *DaemonConfig) (*Runtime, error) {
 		volumes:        volumes,
 		volumes:        volumes,
 		config:         config,
 		config:         config,
 		containerGraph: graph,
 		containerGraph: graph,
+		driver:         driver,
 	}
 	}
 
 
 	if err := runtime.restore(); err != nil {
 	if err := runtime.restore(); err != nil {
@@ -651,8 +722,76 @@ func NewRuntimeFromDirectory(config *DaemonConfig) (*Runtime, error) {
 }
 }
 
 
 func (runtime *Runtime) Close() error {
 func (runtime *Runtime) Close() error {
-	runtime.networkManager.Close()
-	return runtime.containerGraph.Close()
+	errorsStrings := []string{}
+	if err := runtime.networkManager.Close(); err != nil {
+		utils.Errorf("runtime.networkManager.Close(): %s", err.Error())
+		errorsStrings = append(errorsStrings, err.Error())
+	}
+	if err := runtime.driver.Cleanup(); err != nil {
+		utils.Errorf("runtime.driver.Cleanup(): %s", err.Error())
+		errorsStrings = append(errorsStrings, err.Error())
+	}
+	if err := runtime.containerGraph.Close(); err != nil {
+		utils.Errorf("runtime.containerGraph.Close(): %s", err.Error())
+		errorsStrings = append(errorsStrings, err.Error())
+	}
+	if len(errorsStrings) > 0 {
+		return fmt.Errorf("%s", strings.Join(errorsStrings, ", "))
+	}
+	return nil
+}
+
+func (runtime *Runtime) Mount(container *Container) error {
+	dir, err := runtime.driver.Get(container.ID)
+	if err != nil {
+		return fmt.Errorf("Error getting container %s from driver %s: %s", container.ID, runtime.driver, err)
+	}
+	if container.rootfs == "" {
+		container.rootfs = dir
+	} else if container.rootfs != dir {
+		return fmt.Errorf("Error: driver %s is returning inconsistent paths for container %s ('%s' then '%s')",
+			runtime.driver, container.ID, container.rootfs, dir)
+	}
+	return nil
+}
+
+func (runtime *Runtime) Unmount(container *Container) error {
+	// FIXME: Unmount is deprecated because drivers are responsible for mounting
+	// and unmounting when necessary. Use driver.Remove() instead.
+	return nil
+}
+
+func (runtime *Runtime) Changes(container *Container) ([]archive.Change, error) {
+	if differ, ok := runtime.driver.(graphdriver.Differ); ok {
+		return differ.Changes(container.ID)
+	}
+	cDir, err := runtime.driver.Get(container.ID)
+	if err != nil {
+		return nil, fmt.Errorf("Error getting container rootfs %s from driver %s: %s", container.ID, container.runtime.driver, err)
+	}
+	initDir, err := runtime.driver.Get(container.ID + "-init")
+	if err != nil {
+		return nil, fmt.Errorf("Error getting container init rootfs %s from driver %s: %s", container.ID, container.runtime.driver, err)
+	}
+	return archive.ChangesDirs(cDir, initDir)
+}
+
+func (runtime *Runtime) Diff(container *Container) (archive.Archive, error) {
+	if differ, ok := runtime.driver.(graphdriver.Differ); ok {
+		return differ.Diff(container.ID)
+	}
+
+	changes, err := runtime.Changes(container)
+	if err != nil {
+		return nil, err
+	}
+
+	cDir, err := runtime.driver.Get(container.ID)
+	if err != nil {
+		return nil, fmt.Errorf("Error getting container rootfs %s from driver %s: %s", container.ID, container.runtime.driver, err)
+	}
+
+	return archive.ExportChanges(cDir, changes)
 }
 }
 
 
 // Nuke kills all containers then removes all content
 // Nuke kills all containers then removes all content

+ 36 - 30
server.go

@@ -8,7 +8,7 @@ import (
 	"github.com/dotcloud/docker/archive"
 	"github.com/dotcloud/docker/archive"
 	"github.com/dotcloud/docker/auth"
 	"github.com/dotcloud/docker/auth"
 	"github.com/dotcloud/docker/engine"
 	"github.com/dotcloud/docker/engine"
-	"github.com/dotcloud/docker/gograph"
+	"github.com/dotcloud/docker/graphdb"
 	"github.com/dotcloud/docker/registry"
 	"github.com/dotcloud/docker/registry"
 	"github.com/dotcloud/docker/utils"
 	"github.com/dotcloud/docker/utils"
 	"io"
 	"io"
@@ -285,7 +285,7 @@ func (srv *Server) exportImage(image *Image, tempdir string) error {
 		}
 		}
 
 
 		// serialize filesystem
 		// serialize filesystem
-		fs, err := archive.Tar(path.Join(srv.runtime.graph.Root, i.ID, "layer"), archive.Uncompressed)
+		fs, err := i.TarLayer()
 		if err != nil {
 		if err != nil {
 			return err
 			return err
 		}
 		}
@@ -342,7 +342,7 @@ func (srv *Server) ImageLoad(in io.Reader) error {
 	if err := os.Mkdir(repoDir, os.ModeDir); err != nil {
 	if err := os.Mkdir(repoDir, os.ModeDir); err != nil {
 		return err
 		return err
 	}
 	}
-	if err := archive.Untar(repoFile, repoDir); err != nil {
+	if err := archive.Untar(repoFile, repoDir, nil); err != nil {
 		return err
 		return err
 	}
 	}
 
 
@@ -596,6 +596,8 @@ func (srv *Server) DockerInfo() *APIInfo {
 	return &APIInfo{
 	return &APIInfo{
 		Containers:         len(srv.runtime.List()),
 		Containers:         len(srv.runtime.List()),
 		Images:             imgcount,
 		Images:             imgcount,
+		Driver:             srv.runtime.driver.String(),
+		DriverStatus:       srv.runtime.driver.Status(),
 		MemoryLimit:        srv.runtime.capabilities.MemoryLimit,
 		MemoryLimit:        srv.runtime.capabilities.MemoryLimit,
 		SwapLimit:          srv.runtime.capabilities.SwapLimit,
 		SwapLimit:          srv.runtime.capabilities.SwapLimit,
 		IPv4Forwarding:     !srv.runtime.capabilities.IPv4ForwardingDisabled,
 		IPv4Forwarding:     !srv.runtime.capabilities.IPv4ForwardingDisabled,
@@ -678,7 +680,7 @@ func (srv *Server) ContainerTop(name, psArgs string) (*APITop, error) {
 	return nil, fmt.Errorf("No such container: %s", name)
 	return nil, fmt.Errorf("No such container: %s", name)
 }
 }
 
 
-func (srv *Server) ContainerChanges(name string) ([]Change, error) {
+func (srv *Server) ContainerChanges(name string) ([]archive.Change, error) {
 	if container := srv.runtime.Get(name); container != nil {
 	if container := srv.runtime.Get(name); container != nil {
 		return container.Changes()
 		return container.Changes()
 	}
 	}
@@ -691,7 +693,7 @@ func (srv *Server) Containers(all, size bool, n int, since, before string) []API
 	out := []APIContainers{}
 
 	names := map[string][]string{}
-	srv.runtime.containerGraph.Walk("/", func(p string, e *gograph.Entity) error {
+	srv.runtime.containerGraph.Walk("/", func(p string, e *graphdb.Entity) error {
 		names[e.ID()] = append(names[e.ID()], p)
 		return nil
 	}, -1)
@@ -763,12 +765,13 @@ func (srv *Server) pullImage(r *registry.Registry, out io.Writer, imgID, endpoin
 	// FIXME: Try to stream the images?
 	// FIXME: Launch the getRemoteImage() in goroutines
 
-	for _, id := range history {
+	for i := len(history) - 1; i >= 0; i-- {
+		id := history[i]
 
 		// ensure no two downloads of the same layer happen at the same time
-		if err := srv.poolAdd("pull", "layer:"+id); err != nil {
+		if c, err := srv.poolAdd("pull", "layer:"+id); err != nil {
 			utils.Errorf("Image (id: %s) pull is already running, skipping: %v", id, err)
-			return nil
+			<-c
 		}
 		defer srv.poolRemove("pull", "layer:"+id)
 
@@ -863,7 +866,7 @@ func (srv *Server) pullRepository(r *registry.Registry, out io.Writer, localName
 			}
 
 			// ensure no two downloads of the same image happen at the same time
-			if err := srv.poolAdd("pull", "img:"+img.ID); err != nil {
+			if _, err := srv.poolAdd("pull", "img:"+img.ID); err != nil {
 				utils.Errorf("Image (id: %s) pull is already running, skipping: %v", img.ID, err)
 				if parallel {
 					errors <- nil
@@ -934,28 +937,27 @@ func (srv *Server) pullRepository(r *registry.Registry, out io.Writer, localName
 	return nil
 }
 
-func (srv *Server) poolAdd(kind, key string) error {
+func (srv *Server) poolAdd(kind, key string) (chan struct{}, error) {
 	srv.Lock()
 	defer srv.Unlock()
 
-	if _, exists := srv.pullingPool[key]; exists {
-		return fmt.Errorf("pull %s is already in progress", key)
+	if c, exists := srv.pullingPool[key]; exists {
+		return c, fmt.Errorf("pull %s is already in progress", key)
 	}
-	if _, exists := srv.pushingPool[key]; exists {
-		return fmt.Errorf("push %s is already in progress", key)
+	if c, exists := srv.pushingPool[key]; exists {
+		return c, fmt.Errorf("push %s is already in progress", key)
 	}
 
+	c := make(chan struct{})
 	switch kind {
 	case "pull":
-		srv.pullingPool[key] = struct{}{}
-		break
+		srv.pullingPool[key] = c
 	case "push":
-		srv.pushingPool[key] = struct{}{}
-		break
+		srv.pushingPool[key] = c
 	default:
-		return fmt.Errorf("Unknown pool type")
+		return nil, fmt.Errorf("Unknown pool type")
 	}
-	return nil
+	return c, nil
 }
 
 func (srv *Server) poolRemove(kind, key string) error {
@@ -963,11 +965,15 @@ func (srv *Server) poolRemove(kind, key string) error {
 	defer srv.Unlock()
 	switch kind {
 	case "pull":
-		delete(srv.pullingPool, key)
-		break
+		if c, exists := srv.pullingPool[key]; exists {
+			close(c)
+			delete(srv.pullingPool, key)
+		}
 	case "push":
-		delete(srv.pushingPool, key)
-		break
+		if c, exists := srv.pushingPool[key]; exists {
+			close(c)
+			delete(srv.pushingPool, key)
+		}
 	default:
 		return fmt.Errorf("Unknown pool type")
 	}
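
The poolAdd/poolRemove rework above replaces the bare struct{} markers with one chan struct{} per in-flight pull or push: the first caller registers the key and gets a fresh channel, a caller that loses the race gets the same channel back along with the "already in progress" error (and can simply wait on it, as pullImage now does with <-c), and poolRemove closes the channel so every waiter is released at once. A self-contained sketch of the pattern, using generic names rather than the actual Server methods:

package sketch

import (
	"fmt"
	"sync"
)

// pool tracks in-flight operations by key; the channel for a key is closed
// when that operation finishes, releasing anyone waiting on it.
type pool struct {
	sync.Mutex
	inflight map[string]chan struct{}
}

func newPool() *pool {
	return &pool{inflight: make(map[string]chan struct{})}
}

// add registers key and returns a new channel, or returns the existing
// channel plus an error if the same key is already being worked on.
func (p *pool) add(key string) (chan struct{}, error) {
	p.Lock()
	defer p.Unlock()
	if c, exists := p.inflight[key]; exists {
		return c, fmt.Errorf("%s is already in progress", key)
	}
	c := make(chan struct{})
	p.inflight[key] = c
	return c, nil
}

// remove closes the channel (waking all waiters) and forgets the key.
func (p *pool) remove(key string) {
	p.Lock()
	defer p.Unlock()
	if c, exists := p.inflight[key]; exists {
		close(c)
		delete(p.inflight, key)
	}
}
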
@@ -979,7 +985,7 @@ func (srv *Server) ImagePull(localName string, tag string, out io.Writer, sf *ut
 	if err != nil {
 		return err
 	}
-	if err := srv.poolAdd("pull", localName+":"+tag); err != nil {
+	if _, err := srv.poolAdd("pull", localName+":"+tag); err != nil {
 		return err
 	}
 	defer srv.poolRemove("pull", localName+":"+tag)
@@ -1174,7 +1180,7 @@ func (srv *Server) pushImage(r *registry.Registry, out io.Writer, remote, imgID,
 
 // FIXME: Allow to interrupt current push when new push of same image is done.
 func (srv *Server) ImagePush(localName string, out io.Writer, sf *utils.StreamFormatter, authConfig *auth.AuthConfig, metaHeaders map[string][]string) error {
-	if err := srv.poolAdd("push", localName); err != nil {
+	if _, err := srv.poolAdd("push", localName); err != nil {
 		return err
 	}
 	defer srv.poolRemove("push", localName)
@@ -1820,8 +1826,8 @@ func NewServer(eng *engine.Engine, config *DaemonConfig) (*Server, error) {
 	srv := &Server{
 	srv := &Server{
 		Eng:         eng,
 		Eng:         eng,
 		runtime:     runtime,
 		runtime:     runtime,
-		pullingPool: make(map[string]struct{}),
-		pushingPool: make(map[string]struct{}),
+		pullingPool: make(map[string]chan struct{}),
+		pushingPool: make(map[string]chan struct{}),
 		events:      make([]utils.JSONMessage, 0, 64), //only keeps the 64 last events
 		events:      make([]utils.JSONMessage, 0, 64), //only keeps the 64 last events
 		listeners:   make(map[string]chan utils.JSONMessage),
 		listeners:   make(map[string]chan utils.JSONMessage),
 		reqFactory:  nil,
 		reqFactory:  nil,
@@ -1872,8 +1878,8 @@ func (srv *Server) GetEvents() []utils.JSONMessage {
 type Server struct {
 	sync.RWMutex
 	runtime     *Runtime
-	pullingPool map[string]struct{}
-	pushingPool map[string]struct{}
+	pullingPool map[string]chan struct{}
+	pushingPool map[string]chan struct{}
 	events      []utils.JSONMessage
 	listeners   map[string]chan utils.JSONMessage
 	reqFactory  *utils.HTTPRequestFactory

+ 12 - 23
server_unit_test.go

@@ -8,49 +8,38 @@ import (
 
 func TestPools(t *testing.T) {
 	srv := &Server{
-		pullingPool: make(map[string]struct{}),
-		pushingPool: make(map[string]struct{}),
+		pullingPool: make(map[string]chan struct{}),
+		pushingPool: make(map[string]chan struct{}),
 	}
 
-	err := srv.poolAdd("pull", "test1")
-	if err != nil {
+	if _, err := srv.poolAdd("pull", "test1"); err != nil {
 		t.Fatal(err)
 	}
-	err = srv.poolAdd("pull", "test2")
-	if err != nil {
+	if _, err := srv.poolAdd("pull", "test2"); err != nil {
 		t.Fatal(err)
 	}
-	err = srv.poolAdd("push", "test1")
-	if err == nil || err.Error() != "pull test1 is already in progress" {
+	if _, err := srv.poolAdd("push", "test1"); err == nil || err.Error() != "pull test1 is already in progress" {
 		t.Fatalf("Expected `pull test1 is already in progress`")
 	}
-	err = srv.poolAdd("pull", "test1")
-	if err == nil || err.Error() != "pull test1 is already in progress" {
+	if _, err := srv.poolAdd("pull", "test1"); err == nil || err.Error() != "pull test1 is already in progress" {
 		t.Fatalf("Expected `pull test1 is already in progress`")
 	}
-	err = srv.poolAdd("wait", "test3")
-	if err == nil || err.Error() != "Unknown pool type" {
+	if _, err := srv.poolAdd("wait", "test3"); err == nil || err.Error() != "Unknown pool type" {
 		t.Fatalf("Expected `Unknown pool type`")
 	}
-
-	err = srv.poolRemove("pull", "test2")
-	if err != nil {
+	if err := srv.poolRemove("pull", "test2"); err != nil {
 		t.Fatal(err)
 	}
-	err = srv.poolRemove("pull", "test2")
-	if err != nil {
+	if err := srv.poolRemove("pull", "test2"); err != nil {
 		t.Fatal(err)
 	}
-	err = srv.poolRemove("pull", "test1")
-	if err != nil {
+	if err := srv.poolRemove("pull", "test1"); err != nil {
 		t.Fatal(err)
 	}
-	err = srv.poolRemove("push", "test1")
-	if err != nil {
+	if err := srv.poolRemove("push", "test1"); err != nil {
 		t.Fatal(err)
 	}
-	err = srv.poolRemove("wait", "test3")
-	if err == nil || err.Error() != "Unknown pool type" {
+	if err := srv.poolRemove("wait", "test3"); err == nil || err.Error() != "Unknown pool type" {
 		t.Fatalf("Expected `Unknown pool type`")
 	}
 }
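
TestPools above only covers the error strings; it does not exercise the new waiting behaviour. A hypothetical extra test one could drop into the same file (relying only on the poolAdd/poolRemove signatures shown in this diff) would check that the channel handed to a losing caller is closed once the winner finishes:

func TestPoolWait(t *testing.T) {
	srv := &Server{
		pullingPool: make(map[string]chan struct{}),
		pushingPool: make(map[string]chan struct{}),
	}

	// First caller registers the pull.
	if _, err := srv.poolAdd("pull", "test1"); err != nil {
		t.Fatal(err)
	}
	// Second caller loses the race and receives the in-flight channel.
	c, err := srv.poolAdd("pull", "test1")
	if err == nil {
		t.Fatal("expected an 'already in progress' error")
	}
	// Finishing the pull closes the channel and releases the waiter.
	if err := srv.poolRemove("pull", "test1"); err != nil {
		t.Fatal(err)
	}
	select {
	case <-c:
	default:
		t.Fatal("expected the in-flight channel to be closed")
	}
}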

+ 9 - 3
tags_unit_test.go

@@ -1,6 +1,7 @@
 package docker
 
 import (
+	"github.com/dotcloud/docker/graphdriver"
 	"github.com/dotcloud/docker/utils"
 	"os"
 	"path"
@@ -8,12 +9,16 @@ import (
 )
 
 const (
-	testImageName string = "myapp"
-	testImageID   string = "foo"
+	testImageName = "myapp"
+	testImageID   = "foo"
 )
 
 func mkTestTagStore(root string, t *testing.T) *TagStore {
-	graph, err := NewGraph(root)
+	driver, err := graphdriver.New(root)
+	if err != nil {
+		t.Fatal(err)
+	}
+	graph, err := NewGraph(root, driver)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -42,6 +47,7 @@ func TestLookupImage(t *testing.T) {
 	}
 	defer os.RemoveAll(tmp)
 	store := mkTestTagStore(tmp, t)
+	defer store.graph.driver.Cleanup()
 
 	if img, err := store.LookupImage(testImageName); err != nil {
 		t.Fatal(err)

+ 48 - 0
utils.go

@@ -1,14 +1,43 @@
 package docker
 
+/*
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <errno.h>
+
+// See linux.git/fs/btrfs/ioctl.h
+#define BTRFS_IOCTL_MAGIC 0x94
+#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int)
+
+int
+btrfs_reflink(int fd_out, int fd_in)
+{
+  int res;
+  res = ioctl(fd_out, BTRFS_IOC_CLONE, fd_in);
+  if (res < 0)
+    return errno;
+  return 0;
+}
+
+*/
+import "C"
 import (
 import (
 	"fmt"
 	"fmt"
+	"github.com/dotcloud/docker/archive"
 	"github.com/dotcloud/docker/namesgenerator"
 	"github.com/dotcloud/docker/namesgenerator"
 	"github.com/dotcloud/docker/utils"
 	"github.com/dotcloud/docker/utils"
+	"io"
 	"io/ioutil"
 	"io/ioutil"
+	"os"
 	"strconv"
 	"strconv"
 	"strings"
 	"strings"
+	"syscall"
 )
 )
 
 
+type Change struct {
+	archive.Change
+}
+
 // Compare two Config struct. Do not compare the "Image" nor "Hostname" fields
 // Compare two Config struct. Do not compare the "Image" nor "Hostname" fields
 // If OpenStdin is set, then it differs
 // If OpenStdin is set, then it differs
 func CompareConfig(a, b *Config) bool {
 func CompareConfig(a, b *Config) bool {
@@ -317,6 +346,14 @@ func migratePortMappings(config *Config, hostConfig *HostConfig) error {
 	return nil
 }
 
+func BtrfsReflink(fd_out, fd_in uintptr) error {
+	res := C.btrfs_reflink(C.int(fd_out), C.int(fd_in))
+	if res != 0 {
+		return syscall.Errno(res)
+	}
+	return nil
+}
+
 // Links come in the format of
 // name:alias
 func parseLink(rawLink string) (map[string]string, error) {
@@ -349,3 +386,14 @@ func (c *checker) Exists(name string) bool {
 func generateRandomName(runtime *Runtime) (string, error) {
 	return namesgenerator.GenerateRandomName(&checker{runtime})
 }
+
+func CopyFile(dstFile, srcFile *os.File) error {
+	err := BtrfsReflink(dstFile.Fd(), srcFile.Fd())
+	if err == nil {
+		return nil
+	}
+
+	// Fall back to normal copy
+	_, err = io.Copy(dstFile, srcFile)
+	return err
+}
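
BtrfsReflink wraps the BTRFS_IOC_CLONE ioctl declared in the cgo preamble above: on btrfs it clones the source file's extents copy-on-write, and CopyFile falls back to a plain io.Copy whenever the ioctl fails (for example when the two files do not live on the same btrfs filesystem). A usage sketch, assuming it sits in the same package, which already imports os; the helper name copyPath and the paths are illustrative only:

// Illustrative only: copy src to dst using the reflink-or-copy helper above.
func copyPath(dst, src string) error {
	srcFile, err := os.Open(src)
	if err != nil {
		return err
	}
	defer srcFile.Close()

	dstFile, err := os.Create(dst)
	if err != nil {
		return err
	}
	defer dstFile.Close()

	// Reflink when possible (same btrfs filesystem), plain copy otherwise.
	return CopyFile(dstFile, srcFile)
}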

+ 35 - 0
utils/fs.go

@@ -0,0 +1,35 @@
+package utils
+
+import (
+	"os"
+	"path/filepath"
+	"syscall"
+)
+
+// TreeSize walks a directory tree and returns its total size in bytes.
+func TreeSize(dir string) (size int64, err error) {
+	data := make(map[uint64]bool)
+	err = filepath.Walk(dir, func(d string, fileInfo os.FileInfo, e error) error {
+		// Ignore directory sizes
+		if fileInfo == nil {
+			return nil
+		}
+
+		s := fileInfo.Size()
+		if fileInfo.IsDir() || s == 0 {
+			return nil
+		}
+
+		// Check inode to handle hard links correctly
+		inode := fileInfo.Sys().(*syscall.Stat_t).Ino
+		if _, exists := data[inode]; exists {
+			return nil
+		}
+		data[inode] = false
+
+		size += s
+
+		return nil
+	})
+	return
+}
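
TreeSize adds up the sizes of regular files under dir and records each inode so hard-linked files are only counted once, which keeps the reported size honest when layers share files via hard links. A small usage sketch; the path is illustrative only:

package main

import (
	"fmt"
	"log"

	"github.com/dotcloud/docker/utils"
)

func main() {
	// Illustrative path: sum the on-disk size of one directory tree.
	size, err := utils.TreeSize("/var/lib/docker/containers/abc123/rootfs")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%d bytes\n", size)
}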