Add overlayfs graph backend

This backend uses the overlayfs union filesystem for containers
plus hard link file sharing for images.

Each container/image can have a "root" subdirectory which is a plain
filesystem hierarchy, or they can use overlayfs.

If they use overlayfs there is a "upper" directory and a "lower-id"
file, as well as "merged" and "work" directories. The "upper"
directory has the upper layer of the overlay, and "lower-id" contains
the id of the parent whose "root" directory shall be used as the lower
layer in the overlay. The overlay itself is mounted in the "merged"
directory, and the "work" dir is needed for overlayfs to work.

When a overlay layer is created there are two cases, either the
parent has a "root" dir, then we start out with a empty "upper"
directory overlaid on the parents root. This is typically the
case with the init layer of a container which is based on an image.
If there is no "root" in the parent, we inherit the lower-id from
the parent and start by making a copy if the parents "upper" dir.
This is typically the case for a container layer which copies
its parent -init upper layer.

Additionally we also have a custom implementation of ApplyLayer
which makes a recursive copy of the parent "root" layer using
hardlinks to share file data, and then applies the layer on top
of that. This means all chile images share file (but not directory)
data with the parent.

Docker-DCO-1.1-Signed-off-by: Alexander Larsson <alexl@redhat.com> (github: alexlarsson)
This commit is contained in:
Alexander Larsson 2014-08-19 11:23:55 +02:00
parent 29ebb53b52
commit 453552c838
5 changed files with 563 additions and 0 deletions

View file

@ -0,0 +1,7 @@
// +build !exclude_graphdriver_overlayfs
package daemon
import (
_ "github.com/docker/docker/daemon/graphdriver/overlayfs"
)

View file

@ -81,6 +81,8 @@ var (
"btrfs",
"devicemapper",
"vfs",
// experimental, has to be enabled manually for now
"overlayfs",
}
ErrNotSupported = errors.New("driver not supported")

View file

@ -0,0 +1,157 @@
// +build linux
package overlayfs
import (
"fmt"
"io"
"os"
"path/filepath"
"syscall"
"github.com/docker/docker/pkg/system"
)
type CopyFlags int
const (
CopyHardlink CopyFlags = 1 << iota
)
func copyRegular(srcPath, dstPath string, mode os.FileMode) error {
srcFile, err := os.Open(srcPath)
if err != nil {
return err
}
defer srcFile.Close()
dstFile, err := os.OpenFile(dstPath, os.O_WRONLY|os.O_CREATE, mode)
if err != nil {
return err
}
defer dstFile.Close()
_, err = io.Copy(dstFile, srcFile)
return err
}
func copyXattr(srcPath, dstPath, attr string) error {
data, err := system.Lgetxattr(srcPath, attr)
if err != nil {
return err
}
if data != nil {
if err := system.Lsetxattr(dstPath, attr, data, 0); err != nil {
return err
}
}
return nil
}
func copyDir(srcDir, dstDir string, flags CopyFlags) error {
err := filepath.Walk(srcDir, func(srcPath string, f os.FileInfo, err error) error {
if err != nil {
return err
}
// Rebase path
relPath, err := filepath.Rel(srcDir, srcPath)
if err != nil {
return err
}
dstPath := filepath.Join(dstDir, relPath)
if err != nil {
return err
}
stat, ok := f.Sys().(*syscall.Stat_t)
if !ok {
return fmt.Errorf("Unable to get raw syscall.Stat_t data for %s", srcPath)
}
switch f.Mode() & os.ModeType {
case 0: // Regular file
if flags&CopyHardlink != 0 {
if err := os.Link(srcPath, dstPath); err != nil {
return err
}
} else {
if err := copyRegular(srcPath, dstPath, f.Mode()); err != nil {
return err
}
}
case os.ModeDir:
if err := os.Mkdir(dstPath, f.Mode()); err != nil && !os.IsExist(err) {
return err
}
case os.ModeSymlink:
link, err := os.Readlink(srcPath)
if err != nil {
return err
}
if err := os.Symlink(link, dstPath); err != nil {
return err
}
case os.ModeNamedPipe:
fallthrough
case os.ModeSocket:
if err := syscall.Mkfifo(dstPath, stat.Mode); err != nil {
return err
}
case os.ModeDevice:
if err := syscall.Mknod(dstPath, stat.Mode, int(stat.Rdev)); err != nil {
return err
}
default:
return fmt.Errorf("Unknown file type for %s\n", srcPath)
}
if err := os.Lchown(dstPath, int(stat.Uid), int(stat.Gid)); err != nil {
return err
}
if err := copyXattr(srcPath, dstPath, "security.capability"); err != nil {
return err
}
// We need to copy this attribute if it appears in an overlayfs upper layer, as
// this function is used to copy those. It is set by overlayfs if a directory
// is removed and then re-created and should not inherit anything from the
// same dir in the lower dir.
if err := copyXattr(srcPath, dstPath, "trusted.overlay.opaque"); err != nil {
return err
}
isSymlink := f.Mode()&os.ModeSymlink != 0
// There is no LChmod, so ignore mode for symlink. Also, this
// must happen after chown, as that can modify the file mode
if !isSymlink {
if err := os.Chmod(dstPath, f.Mode()); err != nil {
return err
}
}
ts := []syscall.Timespec{stat.Atim, stat.Mtim}
// syscall.UtimesNano doesn't support a NOFOLLOW flag atm, and
if !isSymlink {
if err := system.UtimesNano(dstPath, ts); err != nil {
return err
}
} else {
if err := system.LUtimesNano(dstPath, ts); err != nil {
return err
}
}
return nil
})
return err
}

View file

@ -0,0 +1,369 @@
// +build linux
package overlayfs
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path"
"strings"
"sync"
"syscall"
log "github.com/Sirupsen/logrus"
"github.com/docker/docker/daemon/graphdriver"
"github.com/docker/docker/pkg/archive"
"github.com/docker/libcontainer/label"
)
// This is a small wrapper over the NaiveDiffWriter that lets us have a custom
// implementation of ApplyDiff()
var (
ErrApplyDiffFallback = fmt.Errorf("Fall back to normal ApplyDiff")
)
type ApplyDiffProtoDriver interface {
graphdriver.ProtoDriver
ApplyDiff(id, parent string, diff archive.ArchiveReader) (bytes int64, err error)
}
type naiveDiffDriverWithApply struct {
graphdriver.Driver
applyDiff ApplyDiffProtoDriver
}
func NaiveDiffDriverWithApply(driver ApplyDiffProtoDriver) graphdriver.Driver {
return &naiveDiffDriverWithApply{
Driver: graphdriver.NaiveDiffDriver(driver),
applyDiff: driver,
}
}
func (d *naiveDiffDriverWithApply) ApplyDiff(id, parent string, diff archive.ArchiveReader) (int64, error) {
b, err := d.applyDiff.ApplyDiff(id, parent, diff)
if err == ErrApplyDiffFallback {
return d.Driver.ApplyDiff(id, parent, diff)
}
return b, err
}
// This backend uses the overlayfs union filesystem for containers
// plus hard link file sharing for images.
// Each container/image can have a "root" subdirectory which is a plain
// filesystem hierarchy, or they can use overlayfs.
// If they use overlayfs there is a "upper" directory and a "lower-id"
// file, as well as "merged" and "work" directories. The "upper"
// directory has the upper layer of the overlay, and "lower-id" contains
// the id of the parent whose "root" directory shall be used as the lower
// layer in the overlay. The overlay itself is mounted in the "merged"
// directory, and the "work" dir is needed for overlayfs to work.
// When a overlay layer is created there are two cases, either the
// parent has a "root" dir, then we start out with a empty "upper"
// directory overlaid on the parents root. This is typically the
// case with the init layer of a container which is based on an image.
// If there is no "root" in the parent, we inherit the lower-id from
// the parent and start by making a copy if the parents "upper" dir.
// This is typically the case for a container layer which copies
// its parent -init upper layer.
// Additionally we also have a custom implementation of ApplyLayer
// which makes a recursive copy of the parent "root" layer using
// hardlinks to share file data, and then applies the layer on top
// of that. This means all child images share file (but not directory)
// data with the parent.
type ActiveMount struct {
count int
path string
mounted bool
}
type Driver struct {
home string
sync.Mutex // Protects concurrent modification to active
active map[string]*ActiveMount
}
func init() {
graphdriver.Register("overlayfs", Init)
}
func Init(home string, options []string) (graphdriver.Driver, error) {
if err := supportsOverlayfs(); err != nil {
return nil, graphdriver.ErrNotSupported
}
// Create the driver home dir
if err := os.MkdirAll(home, 0755); err != nil && !os.IsExist(err) {
return nil, err
}
d := &Driver{
home: home,
active: make(map[string]*ActiveMount),
}
return NaiveDiffDriverWithApply(d), nil
}
func supportsOverlayfs() error {
// We can try to modprobe overlayfs first before looking at
// proc/filesystems for when overlayfs is supported
exec.Command("modprobe", "overlayfs").Run()
f, err := os.Open("/proc/filesystems")
if err != nil {
return err
}
defer f.Close()
s := bufio.NewScanner(f)
for s.Scan() {
if strings.Contains(s.Text(), "overlayfs") {
return nil
}
}
return graphdriver.ErrNotSupported
}
func (d *Driver) String() string {
return "overlayfs"
}
func (d *Driver) Status() [][2]string {
return nil
}
func (d *Driver) Cleanup() error {
return nil
}
func (d *Driver) Create(id string, parent string) (retErr error) {
dir := d.dir(id)
if err := os.MkdirAll(path.Dir(dir), 0700); err != nil {
return err
}
if err := os.Mkdir(dir, 0700); err != nil {
return err
}
defer func() {
// Clean up on failure
if retErr != nil {
os.RemoveAll(dir)
}
}()
// Toplevel images are just a "root" dir
if parent == "" {
if err := os.Mkdir(path.Join(dir, "root"), 0755); err != nil {
return err
}
return nil
}
parentDir := d.dir(parent)
// Ensure parent exists
if _, err := os.Lstat(parentDir); err != nil {
return err
}
// If parent has a root, just do a overlayfs to it
parentRoot := path.Join(parentDir, "root")
if s, err := os.Lstat(parentRoot); err == nil {
if err := os.Mkdir(path.Join(dir, "upper"), s.Mode()); err != nil {
return err
}
if err := os.Mkdir(path.Join(dir, "work"), 0700); err != nil {
return err
}
if err := os.Mkdir(path.Join(dir, "merged"), 0700); err != nil {
return err
}
if err := ioutil.WriteFile(path.Join(dir, "lower-id"), []byte(parent), 0666); err != nil {
return err
}
return nil
}
// Otherwise, copy the upper and the lower-id from the parent
lowerId, err := ioutil.ReadFile(path.Join(parentDir, "lower-id"))
if err != nil {
return err
}
if err := ioutil.WriteFile(path.Join(dir, "lower-id"), lowerId, 0666); err != nil {
return err
}
parentUpperDir := path.Join(parentDir, "upper")
s, err := os.Lstat(parentUpperDir)
if err != nil {
return err
}
upperDir := path.Join(dir, "upper")
if err := os.Mkdir(upperDir, s.Mode()); err != nil {
return err
}
if err := os.Mkdir(path.Join(dir, "work"), 0700); err != nil {
return err
}
if err := os.Mkdir(path.Join(dir, "merged"), 0700); err != nil {
return err
}
return copyDir(parentUpperDir, upperDir, 0)
}
func (d *Driver) dir(id string) string {
return path.Join(d.home, id)
}
func (d *Driver) Remove(id string) error {
dir := d.dir(id)
if _, err := os.Stat(dir); err != nil {
return err
}
return os.RemoveAll(dir)
}
func (d *Driver) Get(id string, mountLabel string) (string, error) {
// Protect the d.active from concurrent access
d.Lock()
defer d.Unlock()
mount := d.active[id]
if mount != nil {
mount.count++
return mount.path, nil
} else {
mount = &ActiveMount{count: 1}
}
dir := d.dir(id)
if _, err := os.Stat(dir); err != nil {
return "", err
}
// If id has a root, just return it
rootDir := path.Join(dir, "root")
if _, err := os.Stat(rootDir); err == nil {
mount.path = rootDir
d.active[id] = mount
return mount.path, nil
}
lowerId, err := ioutil.ReadFile(path.Join(dir, "lower-id"))
if err != nil {
return "", err
}
lowerDir := path.Join(d.dir(string(lowerId)), "root")
upperDir := path.Join(dir, "upper")
workDir := path.Join(dir, "work")
mergedDir := path.Join(dir, "merged")
opts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lowerDir, upperDir, workDir)
if err := syscall.Mount("overlayfs", mergedDir, "overlayfs", 0, label.FormatMountLabel(opts, mountLabel)); err != nil {
return "", err
}
mount.path = mergedDir
mount.mounted = true
d.active[id] = mount
return mount.path, nil
}
func (d *Driver) Put(id string) {
// Protect the d.active from concurrent access
d.Lock()
defer d.Unlock()
mount := d.active[id]
if mount == nil {
log.Debugf("Put on a non-mounted device %s", id)
return
}
mount.count--
if mount.count > 0 {
return
}
if mount.mounted {
if err := syscall.Unmount(mount.path, 0); err != nil {
log.Debugf("Failed to unmount %s overlayfs: %v", id, err)
}
}
delete(d.active, id)
}
func (d *Driver) ApplyDiff(id string, parent string, diff archive.ArchiveReader) (bytes int64, err error) {
dir := d.dir(id)
if parent == "" {
return 0, ErrApplyDiffFallback
}
parentRootDir := path.Join(d.dir(parent), "root")
if _, err := os.Stat(parentRootDir); err != nil {
return 0, ErrApplyDiffFallback
}
// We now know there is a parent, and it has a "root" directory containing
// the full root filesystem. We can just hardlink it and apply the
// layer. This relies on two things:
// 1) ApplyDiff is only run once on a clean (no writes to upper layer) container
// 2) ApplyDiff doesn't do any in-place writes to files (would break hardlinks)
// These are all currently true and are not expected to break
tmpRootDir, err := ioutil.TempDir(dir, "tmproot")
if err != nil {
return 0, err
}
defer func() {
if err != nil {
os.RemoveAll(tmpRootDir)
} else {
os.RemoveAll(path.Join(dir, "upper"))
os.RemoveAll(path.Join(dir, "work"))
os.RemoveAll(path.Join(dir, "merged"))
os.RemoveAll(path.Join(dir, "lower-id"))
}
}()
if err = copyDir(parentRootDir, tmpRootDir, CopyHardlink); err != nil {
return 0, err
}
if err := archive.ApplyLayer(tmpRootDir, diff); err != nil {
return 0, err
}
rootDir := path.Join(dir, "root")
if err := os.Rename(tmpRootDir, rootDir); err != nil {
return 0, err
}
changes, err := archive.ChangesDirs(rootDir, parentRootDir)
if err != nil {
return 0, err
}
return archive.ChangesSize(rootDir, changes), nil
}
func (d *Driver) Exists(id string) bool {
_, err := os.Stat(d.dir(id))
return err == nil
}

View file

@ -0,0 +1,28 @@
package overlayfs
import (
"github.com/docker/docker/daemon/graphdriver/graphtest"
"testing"
)
// This avoids creating a new driver for each test if all tests are run
// Make sure to put new tests between TestOverlayfsSetup and TestOverlayfsTeardown
func TestOverlayfsSetup(t *testing.T) {
graphtest.GetDriver(t, "overlayfs")
}
func TestOverlayfsCreateEmpty(t *testing.T) {
graphtest.DriverTestCreateEmpty(t, "overlayfs")
}
func TestOverlayfsCreateBase(t *testing.T) {
graphtest.DriverTestCreateBase(t, "overlayfs")
}
func TestOverlayfsCreateSnap(t *testing.T) {
graphtest.DriverTestCreateSnap(t, "overlayfs")
}
func TestOverlayfsTeardown(t *testing.T) {
graphtest.PutDriver(t)
}