Add overlayfs graph backend
This backend uses the overlayfs union filesystem for containers plus hard link file sharing for images. Each container/image can have a "root" subdirectory which is a plain filesystem hierarchy, or they can use overlayfs. If they use overlayfs there is an "upper" directory and a "lower-id" file, as well as "merged" and "work" directories. The "upper" directory has the upper layer of the overlay, and "lower-id" contains the id of the parent whose "root" directory shall be used as the lower layer in the overlay. The overlay itself is mounted in the "merged" directory, and the "work" dir is needed for overlayfs to work. When an overlay layer is created there are two cases: either the parent has a "root" dir, in which case we start out with an empty "upper" directory overlaid on the parent's root. This is typically the case with the init layer of a container which is based on an image. If there is no "root" in the parent, we inherit the lower-id from the parent and start by making a copy of the parent's "upper" dir. This is typically the case for a container layer which copies its parent's -init upper layer. Additionally we also have a custom implementation of ApplyLayer which makes a recursive copy of the parent "root" layer using hardlinks to share file data, and then applies the layer on top of that. This means all child images share file (but not directory) data with the parent. Docker-DCO-1.1-Signed-off-by: Alexander Larsson <alexl@redhat.com> (github: alexlarsson)
This commit is contained in:
parent
29ebb53b52
commit
453552c838
5 changed files with 563 additions and 0 deletions
7
daemon/daemon_overlayfs.go
Normal file
7
daemon/daemon_overlayfs.go
Normal file
|
@ -0,0 +1,7 @@
|
|||
// +build !exclude_graphdriver_overlayfs
|
||||
|
||||
package daemon
|
||||
|
||||
import (
|
||||
_ "github.com/docker/docker/daemon/graphdriver/overlayfs"
|
||||
)
|
|
@ -81,6 +81,8 @@ var (
|
|||
"btrfs",
|
||||
"devicemapper",
|
||||
"vfs",
|
||||
// experimental, has to be enabled manually for now
|
||||
"overlayfs",
|
||||
}
|
||||
|
||||
ErrNotSupported = errors.New("driver not supported")
|
||||
|
|
157
daemon/graphdriver/overlayfs/copy.go
Normal file
157
daemon/graphdriver/overlayfs/copy.go
Normal file
|
@ -0,0 +1,157 @@
|
|||
// +build linux
|
||||
|
||||
package overlayfs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/docker/pkg/system"
|
||||
)
|
||||
|
||||
// CopyFlags is a bit set that tunes how copyDir duplicates a tree.
type CopyFlags int

// CopyHardlink asks for regular files to be hard-linked into the
// destination rather than having their contents copied.
const CopyHardlink CopyFlags = 1 << iota
|
||||
|
||||
// copyRegular copies the contents of the regular file at srcPath into
// dstPath, creating dstPath with the given mode if it does not exist.
//
// An existing destination is truncated first so that no stale bytes from
// a previous, longer file survive the copy. The error from closing the
// destination is reported when the copy itself succeeded, because a
// failed close can mean the data never reached the disk.
func copyRegular(srcPath, dstPath string, mode os.FileMode) (err error) {
	srcFile, err := os.Open(srcPath)
	if err != nil {
		return err
	}
	// Read-only handle: a close failure cannot lose data.
	defer srcFile.Close()

	dstFile, err := os.OpenFile(dstPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, mode)
	if err != nil {
		return err
	}
	defer func() {
		// Keep the first error; only surface the close error if the
		// copy was otherwise successful.
		if cerr := dstFile.Close(); err == nil {
			err = cerr
		}
	}()

	_, err = io.Copy(dstFile, srcFile)
	return err
}
|
||||
|
||||
func copyXattr(srcPath, dstPath, attr string) error {
|
||||
data, err := system.Lgetxattr(srcPath, attr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if data != nil {
|
||||
if err := system.Lsetxattr(dstPath, attr, data, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func copyDir(srcDir, dstDir string, flags CopyFlags) error {
|
||||
err := filepath.Walk(srcDir, func(srcPath string, f os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Rebase path
|
||||
relPath, err := filepath.Rel(srcDir, srcPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
dstPath := filepath.Join(dstDir, relPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stat, ok := f.Sys().(*syscall.Stat_t)
|
||||
if !ok {
|
||||
return fmt.Errorf("Unable to get raw syscall.Stat_t data for %s", srcPath)
|
||||
}
|
||||
|
||||
switch f.Mode() & os.ModeType {
|
||||
case 0: // Regular file
|
||||
if flags&CopyHardlink != 0 {
|
||||
if err := os.Link(srcPath, dstPath); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := copyRegular(srcPath, dstPath, f.Mode()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
case os.ModeDir:
|
||||
if err := os.Mkdir(dstPath, f.Mode()); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
case os.ModeSymlink:
|
||||
link, err := os.Readlink(srcPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := os.Symlink(link, dstPath); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
case os.ModeNamedPipe:
|
||||
fallthrough
|
||||
case os.ModeSocket:
|
||||
if err := syscall.Mkfifo(dstPath, stat.Mode); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
case os.ModeDevice:
|
||||
if err := syscall.Mknod(dstPath, stat.Mode, int(stat.Rdev)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
default:
|
||||
return fmt.Errorf("Unknown file type for %s\n", srcPath)
|
||||
}
|
||||
|
||||
if err := os.Lchown(dstPath, int(stat.Uid), int(stat.Gid)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := copyXattr(srcPath, dstPath, "security.capability"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// We need to copy this attribute if it appears in an overlayfs upper layer, as
|
||||
// this function is used to copy those. It is set by overlayfs if a directory
|
||||
// is removed and then re-created and should not inherit anything from the
|
||||
// same dir in the lower dir.
|
||||
if err := copyXattr(srcPath, dstPath, "trusted.overlay.opaque"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
isSymlink := f.Mode()&os.ModeSymlink != 0
|
||||
|
||||
// There is no LChmod, so ignore mode for symlink. Also, this
|
||||
// must happen after chown, as that can modify the file mode
|
||||
if !isSymlink {
|
||||
if err := os.Chmod(dstPath, f.Mode()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
ts := []syscall.Timespec{stat.Atim, stat.Mtim}
|
||||
// syscall.UtimesNano doesn't support a NOFOLLOW flag atm, and
|
||||
if !isSymlink {
|
||||
if err := system.UtimesNano(dstPath, ts); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := system.LUtimesNano(dstPath, ts); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
return err
|
||||
}
|
369
daemon/graphdriver/overlayfs/overlayfs.go
Normal file
369
daemon/graphdriver/overlayfs/overlayfs.go
Normal file
|
@ -0,0 +1,369 @@
|
|||
// +build linux
|
||||
|
||||
package overlayfs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
|
||||
log "github.com/Sirupsen/logrus"
|
||||
"github.com/docker/docker/daemon/graphdriver"
|
||||
"github.com/docker/docker/pkg/archive"
|
||||
"github.com/docker/libcontainer/label"
|
||||
)
|
||||
|
||||
// This is a small wrapper over the NaiveDiffDriver that lets us have a custom
|
||||
// implementation of ApplyDiff()
|
||||
|
||||
// ErrApplyDiffFallback is the sentinel an ApplyDiffProtoDriver returns from
// ApplyDiff to request that the wrapped driver's ApplyDiff be used instead.
var ErrApplyDiffFallback = fmt.Errorf("Fall back to normal ApplyDiff")
|
||||
|
||||
type ApplyDiffProtoDriver interface {
|
||||
graphdriver.ProtoDriver
|
||||
ApplyDiff(id, parent string, diff archive.ArchiveReader) (bytes int64, err error)
|
||||
}
|
||||
|
||||
type naiveDiffDriverWithApply struct {
|
||||
graphdriver.Driver
|
||||
applyDiff ApplyDiffProtoDriver
|
||||
}
|
||||
|
||||
func NaiveDiffDriverWithApply(driver ApplyDiffProtoDriver) graphdriver.Driver {
|
||||
return &naiveDiffDriverWithApply{
|
||||
Driver: graphdriver.NaiveDiffDriver(driver),
|
||||
applyDiff: driver,
|
||||
}
|
||||
}
|
||||
|
||||
func (d *naiveDiffDriverWithApply) ApplyDiff(id, parent string, diff archive.ArchiveReader) (int64, error) {
|
||||
b, err := d.applyDiff.ApplyDiff(id, parent, diff)
|
||||
if err == ErrApplyDiffFallback {
|
||||
return d.Driver.ApplyDiff(id, parent, diff)
|
||||
}
|
||||
return b, err
|
||||
}
|
||||
|
||||
// This backend uses the overlayfs union filesystem for containers
|
||||
// plus hard link file sharing for images.
|
||||
|
||||
// Each container/image can have a "root" subdirectory which is a plain
|
||||
// filesystem hierarchy, or they can use overlayfs.
|
||||
|
||||
// If they use overlayfs there is an "upper" directory and a "lower-id"
|
||||
// file, as well as "merged" and "work" directories. The "upper"
|
||||
// directory has the upper layer of the overlay, and "lower-id" contains
|
||||
// the id of the parent whose "root" directory shall be used as the lower
|
||||
// layer in the overlay. The overlay itself is mounted in the "merged"
|
||||
// directory, and the "work" dir is needed for overlayfs to work.
|
||||
|
||||
// When an overlay layer is created there are two cases: either the
|
||||
// parent has a "root" dir, then we start out with an empty "upper"
|
||||
// directory overlaid on the parent's root. This is typically the
|
||||
// case with the init layer of a container which is based on an image.
|
||||
// If there is no "root" in the parent, we inherit the lower-id from
|
||||
// the parent and start by making a copy of the parent's "upper" dir.
|
||||
// This is typically the case for a container layer which copies
|
||||
// its parent -init upper layer.
|
||||
|
||||
// Additionally we also have a custom implementation of ApplyLayer
|
||||
// which makes a recursive copy of the parent "root" layer using
|
||||
// hardlinks to share file data, and then applies the layer on top
|
||||
// of that. This means all child images share file (but not directory)
|
||||
// data with the parent.
|
||||
|
||||
// ActiveMount is the bookkeeping record the driver keeps for a layer that
// has been handed out by Get and not yet fully released by Put.
type ActiveMount struct {
	// count is the number of outstanding Get calls.
	count int
	// path is the directory returned to callers of Get.
	path string
	// mounted is true when path is an overlayfs mount that Put must unmount.
	mounted bool
}
|
||||
type Driver struct {
|
||||
home string
|
||||
sync.Mutex // Protects concurrent modification to active
|
||||
active map[string]*ActiveMount
|
||||
}
|
||||
|
||||
func init() {
|
||||
graphdriver.Register("overlayfs", Init)
|
||||
}
|
||||
|
||||
func Init(home string, options []string) (graphdriver.Driver, error) {
|
||||
if err := supportsOverlayfs(); err != nil {
|
||||
return nil, graphdriver.ErrNotSupported
|
||||
}
|
||||
|
||||
// Create the driver home dir
|
||||
if err := os.MkdirAll(home, 0755); err != nil && !os.IsExist(err) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
d := &Driver{
|
||||
home: home,
|
||||
active: make(map[string]*ActiveMount),
|
||||
}
|
||||
|
||||
return NaiveDiffDriverWithApply(d), nil
|
||||
}
|
||||
|
||||
func supportsOverlayfs() error {
|
||||
// We can try to modprobe overlayfs first before looking at
|
||||
// proc/filesystems for when overlayfs is supported
|
||||
exec.Command("modprobe", "overlayfs").Run()
|
||||
|
||||
f, err := os.Open("/proc/filesystems")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
if strings.Contains(s.Text(), "overlayfs") {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return graphdriver.ErrNotSupported
|
||||
}
|
||||
|
||||
func (d *Driver) String() string {
|
||||
return "overlayfs"
|
||||
}
|
||||
|
||||
func (d *Driver) Status() [][2]string {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) Cleanup() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Driver) Create(id string, parent string) (retErr error) {
|
||||
dir := d.dir(id)
|
||||
if err := os.MkdirAll(path.Dir(dir), 0700); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Mkdir(dir, 0700); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
// Clean up on failure
|
||||
if retErr != nil {
|
||||
os.RemoveAll(dir)
|
||||
}
|
||||
}()
|
||||
|
||||
// Toplevel images are just a "root" dir
|
||||
if parent == "" {
|
||||
if err := os.Mkdir(path.Join(dir, "root"), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
parentDir := d.dir(parent)
|
||||
|
||||
// Ensure parent exists
|
||||
if _, err := os.Lstat(parentDir); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// If parent has a root, just do a overlayfs to it
|
||||
parentRoot := path.Join(parentDir, "root")
|
||||
|
||||
if s, err := os.Lstat(parentRoot); err == nil {
|
||||
if err := os.Mkdir(path.Join(dir, "upper"), s.Mode()); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Mkdir(path.Join(dir, "work"), 0700); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Mkdir(path.Join(dir, "merged"), 0700); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := ioutil.WriteFile(path.Join(dir, "lower-id"), []byte(parent), 0666); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Otherwise, copy the upper and the lower-id from the parent
|
||||
|
||||
lowerId, err := ioutil.ReadFile(path.Join(parentDir, "lower-id"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := ioutil.WriteFile(path.Join(dir, "lower-id"), lowerId, 0666); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
parentUpperDir := path.Join(parentDir, "upper")
|
||||
s, err := os.Lstat(parentUpperDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
upperDir := path.Join(dir, "upper")
|
||||
if err := os.Mkdir(upperDir, s.Mode()); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Mkdir(path.Join(dir, "work"), 0700); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Mkdir(path.Join(dir, "merged"), 0700); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return copyDir(parentUpperDir, upperDir, 0)
|
||||
}
|
||||
|
||||
func (d *Driver) dir(id string) string {
|
||||
return path.Join(d.home, id)
|
||||
}
|
||||
|
||||
func (d *Driver) Remove(id string) error {
|
||||
dir := d.dir(id)
|
||||
if _, err := os.Stat(dir); err != nil {
|
||||
return err
|
||||
}
|
||||
return os.RemoveAll(dir)
|
||||
}
|
||||
|
||||
func (d *Driver) Get(id string, mountLabel string) (string, error) {
|
||||
// Protect the d.active from concurrent access
|
||||
d.Lock()
|
||||
defer d.Unlock()
|
||||
|
||||
mount := d.active[id]
|
||||
if mount != nil {
|
||||
mount.count++
|
||||
return mount.path, nil
|
||||
} else {
|
||||
mount = &ActiveMount{count: 1}
|
||||
}
|
||||
|
||||
dir := d.dir(id)
|
||||
if _, err := os.Stat(dir); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// If id has a root, just return it
|
||||
rootDir := path.Join(dir, "root")
|
||||
if _, err := os.Stat(rootDir); err == nil {
|
||||
mount.path = rootDir
|
||||
d.active[id] = mount
|
||||
return mount.path, nil
|
||||
}
|
||||
|
||||
lowerId, err := ioutil.ReadFile(path.Join(dir, "lower-id"))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
lowerDir := path.Join(d.dir(string(lowerId)), "root")
|
||||
upperDir := path.Join(dir, "upper")
|
||||
workDir := path.Join(dir, "work")
|
||||
mergedDir := path.Join(dir, "merged")
|
||||
|
||||
opts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lowerDir, upperDir, workDir)
|
||||
if err := syscall.Mount("overlayfs", mergedDir, "overlayfs", 0, label.FormatMountLabel(opts, mountLabel)); err != nil {
|
||||
return "", err
|
||||
}
|
||||
mount.path = mergedDir
|
||||
mount.mounted = true
|
||||
d.active[id] = mount
|
||||
|
||||
return mount.path, nil
|
||||
}
|
||||
|
||||
func (d *Driver) Put(id string) {
|
||||
// Protect the d.active from concurrent access
|
||||
d.Lock()
|
||||
defer d.Unlock()
|
||||
|
||||
mount := d.active[id]
|
||||
if mount == nil {
|
||||
log.Debugf("Put on a non-mounted device %s", id)
|
||||
return
|
||||
}
|
||||
|
||||
mount.count--
|
||||
if mount.count > 0 {
|
||||
return
|
||||
}
|
||||
|
||||
if mount.mounted {
|
||||
if err := syscall.Unmount(mount.path, 0); err != nil {
|
||||
log.Debugf("Failed to unmount %s overlayfs: %v", id, err)
|
||||
}
|
||||
}
|
||||
|
||||
delete(d.active, id)
|
||||
}
|
||||
|
||||
func (d *Driver) ApplyDiff(id string, parent string, diff archive.ArchiveReader) (bytes int64, err error) {
|
||||
dir := d.dir(id)
|
||||
|
||||
if parent == "" {
|
||||
return 0, ErrApplyDiffFallback
|
||||
}
|
||||
|
||||
parentRootDir := path.Join(d.dir(parent), "root")
|
||||
if _, err := os.Stat(parentRootDir); err != nil {
|
||||
return 0, ErrApplyDiffFallback
|
||||
}
|
||||
|
||||
// We now know there is a parent, and it has a "root" directory containing
|
||||
// the full root filesystem. We can just hardlink it and apply the
|
||||
// layer. This relies on two things:
|
||||
// 1) ApplyDiff is only run once on a clean (no writes to upper layer) container
|
||||
// 2) ApplyDiff doesn't do any in-place writes to files (would break hardlinks)
|
||||
// These are all currently true and are not expected to break
|
||||
|
||||
tmpRootDir, err := ioutil.TempDir(dir, "tmproot")
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
os.RemoveAll(tmpRootDir)
|
||||
} else {
|
||||
os.RemoveAll(path.Join(dir, "upper"))
|
||||
os.RemoveAll(path.Join(dir, "work"))
|
||||
os.RemoveAll(path.Join(dir, "merged"))
|
||||
os.RemoveAll(path.Join(dir, "lower-id"))
|
||||
}
|
||||
}()
|
||||
|
||||
if err = copyDir(parentRootDir, tmpRootDir, CopyHardlink); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if err := archive.ApplyLayer(tmpRootDir, diff); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
rootDir := path.Join(dir, "root")
|
||||
if err := os.Rename(tmpRootDir, rootDir); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
changes, err := archive.ChangesDirs(rootDir, parentRootDir)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return archive.ChangesSize(rootDir, changes), nil
|
||||
}
|
||||
|
||||
func (d *Driver) Exists(id string) bool {
|
||||
_, err := os.Stat(d.dir(id))
|
||||
return err == nil
|
||||
}
|
28
daemon/graphdriver/overlayfs/overlayfs_test.go
Normal file
28
daemon/graphdriver/overlayfs/overlayfs_test.go
Normal file
|
@ -0,0 +1,28 @@
|
|||
package overlayfs
|
||||
|
||||
import (
|
||||
"github.com/docker/docker/daemon/graphdriver/graphtest"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// This avoids creating a new driver for each test if all tests are run
|
||||
// Make sure to put new tests between TestOverlayfsSetup and TestOverlayfsTeardown
|
||||
func TestOverlayfsSetup(t *testing.T) {
|
||||
graphtest.GetDriver(t, "overlayfs")
|
||||
}
|
||||
|
||||
func TestOverlayfsCreateEmpty(t *testing.T) {
|
||||
graphtest.DriverTestCreateEmpty(t, "overlayfs")
|
||||
}
|
||||
|
||||
func TestOverlayfsCreateBase(t *testing.T) {
|
||||
graphtest.DriverTestCreateBase(t, "overlayfs")
|
||||
}
|
||||
|
||||
func TestOverlayfsCreateSnap(t *testing.T) {
|
||||
graphtest.DriverTestCreateSnap(t, "overlayfs")
|
||||
}
|
||||
|
||||
func TestOverlayfsTeardown(t *testing.T) {
|
||||
graphtest.PutDriver(t)
|
||||
}
|
Loading…
Reference in a new issue