浏览代码

Merge pull request #24771 from aquasecurity/xfs_quota

Implement XFS quota for overlay2
Michael Crosby 8 年之前
父节点
当前提交
8863a9f62c

+ 62 - 11
daemon/graphdriver/overlay2/overlay.go

@@ -24,6 +24,7 @@ import (
 	"github.com/docker/docker/pkg/mount"
 	"github.com/docker/docker/pkg/mount"
 	"github.com/docker/docker/pkg/parsers"
 	"github.com/docker/docker/pkg/parsers"
 	"github.com/docker/docker/pkg/parsers/kernel"
 	"github.com/docker/docker/pkg/parsers/kernel"
+	"github.com/docker/go-units"
 
 
 	"github.com/opencontainers/runc/libcontainer/label"
 	"github.com/opencontainers/runc/libcontainer/label"
 )
 )
@@ -76,15 +77,25 @@ const (
 	idLength = 26
 	idLength = 26
 )
 )
 
 
+type overlayOptions struct {
+	overrideKernelCheck bool
+	quota               graphdriver.Quota // per-container quota; Size is filled in by parseStorageOpt from the "size" storage option
+}
+
 // Driver contains information about the home directory and the list of active mounts that are created using this driver.
 // Driver contains information about the home directory and the list of active mounts that are created using this driver.
 type Driver struct {
 type Driver struct {
-	home    string
-	uidMaps []idtools.IDMap
-	gidMaps []idtools.IDMap
-	ctr     *graphdriver.RefCounter
+	home     string
+	uidMaps  []idtools.IDMap
+	gidMaps  []idtools.IDMap
+	ctr      *graphdriver.RefCounter
+	quotaCtl *graphdriver.QuotaCtl
+	options  overlayOptions
 }
 }
 
 
-var backingFs = "<unknown>"
+var (
+	backingFs             = "<unknown>"
+	projectQuotaSupported = false // set to true by Init once NewQuotaCtl succeeds on an xfs backing fs
+)
 
 
 func init() {
 func init() {
 	graphdriver.Register(driverName, Init)
 	graphdriver.Register(driverName, Init)
@@ -150,11 +161,16 @@ func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
 		ctr:     graphdriver.NewRefCounter(graphdriver.NewFsChecker(graphdriver.FsMagicOverlay)),
 		ctr:     graphdriver.NewRefCounter(graphdriver.NewFsChecker(graphdriver.FsMagicOverlay)),
 	}
 	}
 
 
-	return d, nil
-}
+	if backingFs == "xfs" {
+		// Try to enable project quota support over xfs.
+		if d.quotaCtl, err = graphdriver.NewQuotaCtl(home); err == nil {
+			projectQuotaSupported = true
+		}
+	}
 
 
-type overlayOptions struct {
-	overrideKernelCheck bool
+	logrus.Debugf("backingFs=%s,  projectQuotaSupported=%v", backingFs, projectQuotaSupported)
+
+	return d, nil
 }
 }
 
 
 func parseOptions(options []string) (*overlayOptions, error) {
 func parseOptions(options []string) (*overlayOptions, error) {
@@ -171,6 +187,7 @@ func parseOptions(options []string) (*overlayOptions, error) {
 			if err != nil {
 			if err != nil {
 				return nil, err
 				return nil, err
 			}
 			}
+
 		default:
 		default:
 			return nil, fmt.Errorf("overlay2: Unknown option %s\n", key)
 			return nil, fmt.Errorf("overlay2: Unknown option %s\n", key)
 		}
 		}
@@ -253,8 +270,8 @@ func (d *Driver) CreateReadWrite(id, parent, mountLabel string, storageOpt map[s
 // The parent filesystem is used to configure these directories for the overlay.
 // The parent filesystem is used to configure these directories for the overlay.
 func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]string) (retErr error) {
 func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]string) (retErr error) {
 
 
-	if len(storageOpt) != 0 {
-		return fmt.Errorf("--storage-opt is not supported for overlay")
+	if len(storageOpt) != 0 && !projectQuotaSupported {
+		return fmt.Errorf("--storage-opt is supported only for overlay over xfs with 'pquota' mount option")
 	}
 	}
 
 
 	dir := d.dir(id)
 	dir := d.dir(id)
@@ -277,6 +294,20 @@ func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]str
 		}
 		}
 	}()
 	}()
 
 
+	if len(storageOpt) > 0 {
+		driver := &Driver{}
+		if err := d.parseStorageOpt(storageOpt, driver); err != nil {
+			return err
+		}
+
+		if driver.options.quota.Size > 0 {
+			// Set container disk quota limit
+			if err := d.quotaCtl.SetQuota(dir, driver.options.quota); err != nil {
+				return err
+			}
+		}
+	}
+
 	if err := idtools.MkdirAs(path.Join(dir, "diff"), 0755, rootUID, rootGID); err != nil {
 	if err := idtools.MkdirAs(path.Join(dir, "diff"), 0755, rootUID, rootGID); err != nil {
 		return err
 		return err
 	}
 	}
@@ -316,6 +347,26 @@ func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]str
 	return nil
 	return nil
 }
 }
 
 
+// parseStorageOpt reads the per-container storage options and records them in
+// driver.options. Option names are matched case-insensitively; "size" is the
+// only recognised option and its parsed value becomes the quota size.
+// An unknown option or an unparsable size is reported as an error.
+func (d *Driver) parseStorageOpt(storageOpt map[string]string, driver *Driver) error {
+	for name, value := range storageOpt {
+		opt := strings.ToLower(name)
+		if opt != "size" {
+			return fmt.Errorf("Unknown option %s", opt)
+		}
+
+		// The size is later applied as the disk project quota of the container.
+		limit, err := units.RAMInBytes(value)
+		if err != nil {
+			return err
+		}
+		driver.options.quota.Size = uint64(limit)
+	}
+
+	return nil
+}
+
 func (d *Driver) getLower(parent string) (string, error) {
 func (d *Driver) getLower(parent string) (string, error) {
 	parentDir := d.dir(parent)
 	parentDir := d.dir(parent)
 
 

+ 324 - 0
daemon/graphdriver/projectquota.go

@@ -0,0 +1,324 @@
+// +build linux
+
+//
+// projectquota.go - implements XFS project quota controls
+// for setting quota limits on a newly created directory.
+// It currently supports the legacy XFS specific ioctls.
+//
+// TODO: use generic quota control ioctl FS_IOC_FS{GET,SET}XATTR
+//       for both xfs/ext4 for kernel version >= v4.5
+//
+
+package graphdriver
+
+/*
+#include <stdlib.h>
+#include <dirent.h>
+#include <linux/fs.h>
+#include <linux/quota.h>
+#include <linux/dqblk_xfs.h>
+struct fsxattr {
+	__u32		fsx_xflags;
+	__u32		fsx_extsize;
+	__u32		fsx_nextents;
+	__u32		fsx_projid;
+	unsigned char	fsx_pad[12];
+};
+#define FS_XFLAG_PROJINHERIT	0x00000200
+#define FS_IOC_FSGETXATTR		_IOR ('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR		_IOW ('X', 32, struct fsxattr)
+
+#define PRJQUOTA	2
+#define XFS_PROJ_QUOTA	2
+#define Q_XSETPQLIM QCMD(Q_XSETQLIM, PRJQUOTA)
+#define Q_XGETPQUOTA QCMD(Q_XGETQUOTA, PRJQUOTA)
+*/
+import "C"
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path"
+	"path/filepath"
+	"syscall"
+	"unsafe"
+
+	"github.com/Sirupsen/logrus"
+)
+
+// Quota holds the quota limit parameters; Size is applied as both the block hard and soft limit.
+type Quota struct {
+	Size uint64 // divided by 512 in setProjectQuota to obtain the block limit
+}
+
+// QuotaCtl is the context used by a storage driver (e.g. overlay)
+// that wants to apply project quotas to container directories.
+type QuotaCtl struct {
+	backingFsBlockDev string            // path of the block device node passed to quotactl
+	nextProjectID     uint32            // project id handed to the next directory that has none recorded
+	quotas            map[string]uint32 // directory path -> project id assigned to it
+}
+
+// NewQuotaCtl initializes project quota support for the directory basePath.
+// It checks that a quota can actually be set and determines the first
+// project id to use for the next container create.
+//
+// It returns nil together with an error if project quota is not supported.
+//
+// The project id of the home directory is read first; per the original
+// author's note this already fails when the backing fs is not xfs.
+//
+// The xfs_quota tool can be used to assign a project id to the driver home
+// directory, e.g.:
+//    echo 999:/var/lib/docker/overlay2 >> /etc/projects
+//    echo docker:999 >> /etc/projid
+//    xfs_quota -x -c 'project -s docker' /<xfs mount point>
+//
+// In that case the project id of the home directory acts as a "start offset"
+// and all containers are assigned larger project ids (e.g. >= 1000), which
+// keeps xfs_quota management from conflicting with docker.
+//
+// Next a quota is set on the first id above the home directory's id. If that
+// works, the existing directories under basePath are scanned to map the
+// project ids that are already allocated.
+//
+func NewQuotaCtl(basePath string) (*QuotaCtl, error) {
+	// The project id of the parent dir is the floor: the driver only uses
+	// ids above it.
+	baseID, err := getProjectID(basePath)
+	if err != nil {
+		return nil, err
+	}
+	firstID := baseID + 1
+
+	// quotactl addresses the filesystem through a block device node.
+	blockDev, err := makeBackingFsDev(basePath)
+	if err != nil {
+		return nil, err
+	}
+
+	// Probe for project quota support by setting a zero-sized quota on the
+	// first available project id.
+	if err = setProjectQuota(blockDev, firstID, Quota{Size: 0}); err != nil {
+		return nil, err
+	}
+
+	ctl := &QuotaCtl{
+		backingFsBlockDev: blockDev,
+		nextProjectID:     firstID + 1,
+		quotas:            make(map[string]uint32),
+	}
+
+	// Move nextProjectID past every id already in use under basePath.
+	if err = ctl.findNextProjectID(basePath); err != nil {
+		return nil, err
+	}
+
+	logrus.Debugf("NewQuotaCtl(%s): nextProjectID = %d", basePath, ctl.nextProjectID)
+	return ctl, nil
+}
+
+// SetQuota applies quota to the directory targetPath. A directory with no
+// recorded project id is first tagged with the next free id; the limits are
+// then set for that id on the backing block device.
+func (q *QuotaCtl) SetQuota(targetPath string, quota Quota) error {
+	id, known := q.quotas[targetPath]
+	if !known {
+		// First time this directory is seen: tag it with a fresh project id
+		// and record the id only after the tagging succeeded.
+		id = q.nextProjectID
+		if err := setProjectID(targetPath, id); err != nil {
+			return err
+		}
+		q.quotas[targetPath] = id
+		q.nextProjectID++
+	}
+
+	// Set the quota limit for the directory's project id.
+	logrus.Debugf("SetQuota(%s, %d): projectID=%d", targetPath, quota.Size, id)
+	return setProjectQuota(q.backingFsBlockDev, id, quota)
+}
+
+// setProjectQuota sets the block limits of project projectID on the
+// filesystem reached through the block device node backingFsBlockDev.
+// quota.Size is divided by 512 to obtain the block count, and that count is
+// used for both the hard and the soft limit.
+func setProjectQuota(backingFsBlockDev string, projectID uint32, quota Quota) error {
+	blocks := C.__u64(quota.Size / 512)
+
+	var dq C.fs_disk_quota_t
+	dq.d_version = C.FS_DQUOT_VERSION
+	dq.d_flags = C.XFS_PROJ_QUOTA
+	dq.d_id = C.__u32(projectID)
+	dq.d_fieldmask = C.FS_DQ_BHARD | C.FS_DQ_BSOFT
+	dq.d_blk_hardlimit = blocks
+	dq.d_blk_softlimit = blocks
+
+	dev := C.CString(backingFsBlockDev)
+	defer C.free(unsafe.Pointer(dev))
+
+	_, _, errno := syscall.Syscall6(syscall.SYS_QUOTACTL, C.Q_XSETPQLIM,
+		uintptr(unsafe.Pointer(dev)), uintptr(dq.d_id),
+		uintptr(unsafe.Pointer(&dq)), 0, 0)
+	if errno == 0 {
+		return nil
+	}
+
+	return fmt.Errorf("Failed to set quota limit for projid %d on %s: %v",
+		projectID, backingFsBlockDev, errno.Error())
+}
+
+// GetQuota reads back the limit of a directory whose project id is recorded
+// in q (through SetQuota or the initial directory scan) and stores it in
+// quota.Size. It fails when targetPath has no recorded project id or when the
+// quotactl call fails.
+func (q *QuotaCtl) GetQuota(targetPath string, quota *Quota) error {
+	id, known := q.quotas[targetPath]
+	if !known {
+		return fmt.Errorf("quota not found for path : %s", targetPath)
+	}
+
+	dev := C.CString(q.backingFsBlockDev)
+	defer C.free(unsafe.Pointer(dev))
+
+	// Receives the current limits of the directory's project id.
+	var dq C.fs_disk_quota_t
+	_, _, errno := syscall.Syscall6(syscall.SYS_QUOTACTL, C.Q_XGETPQUOTA,
+		uintptr(unsafe.Pointer(dev)), uintptr(C.__u32(id)),
+		uintptr(unsafe.Pointer(&dq)), 0, 0)
+	if errno != 0 {
+		return fmt.Errorf("Failed to get quota limit for projid %d on %s: %v",
+			id, q.backingFsBlockDev, errno.Error())
+	}
+
+	// The hard limit is a block count; multiply by 512 to undo the division
+	// applied when the limit was set.
+	quota.Size = uint64(dq.d_blk_hardlimit) * 512
+	return nil
+}
+
+// getProjectID returns the project id of the directory targetPath, read with
+// the FS_IOC_FSGETXATTR ioctl.
+func getProjectID(targetPath string) (uint32, error) {
+	handle, err := openDir(targetPath)
+	if err != nil {
+		return 0, err
+	}
+	defer closeDir(handle)
+
+	var attr C.struct_fsxattr
+	if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(handle), C.FS_IOC_FSGETXATTR,
+		uintptr(unsafe.Pointer(&attr))); errno != 0 {
+		return 0, fmt.Errorf("Failed to get projid for %s: %v", targetPath, errno.Error())
+	}
+
+	return uint32(attr.fsx_projid), nil
+}
+
+// setProjectID assigns projectID to the directory targetPath and turns on the
+// FS_XFLAG_PROJINHERIT flag on it.
+func setProjectID(targetPath string, projectID uint32) error {
+	handle, err := openDir(targetPath)
+	if err != nil {
+		return err
+	}
+	defer closeDir(handle)
+
+	fd := getDirFd(handle)
+
+	// Read the current attributes first so that only the project id and the
+	// inherit flag are modified when writing them back.
+	var attr C.struct_fsxattr
+	if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, C.FS_IOC_FSGETXATTR,
+		uintptr(unsafe.Pointer(&attr))); errno != 0 {
+		return fmt.Errorf("Failed to get projid for %s: %v", targetPath, errno.Error())
+	}
+
+	attr.fsx_xflags |= C.FS_XFLAG_PROJINHERIT
+	attr.fsx_projid = C.__u32(projectID)
+	if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, C.FS_IOC_FSSETXATTR,
+		uintptr(unsafe.Pointer(&attr))); errno != 0 {
+		return fmt.Errorf("Failed to set projid for %s: %v", targetPath, errno.Error())
+	}
+
+	return nil
+}
+
+// findNextProjectID scans the sub-directories of the driver home directory
+// and records every non-zero project id found in q.quotas. nextProjectID is
+// raised so that it stays above every id seen.
+//
+// It returns an error when home cannot be listed (the underlying cause is
+// included in the message) or when the project id of a sub-directory cannot
+// be read.
+func (q *QuotaCtl) findNextProjectID(home string) error {
+	entries, err := ioutil.ReadDir(home)
+	if err != nil {
+		return fmt.Errorf("read directory failed : %s: %v", home, err)
+	}
+	for _, entry := range entries {
+		if !entry.IsDir() {
+			continue
+		}
+		// Named dir rather than path so the imported path package is not shadowed.
+		dir := filepath.Join(home, entry.Name())
+		projid, err := getProjectID(dir)
+		if err != nil {
+			return err
+		}
+		// A project id of 0 is not recorded as an assigned quota.
+		if projid > 0 {
+			q.quotas[dir] = projid
+		}
+		if q.nextProjectID <= projid {
+			q.nextProjectID = projid + 1
+		}
+	}
+
+	return nil
+}
+
+// free releases the C-allocated memory p points to (e.g. a string obtained
+// from C.CString).
+func free(p *C.char) {
+	ptr := unsafe.Pointer(p)
+	C.free(ptr)
+}
+
+// openDir opens the directory at path with opendir(3) and returns the
+// directory stream. The caller is expected to release it with closeDir.
+// On failure the error names the path and the errno reported by opendir.
+func openDir(path string) (*C.DIR, error) {
+	Cpath := C.CString(path)
+	defer free(Cpath)
+
+	// The two-value form of a cgo call returns errno as err. errno is only
+	// meaningful when the call failed, so the nil result is what is tested.
+	dir, err := C.opendir(Cpath)
+	if dir == nil {
+		return nil, fmt.Errorf("Can't open dir %s: %v", path, err)
+	}
+	return dir, nil
+}
+
+// closeDir closes a directory stream returned by openDir; a nil stream is
+// ignored.
+func closeDir(dir *C.DIR) {
+	if dir == nil {
+		return
+	}
+	C.closedir(dir)
+}
+
+// getDirFd returns the file descriptor behind the directory stream dir, in
+// the uintptr form expected by syscall.Syscall.
+func getDirFd(dir *C.DIR) uintptr {
+	fd := C.dirfd(dir)
+	return uintptr(fd)
+}
+
+// makeBackingFsDev looks up the device that backs the driver home directory
+// and creates a block device node for it at <home>/backingFsBlockDev, to be
+// used by quotactl commands. It returns the path of that node.
+//
+// An error is returned when home cannot be stat'ed, when the stat result does
+// not carry a *syscall.Stat_t, or when the node cannot be created.
+func makeBackingFsDev(home string) (string, error) {
+	fileinfo, err := os.Stat(home)
+	if err != nil {
+		return "", err
+	}
+	// Checked assertion: report an unexpected stat payload instead of panicking.
+	stat, ok := fileinfo.Sys().(*syscall.Stat_t)
+	if !ok {
+		return "", fmt.Errorf("Failed to get device number of %s: unexpected stat type %T", home, fileinfo.Sys())
+	}
+
+	backingFsBlockDev := path.Join(home, "backingFsBlockDev")
+	// Re-create just in case someone copied the home directory over to a new device.
+	// The unlink result is deliberately ignored: the node may not exist yet,
+	// and if a stale node could not be removed the mknod below reports it.
+	syscall.Unlink(backingFsBlockDev)
+	if err := syscall.Mknod(backingFsBlockDev, syscall.S_IFBLK|0600, int(stat.Dev)); err != nil {
+		return "", fmt.Errorf("Failed to mknod %s: %v", backingFsBlockDev, err)
+	}
+
+	return backingFsBlockDev, nil
+}

+ 7 - 2
docs/reference/commandline/create.md

@@ -167,8 +167,13 @@ Set storage driver options per container.
     $ docker create -it --storage-opt size=120G fedora /bin/bash
     $ docker create -it --storage-opt size=120G fedora /bin/bash
 
 
 This (size) will allow to set the container rootfs size to 120G at creation time. 
 This (size) will allow to set the container rootfs size to 120G at creation time. 
-User cannot pass a size less than the Default BaseFS Size. This option is only 
-available for the `devicemapper`, `btrfs`, `windowsfilter`, and `zfs` graph drivers.
+This option is only available for the `devicemapper`, `btrfs`, `overlay2`,
+`windowsfilter` and `zfs` graph drivers.
+For the `devicemapper`, `btrfs`, `windowsfilter` and `zfs` graph drivers,
+user cannot pass a size less than the Default BaseFS Size.
+For the `overlay2` storage driver, the size option is only available if the
+backing fs is `xfs` and mounted with the `pquota` mount option.
+Under these conditions, the user can pass any size less than the backing fs size.
 
 
 ### Specify isolation technology for container (--isolation)
 ### Specify isolation technology for container (--isolation)
 
 

+ 7 - 2
docs/reference/commandline/run.md

@@ -194,8 +194,13 @@ The `-w` lets the command being executed inside directory given, here
     $ docker run -it --storage-opt size=120G fedora /bin/bash
     $ docker run -it --storage-opt size=120G fedora /bin/bash
 
 
 This (size) will allow to set the container rootfs size to 120G at creation time.
 This (size) will allow to set the container rootfs size to 120G at creation time.
-User cannot pass a size less than the Default BaseFS Size. This option is only
-available for the `devicemapper`, `btrfs`, `windowsfilter`, and `zfs` graph drivers.
+This option is only available for the `devicemapper`, `btrfs`, `overlay2`,
+`windowsfilter` and `zfs` graph drivers.
+For the `devicemapper`, `btrfs`, `windowsfilter` and `zfs` graph drivers,
+user cannot pass a size less than the Default BaseFS Size.
+For the `overlay2` storage driver, the size option is only available if the
+backing fs is `xfs` and mounted with the `pquota` mount option.
+Under these conditions, the user can pass any size less than the backing fs size.
 
 
 ### Mount tmpfs (--tmpfs)
 ### Mount tmpfs (--tmpfs)
 
 

+ 5 - 2
man/docker-create.1.md

@@ -343,8 +343,11 @@ unit, `b` is used. Set LIMIT to `-1` to enable unlimited swap.
 
 
    $ docker create -it --storage-opt size=120G fedora /bin/bash
    $ docker create -it --storage-opt size=120G fedora /bin/bash
 
 
-   This (size) will allow to set the container rootfs size to 120G at creation time. User cannot pass a size less than the Default BaseFS Size.
-   This option is only available for the `devicemapper`, `btrfs`, and `zfs` graph drivers.
+   This (size) will allow to set the container rootfs size to 120G at creation time.
+   This option is only available for the `devicemapper`, `btrfs`, `overlay2` and `zfs` graph drivers.
+   For the `devicemapper`, `btrfs` and `zfs` storage drivers, user cannot pass a size less than the Default BaseFS Size.
+   For the `overlay2` storage driver, the size option is only available if the backing fs is `xfs` and mounted with the `pquota` mount option.
+   Under these conditions, the user can pass any size less than the backing fs size.
   
   
 **--stop-signal**=*SIGTERM*
 **--stop-signal**=*SIGTERM*
   Signal to stop a container. Default is SIGTERM.
   Signal to stop a container. Default is SIGTERM.

+ 5 - 2
man/docker-run.1.md

@@ -493,8 +493,11 @@ incompatible with any restart policy other than `none`.
 
 
    $ docker run -it --storage-opt size=120G fedora /bin/bash
    $ docker run -it --storage-opt size=120G fedora /bin/bash
 
 
-   This (size) will allow to set the container rootfs size to 120G at creation time. User cannot pass a size less than the Default BaseFS Size.
-   This option is only available for the `devicemapper`, `btrfs`, and `zfs` graph drivers.
+   This (size) will allow to set the container rootfs size to 120G at creation time.
+   This option is only available for the `devicemapper`, `btrfs`, `overlay2` and `zfs` graph drivers.
+   For the `devicemapper`, `btrfs` and `zfs` storage drivers, user cannot pass a size less than the Default BaseFS Size.
+   For the `overlay2` storage driver, the size option is only available if the backing fs is `xfs` and mounted with the `pquota` mount option.
+   Under these conditions, the user can pass any size less than the backing fs size.
 
 
 **--stop-signal**=*SIGTERM*
 **--stop-signal**=*SIGTERM*
   Signal to stop a container. Default is SIGTERM.
   Signal to stop a container. Default is SIGTERM.