Przeglądaj źródła

Merge pull request #24771 from aquasecurity/xfs_quota

Implement XFS quota for overlay2
Michael Crosby 8 lat temu
rodzic
commit
8863a9f62c

+ 62 - 11
daemon/graphdriver/overlay2/overlay.go

@@ -24,6 +24,7 @@ import (
 	"github.com/docker/docker/pkg/mount"
 	"github.com/docker/docker/pkg/parsers"
 	"github.com/docker/docker/pkg/parsers/kernel"
+	"github.com/docker/go-units"
 
 	"github.com/opencontainers/runc/libcontainer/label"
 )
@@ -76,15 +77,25 @@ const (
 	idLength = 26
 )
 
+// overlayOptions groups the settings of the overlay2 driver.
+type overlayOptions struct {
+	// overrideKernelCheck is filled in by parseOptions.
+	overrideKernelCheck bool
+	// quota is the per-container quota; parseStorageOpt stores the "size"
+	// storage option into quota.Size.
+	quota               graphdriver.Quota
+}
+
 // Driver contains information about the home directory and the list of active mounts that are created using this driver.
 type Driver struct {
-	home    string
-	uidMaps []idtools.IDMap
-	gidMaps []idtools.IDMap
-	ctr     *graphdriver.RefCounter
+	home     string
+	uidMaps  []idtools.IDMap
+	gidMaps  []idtools.IDMap
+	ctr      *graphdriver.RefCounter
+	quotaCtl *graphdriver.QuotaCtl
+	options  overlayOptions
 }
 
-var backingFs = "<unknown>"
+var (
+	backingFs             = "<unknown>"
+	projectQuotaSupported = false
+)
 
 func init() {
 	graphdriver.Register(driverName, Init)
@@ -150,11 +161,16 @@ func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
 		ctr:     graphdriver.NewRefCounter(graphdriver.NewFsChecker(graphdriver.FsMagicOverlay)),
 	}
 
-	return d, nil
-}
+	if backingFs == "xfs" {
+		// Try to enable project quota support over xfs.
+		if d.quotaCtl, err = graphdriver.NewQuotaCtl(home); err == nil {
+			projectQuotaSupported = true
+		}
+	}
 
-type overlayOptions struct {
-	overrideKernelCheck bool
+	logrus.Debugf("backingFs=%s,  projectQuotaSupported=%v", backingFs, projectQuotaSupported)
+
+	return d, nil
 }
 
 func parseOptions(options []string) (*overlayOptions, error) {
@@ -171,6 +187,7 @@ func parseOptions(options []string) (*overlayOptions, error) {
 			if err != nil {
 				return nil, err
 			}
+
 		default:
 			return nil, fmt.Errorf("overlay2: Unknown option %s\n", key)
 		}
@@ -253,8 +270,8 @@ func (d *Driver) CreateReadWrite(id, parent, mountLabel string, storageOpt map[s
 // The parent filesystem is used to configure these directories for the overlay.
 func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]string) (retErr error) {
 
-	if len(storageOpt) != 0 {
-		return fmt.Errorf("--storage-opt is not supported for overlay")
+	if len(storageOpt) != 0 && !projectQuotaSupported {
+		return fmt.Errorf("--storage-opt is supported only for overlay over xfs with 'pquota' mount option")
 	}
 
 	dir := d.dir(id)
@@ -277,6 +294,20 @@ func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]str
 		}
 	}()
 
+	if len(storageOpt) > 0 {
+		driver := &Driver{}
+		if err := d.parseStorageOpt(storageOpt, driver); err != nil {
+			return err
+		}
+
+		if driver.options.quota.Size > 0 {
+			// Set container disk quota limit
+			if err := d.quotaCtl.SetQuota(dir, driver.options.quota); err != nil {
+				return err
+			}
+		}
+	}
+
 	if err := idtools.MkdirAs(path.Join(dir, "diff"), 0755, rootUID, rootGID); err != nil {
 		return err
 	}
@@ -316,6 +347,26 @@ func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]str
 	return nil
 }
 
+// parseStorageOpt reads the per-container storage options into driver.
+// Option names are matched case-insensitively. The only recognised option is
+// "size", whose value is parsed with units.RAMInBytes and stored as the
+// project quota size; any other option is rejected with an error.
+func (d *Driver) parseStorageOpt(storageOpt map[string]string, driver *Driver) error {
+	for name, value := range storageOpt {
+		option := strings.ToLower(name)
+		if option != "size" {
+			return fmt.Errorf("Unknown option %s", option)
+		}
+
+		// Size of the disk project quota for the container.
+		limit, err := units.RAMInBytes(value)
+		if err != nil {
+			return err
+		}
+		driver.options.quota.Size = uint64(limit)
+	}
+
+	return nil
+}
+
 func (d *Driver) getLower(parent string) (string, error) {
 	parentDir := d.dir(parent)
 

+ 324 - 0
daemon/graphdriver/projectquota.go

@@ -0,0 +1,324 @@
+// +build linux
+
+//
+// projectquota.go - implements XFS project quota controls
+// for setting quota limits on a newly created directory.
+// It currently supports the legacy XFS specific ioctls.
+//
+// TODO: use generic quota control ioctl FS_IOC_FS{GET,SET}XATTR
+//       for both xfs/ext4 for kernel version >= v4.5
+//
+
+package graphdriver
+
+/*
+#include <stdlib.h>
+#include <dirent.h>
+#include <linux/fs.h>
+#include <linux/quota.h>
+#include <linux/dqblk_xfs.h>
+struct fsxattr {
+	__u32		fsx_xflags;
+	__u32		fsx_extsize;
+	__u32		fsx_nextents;
+	__u32		fsx_projid;
+	unsigned char	fsx_pad[12];
+};
+#define FS_XFLAG_PROJINHERIT	0x00000200
+#define FS_IOC_FSGETXATTR		_IOR ('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR		_IOW ('X', 32, struct fsxattr)
+
+#define PRJQUOTA	2
+#define XFS_PROJ_QUOTA	2
+#define Q_XSETPQLIM QCMD(Q_XSETQLIM, PRJQUOTA)
+#define Q_XGETPQUOTA QCMD(Q_XGETQUOTA, PRJQUOTA)
+*/
+import "C"
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path"
+	"path/filepath"
+	"syscall"
+	"unsafe"
+
+	"github.com/Sirupsen/logrus"
+)
+
+// Quota holds the quota limit params - currently we only control the blocks
+// hard limit. Size is expressed in bytes: setProjectQuota divides it by 512
+// to obtain the block limit and GetQuota multiplies the block limit by 512.
+type Quota struct {
+	Size uint64
+}
+
+// QuotaCtl - Context to be used by a storage driver (e.g. overlay)
+// that wants to apply project quotas to container dirs
+type QuotaCtl struct {
+	// path of the block device node handed to the quotactl syscall
+	backingFsBlockDev string
+	// project id that SetQuota assigns to the next directory it has not seen
+	nextProjectID     uint32
+	// directory path -> project id, filled by SetQuota and findNextProjectID
+	quotas            map[string]uint32
+}
+
+// NewQuotaCtl - initialize project quota support for the driver home
+// directory basePath and work out the first project id to hand out to the
+// next container.
+//
+// Returns nil (and error) if project quota is not supported.
+//
+// The project id of the home directory is read first; this fails if the
+// backing fs is not xfs.
+//
+// xfs_quota tool can be used to assign a project id to the driver home directory, e.g.:
+//    echo 999:/var/lib/docker/overlay2 >> /etc/projects
+//    echo docker:999 >> /etc/projid
+//    xfs_quota -x -c 'project -s docker' /<xfs mount point>
+//
+// In that case, the home directory project id acts as a "start offset" and
+// all containers are assigned larger project ids. This is a way to prevent
+// xfs_quota management from conflicting with docker.
+//
+// Support is then probed by setting an empty quota on the project id right
+// above the home directory's one. If that works, the existing directories
+// under basePath are scanned to record the project ids already allocated.
+//
+func NewQuotaCtl(basePath string) (*QuotaCtl, error) {
+	// The home directory's project id is the floor; ids are allocated above it.
+	homeProjectID, err := getProjectID(basePath)
+	if err != nil {
+		return nil, err
+	}
+	probeProjectID := homeProjectID + 1
+
+	// Create the backing filesystem device node used by quotactl.
+	blockDev, err := makeBackingFsDev(basePath)
+	if err != nil {
+		return nil, err
+	}
+
+	// Probe for project quota support by setting a quota on the first
+	// available project id.
+	if err := setProjectQuota(blockDev, probeProjectID, Quota{Size: 0}); err != nil {
+		return nil, err
+	}
+
+	ctl := &QuotaCtl{
+		backingFsBlockDev: blockDev,
+		nextProjectID:     probeProjectID + 1,
+		quotas:            make(map[string]uint32),
+	}
+
+	// Move nextProjectID past every project id already in use under basePath.
+	if err := ctl.findNextProjectID(basePath); err != nil {
+		return nil, err
+	}
+
+	logrus.Debugf("NewQuotaCtl(%s): nextProjectID = %d", basePath, ctl.nextProjectID)
+	return ctl, nil
+}
+
+// SetQuota - apply quota to targetPath. A directory that has no project id
+// recorded yet is first tagged with the next free project id; the limits are
+// then set for the directory's project id.
+func (q *QuotaCtl) SetQuota(targetPath string, quota Quota) error {
+	id, known := q.quotas[targetPath]
+	if !known {
+		// New container directory: give it a project id of its own and
+		// only record it once the id has actually been applied.
+		id = q.nextProjectID
+		if err := setProjectID(targetPath, id); err != nil {
+			return err
+		}
+		q.quotas[targetPath] = id
+		q.nextProjectID++
+	}
+
+	// Set the quota limit for the container's project id.
+	logrus.Debugf("SetQuota(%s, %d): projectID=%d", targetPath, quota.Size, id)
+	return setProjectQuota(q.backingFsBlockDev, id, quota)
+}
+
+// setProjectQuota - set the block limits of project id projectID on the xfs
+// block device backingFsBlockDev. quota.Size is converted to 512-byte blocks
+// and used for both the hard and the soft limit.
+func setProjectQuota(backingFsBlockDev string, projectID uint32, quota Quota) error {
+	blockLimit := C.__u64(quota.Size / 512)
+
+	var dq C.fs_disk_quota_t
+	dq.d_version = C.FS_DQUOT_VERSION
+	dq.d_id = C.__u32(projectID)
+	dq.d_flags = C.XFS_PROJ_QUOTA
+	dq.d_fieldmask = C.FS_DQ_BHARD | C.FS_DQ_BSOFT
+	dq.d_blk_hardlimit = blockLimit
+	dq.d_blk_softlimit = blockLimit
+
+	devPath := C.CString(backingFsBlockDev)
+	defer C.free(unsafe.Pointer(devPath))
+
+	if _, _, errno := syscall.Syscall6(syscall.SYS_QUOTACTL, C.Q_XSETPQLIM,
+		uintptr(unsafe.Pointer(devPath)), uintptr(dq.d_id),
+		uintptr(unsafe.Pointer(&dq)), 0, 0); errno != 0 {
+		return fmt.Errorf("Failed to set quota limit for projid %d on %s: %v",
+			projectID, backingFsBlockDev, errno.Error())
+	}
+
+	return nil
+}
+
+// GetQuota - read back the limit of a directory whose project id is recorded
+// in q (e.g. one configured with SetQuota) and store it, in bytes, into
+// quota.Size. An error is returned for paths without a recorded project id.
+func (q *QuotaCtl) GetQuota(targetPath string, quota *Quota) error {
+	id, known := q.quotas[targetPath]
+	if !known {
+		return fmt.Errorf("quota not found for path : %s", targetPath)
+	}
+
+	devPath := C.CString(q.backingFsBlockDev)
+	defer C.free(unsafe.Pointer(devPath))
+
+	// Get the quota limit for the container's project id.
+	var dq C.fs_disk_quota_t
+	if _, _, errno := syscall.Syscall6(syscall.SYS_QUOTACTL, C.Q_XGETPQUOTA,
+		uintptr(unsafe.Pointer(devPath)), uintptr(C.__u32(id)),
+		uintptr(unsafe.Pointer(&dq)), 0, 0); errno != 0 {
+		return fmt.Errorf("Failed to get quota limit for projid %d on %s: %v",
+			id, q.backingFsBlockDev, errno.Error())
+	}
+
+	// The hard limit is counted in 512-byte blocks.
+	quota.Size = uint64(dq.d_blk_hardlimit) * 512
+	return nil
+}
+
+// getProjectID - return the project id stored in the extended attributes of
+// the directory targetPath, as reported by the FS_IOC_FSGETXATTR ioctl.
+func getProjectID(targetPath string) (uint32, error) {
+	handle, err := openDir(targetPath)
+	if err != nil {
+		return 0, err
+	}
+	defer closeDir(handle)
+
+	var attr C.struct_fsxattr
+	if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(handle), C.FS_IOC_FSGETXATTR,
+		uintptr(unsafe.Pointer(&attr))); errno != 0 {
+		return 0, fmt.Errorf("Failed to get projid for %s: %v", targetPath, errno.Error())
+	}
+
+	return uint32(attr.fsx_projid), nil
+}
+
+// setProjectID - tag the directory targetPath with projectID. The current
+// attributes are read first so that only the project id and the
+// FS_XFLAG_PROJINHERIT flag change when they are written back.
+func setProjectID(targetPath string, projectID uint32) error {
+	handle, err := openDir(targetPath)
+	if err != nil {
+		return err
+	}
+	defer closeDir(handle)
+
+	var attr C.struct_fsxattr
+	if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(handle), C.FS_IOC_FSGETXATTR,
+		uintptr(unsafe.Pointer(&attr))); errno != 0 {
+		return fmt.Errorf("Failed to get projid for %s: %v", targetPath, errno.Error())
+	}
+
+	attr.fsx_projid = C.__u32(projectID)
+	attr.fsx_xflags |= C.FS_XFLAG_PROJINHERIT
+
+	if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(handle), C.FS_IOC_FSSETXATTR,
+		uintptr(unsafe.Pointer(&attr))); errno != 0 {
+		return fmt.Errorf("Failed to set projid for %s: %v", targetPath, errno.Error())
+	}
+
+	return nil
+}
+
+// findNextProjectID - find the next project id to be used for containers
+// by scanning driver home directory to find used project ids.
+//
+// Every sub-directory of home with a non-zero project id is recorded in
+// q.quotas, and q.nextProjectID is raised above the largest id found.
+// Non-directory entries are ignored. An error is returned if home cannot be
+// listed or if the project id of a sub-directory cannot be read.
+func (q *QuotaCtl) findNextProjectID(home string) error {
+	files, err := ioutil.ReadDir(home)
+	if err != nil {
+		// Keep the underlying cause so the failure can be diagnosed.
+		return fmt.Errorf("read directory failed : %s: %v", home, err)
+	}
+	for _, file := range files {
+		if !file.IsDir() {
+			continue
+		}
+		// Named dirPath so the imported "path" package is not shadowed.
+		dirPath := filepath.Join(home, file.Name())
+		projid, err := getProjectID(dirPath)
+		if err != nil {
+			return err
+		}
+		// A project id of 0 is not recorded as an allocated quota.
+		if projid > 0 {
+			q.quotas[dirPath] = projid
+		}
+		if q.nextProjectID <= projid {
+			q.nextProjectID = projid + 1
+		}
+	}
+
+	return nil
+}
+
+// free releases the C string p by passing it to C.free.
+func free(p *C.char) {
+	ptr := unsafe.Pointer(p)
+	C.free(ptr)
+}
+
+// openDir opens the directory at path with opendir(3) and returns the
+// resulting stream, which the caller must release with closeDir.
+// On failure the error names the path and carries the errno reported by
+// opendir.
+func openDir(path string) (*C.DIR, error) {
+	cPath := C.CString(path)
+	defer free(cPath)
+
+	// The two-value form of a cgo call returns the C errno as a Go error.
+	dir, err := C.opendir(cPath)
+	if dir == nil {
+		return nil, fmt.Errorf("Can't open dir %s: %v", path, err)
+	}
+	return dir, nil
+}
+
+// closeDir closes a directory stream with closedir(3); a nil dir is a no-op.
+func closeDir(dir *C.DIR) {
+	if dir == nil {
+		return
+	}
+	C.closedir(dir)
+}
+
+// getDirFd returns the result of dirfd(3) for dir, converted to the uintptr
+// form expected by syscall.Syscall.
+func getDirFd(dir *C.DIR) uintptr {
+	fd := C.dirfd(dir)
+	return uintptr(fd)
+}
+
+// makeBackingFsDev gets the backing block device of the driver home directory
+// and creates a block device node named "backingFsBlockDev" under the home
+// directory, to be used by quotactl commands.
+//
+// It returns the path of the created node, or an error if home cannot be
+// stat'ed, its device number cannot be determined, or mknod fails.
+func makeBackingFsDev(home string) (string, error) {
+	fileinfo, err := os.Stat(home)
+	if err != nil {
+		return "", err
+	}
+
+	// Checked assertion: fail with an error instead of panicking if the
+	// platform-specific stat data is not available.
+	stat, ok := fileinfo.Sys().(*syscall.Stat_t)
+	if !ok {
+		return "", fmt.Errorf("Failed to get device number of %s", home)
+	}
+
+	backingFsBlockDev := path.Join(home, "backingFsBlockDev")
+	// Re-create just in case someone copied the home directory over to a new device.
+	// The unlink is best effort: the node may simply not exist yet, and any
+	// other failure surfaces through the mknod error below.
+	syscall.Unlink(backingFsBlockDev)
+	if err := syscall.Mknod(backingFsBlockDev, syscall.S_IFBLK|0600, int(stat.Dev)); err != nil {
+		return "", fmt.Errorf("Failed to mknod %s: %v", backingFsBlockDev, err)
+	}
+
+	return backingFsBlockDev, nil
+}

+ 7 - 2
docs/reference/commandline/create.md

@@ -167,8 +167,13 @@ Set storage driver options per container.
     $ docker create -it --storage-opt size=120G fedora /bin/bash
 
 This (size) will allow to set the container rootfs size to 120G at creation time. 
-User cannot pass a size less than the Default BaseFS Size. This option is only 
-available for the `devicemapper`, `btrfs`, `windowsfilter`, and `zfs` graph drivers.
+This option is only available for the `devicemapper`, `btrfs`, `overlay2`,
+`windowsfilter` and `zfs` graph drivers.
+For the `devicemapper`, `btrfs`, `windowsfilter` and `zfs` graph drivers,
+user cannot pass a size less than the Default BaseFS Size.
+For the `overlay2` storage driver, the size option is only available if the
+backing fs is `xfs` and mounted with the `pquota` mount option.
+Under these conditions, user can pass any size less than the backing fs size.
 
 ### Specify isolation technology for container (--isolation)
 

+ 7 - 2
docs/reference/commandline/run.md

@@ -194,8 +194,13 @@ The `-w` lets the command being executed inside directory given, here
     $ docker run -it --storage-opt size=120G fedora /bin/bash
 
 This (size) will allow to set the container rootfs size to 120G at creation time.
-User cannot pass a size less than the Default BaseFS Size. This option is only
-available for the `devicemapper`, `btrfs`, `windowsfilter`, and `zfs` graph drivers.
+This option is only available for the `devicemapper`, `btrfs`, `overlay2`,
+`windowsfilter` and `zfs` graph drivers.
+For the `devicemapper`, `btrfs`, `windowsfilter` and `zfs` graph drivers,
+user cannot pass a size less than the Default BaseFS Size.
+For the `overlay2` storage driver, the size option is only available if the
+backing fs is `xfs` and mounted with the `pquota` mount option.
+Under these conditions, user can pass any size less than the backing fs size.
 
 ### Mount tmpfs (--tmpfs)
 

+ 5 - 2
man/docker-create.1.md

@@ -343,8 +343,11 @@ unit, `b` is used. Set LIMIT to `-1` to enable unlimited swap.
 
    $ docker create -it --storage-opt size=120G fedora /bin/bash
 
-   This (size) will allow to set the container rootfs size to 120G at creation time. User cannot pass a size less than the Default BaseFS Size.
-   This option is only available for the `devicemapper`, `btrfs`, and `zfs` graph drivers.
+   This (size) will allow to set the container rootfs size to 120G at creation time.
+   This option is only available for the `devicemapper`, `btrfs`, `overlay2` and `zfs` graph drivers.
+   For the `devicemapper`, `btrfs` and `zfs` storage drivers, user cannot pass a size less than the Default BaseFS Size.
+   For the `overlay2` storage driver, the size option is only available if the backing fs is `xfs` and mounted with the `pquota` mount option.
+   Under these conditions, user can pass any size less than the backing fs size.
   
 **--stop-signal**=*SIGTERM*
   Signal to stop a container. Default is SIGTERM.

+ 5 - 2
man/docker-run.1.md

@@ -493,8 +493,11 @@ incompatible with any restart policy other than `none`.
 
    $ docker run -it --storage-opt size=120G fedora /bin/bash
 
-   This (size) will allow to set the container rootfs size to 120G at creation time. User cannot pass a size less than the Default BaseFS Size.
-   This option is only available for the `devicemapper`, `btrfs`, and `zfs` graph drivers.
+   This (size) will allow to set the container rootfs size to 120G at creation time.
+   This option is only available for the `devicemapper`, `btrfs`, `overlay2` and `zfs` graph drivers.
+   For the `devicemapper`, `btrfs` and `zfs` storage drivers, user cannot pass a size less than the Default BaseFS Size.
+   For the `overlay2` storage driver, the size option is only available if the backing fs is `xfs` and mounted with the `pquota` mount option.
+   Under these conditions, user can pass any size less than the backing fs size.
 
 **--stop-signal**=*SIGTERM*
   Signal to stop a container. Default is SIGTERM.