Browse Source

Add zero-copy support to copy module

This changeset allows Docker's VFS, and Overlay to take advantage of
Linux's zerocopy APIs.

The copy function first tries to use the ficlone ioctl. Reason being:
 - they do not allow partial success (aka short writes)
 - clones are expected to be a fast metadata operation
See: http://oss.sgi.com/archives/xfs/2015-12/msg00356.html

If the clone fails, we fall back to copy_file_range, which internally
may fall back to splice, which has an upper limit on the size
of copy it can perform. Given that, we have to loop until the copy
is done.

For a given dirCopy operation, if the clone fails, we will not try
it again during any other file copy. Same is true with copy_file_range.

If all else fails, we fall back to traditional copy.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Sargun Dhillon 8 năm trước cách đây
mục cha
commit
3ec4ec2857
2 tập tin đã thay đổi với 127 bổ sung8 xóa
  1. 60 8
      daemon/graphdriver/copy/copy.go
  2. 67 0
      daemon/graphdriver/copy/copy_test.go

+ 60 - 8
daemon/graphdriver/copy/copy.go

@@ -2,8 +2,17 @@
 
 package copy
 
+/*
+#include <linux/fs.h>
+
+#ifndef FICLONE
+#define FICLONE		_IOW(0x94, 9, int)
+#endif
+*/
+import "C"
 import (
 	"fmt"
+	"io"
 	"os"
 	"path/filepath"
 	"syscall"
@@ -15,6 +24,7 @@ import (
 	"golang.org/x/sys/unix"
 )
 
+// Mode indicates whether to use hardlink or copy content
 type Mode int
 
 const (
@@ -24,20 +34,61 @@ const (
 	Hardlink
 )
 
-func copyRegular(srcPath, dstPath string, mode os.FileMode) error {
+func copyRegular(srcPath, dstPath string, fileinfo os.FileInfo, copyWithFileRange, copyWithFileClone *bool) error {
 	srcFile, err := os.Open(srcPath)
 	if err != nil {
 		return err
 	}
 	defer srcFile.Close()
 
-	dstFile, err := os.OpenFile(dstPath, os.O_WRONLY|os.O_CREATE, mode)
+	// If the destination file already exists, we shouldn't blow it away
+	dstFile, err := os.OpenFile(dstPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, fileinfo.Mode())
 	if err != nil {
 		return err
 	}
 	defer dstFile.Close()
 
-	_, err = pools.Copy(dstFile, srcFile)
+	if *copyWithFileClone {
+		_, _, err = unix.Syscall(unix.SYS_IOCTL, dstFile.Fd(), C.FICLONE, srcFile.Fd())
+		if err == nil {
+			return nil
+		}
+
+		*copyWithFileClone = false
+		if err == unix.EXDEV {
+			*copyWithFileRange = false
+		}
+	}
+	if *copyWithFileRange {
+		err = doCopyWithFileRange(srcFile, dstFile, fileinfo)
+		// Trying the file_clone may not have caught the exdev case
+		// as the ioctl may not have been available (therefore EINVAL)
+		if err == unix.EXDEV || err == unix.ENOSYS {
+			*copyWithFileRange = false
+		} else if err != nil {
+			return err
+		}
+	}
+	return legacyCopy(srcFile, dstFile)
+}
+
+func doCopyWithFileRange(srcFile, dstFile *os.File, fileinfo os.FileInfo) error {
+	amountLeftToCopy := fileinfo.Size()
+
+	for amountLeftToCopy > 0 {
+		n, err := unix.CopyFileRange(int(srcFile.Fd()), nil, int(dstFile.Fd()), nil, int(amountLeftToCopy), 0)
+		if err != nil {
+			return err
+		}
+
+		amountLeftToCopy = amountLeftToCopy - int64(n)
+	}
+
+	return nil
+}
+
+func legacyCopy(srcFile io.Reader, dstFile io.Writer) error {
+	_, err := pools.Copy(dstFile, srcFile)
 
 	return err
 }
@@ -58,6 +109,8 @@ func copyXattr(srcPath, dstPath, attr string) error {
 // DirCopy copies or hardlinks the contents of one directory to another,
 // properly handling xattrs, and soft links
 func DirCopy(srcDir, dstDir string, copyMode Mode) error {
+	copyWithFileRange := true
+	copyWithFileClone := true
 	err := filepath.Walk(srcDir, func(srcPath string, f os.FileInfo, err error) error {
 		if err != nil {
 			return err
@@ -85,13 +138,12 @@ func DirCopy(srcDir, dstDir string, copyMode Mode) error {
 		case 0: // Regular file
 			if copyMode == Hardlink {
 				isHardlink = true
-				if err := os.Link(srcPath, dstPath); err != nil {
-					return err
+				if err2 := os.Link(srcPath, dstPath); err2 != nil {
+					return err2
 				}
 			} else {
-				// Always fall back to Content copymode
-				if err := copyRegular(srcPath, dstPath, f.Mode()); err != nil {
-					return err
+				if err2 := copyRegular(srcPath, dstPath, f, &copyWithFileRange, &copyWithFileClone); err2 != nil {
+					return err2
 				}
 			}
 

+ 67 - 0
daemon/graphdriver/copy/copy_test.go

@@ -0,0 +1,67 @@
+// +build linux
+
+package copy
+
+import (
+	"io/ioutil"
+	"math/rand"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/docker/docker/pkg/parsers/kernel"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestIsCopyFileRangeSyscallAvailable(t *testing.T) {
+	// Verifies:
+	// 1. That copyFileRangeEnabled is being set to true when copy_file_range syscall is available
+	// 2. That isCopyFileRangeSyscallAvailable() works on "new" kernels
+	v, err := kernel.GetKernelVersion()
+	require.NoError(t, err)
+
+	copyWithFileRange := true
+	copyWithFileClone := false
+	doCopyTest(t, &copyWithFileRange, &copyWithFileClone)
+
+	if kernel.CompareKernelVersion(*v, kernel.VersionInfo{Kernel: 4, Major: 5, Minor: 0}) < 0 {
+		assert.False(t, copyWithFileRange)
+	} else {
+		assert.True(t, copyWithFileRange)
+	}
+
+}
+
+func TestCopy(t *testing.T) {
+	copyWithFileRange := true
+	copyWithFileClone := true
+	doCopyTest(t, &copyWithFileRange, &copyWithFileClone)
+}
+
+func TestCopyWithoutRange(t *testing.T) {
+	copyWithFileRange := false
+	copyWithFileClone := false
+	doCopyTest(t, &copyWithFileRange, &copyWithFileClone)
+}
+
+func doCopyTest(t *testing.T, copyWithFileRange, copyWithFileClone *bool) {
+	dir, err := ioutil.TempDir("", "docker-copy-check")
+	require.NoError(t, err)
+	defer os.RemoveAll(dir)
+	srcFilename := filepath.Join(dir, "srcFilename")
+	dstFilename := filepath.Join(dir, "dstilename")
+
+	r := rand.New(rand.NewSource(0))
+	buf := make([]byte, 1024)
+	_, err = r.Read(buf)
+	require.NoError(t, err)
+	require.NoError(t, ioutil.WriteFile(srcFilename, buf, 0777))
+	fileinfo, err := os.Stat(srcFilename)
+	require.NoError(t, err)
+
+	require.NoError(t, copyRegular(srcFilename, dstFilename, fileinfo, copyWithFileRange, copyWithFileClone))
+	readBuf, err := ioutil.ReadFile(dstFilename)
+	require.NoError(t, err)
+	assert.Equal(t, buf, readBuf)
+}