瀏覽代碼

aufs: retry unmount on EBUSY

For some reason, retrying to unmount in case of getting EBUSY error
was only performed in Remove(), but not Put().

I have done some testing on Ubuntu 16.04 and 18.04 with aufs,
performing massively parallel container creation using this script:

```
NUMCTS=5000
PARALLEL=100
IMAGE=busybox

docker pull $IMAGE >/dev/null
seq $NUMCTS | parallel -j$PARALLEL docker create $IMAGE true > /dev/null
docker ps -qa | shuf | tail -n $NUMCTS | parallel -j$PARALLEL docker rm -f '{}' > /dev/null
```

Sometimes (1 to 5 times per 10000 `docker create`), aufs.Put() fails on Unmount syscall
with EBUSY during container creation:

> Error response from daemon: device or resource busy

and in docker log, with debug turned on:

> level=debug msg="Failed to unmount ID-init aufs: device or resource busy"
> level=error msg="Handler for POST /v1.30/containers/create returned error: device or resource busy"

I did some debugging by running fuser -v -M -m $MOUNT_POINT but
that reveals nothing.

This commit:

 * implements retry on EBUSY in Unmount()
 * calls Unmount() from Remove()
 * increases the number of retries from 3 to 5

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
Kir Kolyshkin 6 年之前
父節點
當前提交
57f06409b1
共有 2 個文件被更改,包括 27 次插入34 次删除
  1. 3 28
      daemon/graphdriver/aufs/aufs.go
  2. 24 6
      daemon/graphdriver/aufs/mount.go

+ 3 - 28
daemon/graphdriver/aufs/aufs.go

@@ -34,7 +34,6 @@ import (
 	"path/filepath"
 	"strings"
 	"sync"
-	"time"
 
 	"github.com/docker/docker/daemon/graphdriver"
 	"github.com/docker/docker/pkg/archive"
@@ -307,33 +306,9 @@ func (a *Driver) Remove(id string) error {
 		mountpoint = a.getMountpoint(id)
 	}
 
-	logger := logger.WithField("layer", id)
-
-	var retries int
-	for {
-		mounted, err := a.mounted(mountpoint)
-		if err != nil {
-			return err
-		}
-		if !mounted {
-			break
-		}
-
-		err = a.unmount(mountpoint)
-		if err == nil {
-			break
-		}
-
-		if errors.Cause(err) != unix.EBUSY {
-			return errors.Wrap(err, "aufs: unmount error")
-		}
-		if retries >= 5 {
-			return errors.Wrap(err, "aufs: unmount error after retries")
-		}
-		// If unmount returns EBUSY, it could be a transient error. Sleep and retry.
-		retries++
-		logger.Warnf("unmount failed due to EBUSY: retry count: %d", retries)
-		time.Sleep(100 * time.Millisecond)
+	if err := a.unmount(mountpoint); err != nil {
+		logger.WithError(err).WithField("method", "Remove()").Warn()
+		return err
 	}
 
 	// Remove the layers file for the id

+ 24 - 6
daemon/graphdriver/aufs/mount.go

@@ -5,17 +5,18 @@ package aufs // import "github.com/docker/docker/daemon/graphdriver/aufs"
 import (
 	"os/exec"
 	"syscall"
+	"time"
 
 	"github.com/docker/docker/pkg/mount"
+	"github.com/pkg/errors"
+	"golang.org/x/sys/unix"
 )
 
 // Unmount the target specified.
 func Unmount(target string) error {
-	const (
-		EINVAL  = 22 // if auplink returns this,
-		retries = 3  // retry a few times
-	)
+	const retries = 5
 
+	// auplink flush
 	for i := 0; ; i++ {
 		out, err := exec.Command("auplink", target, "flush").CombinedOutput()
 		if err == nil {
@@ -27,7 +28,7 @@ func Unmount(target string) error {
 				rc = status.ExitStatus()
 			}
 		}
-		if i >= retries || rc != EINVAL {
+		if i >= retries || rc != int(unix.EINVAL) {
 			logger.WithError(err).WithField("method", "Unmount").Warnf("auplink flush failed: %s", out)
 			break
 		}
@@ -37,5 +38,22 @@ func Unmount(target string) error {
 		logger.Debugf("auplink flush error (retrying %d/%d): %s", i+1, retries, out)
 	}
 
-	return mount.Unmount(target)
+	// unmount
+	var err error
+	for i := 0; i < retries; i++ {
+		err = mount.Unmount(target)
+		switch errors.Cause(err) {
+		case nil:
+			return nil
+		case unix.EBUSY:
+			logger.Debugf("aufs unmount %s failed with EBUSY (retrying %d/%d)", target, i+1, retries)
+			time.Sleep(100 * time.Millisecond)
+			continue // try again
+		default:
+			// any other error is fatal
+			break
+		}
+	}
+
+	return err
 }