瀏覽代碼

devmapper: Avoid AB-BA deadlock

We currently drop the global lock while holding a per-device lock when
waiting for device removal, and then we re-aquire it when the sleep is done.
This is causing a AB-BA deadlock if anyone at the same time tries to do any
operation on that device like this:

thread A:             thread B
grabs global lock
grabs device lock
releases global lock
sleeps
                      grabs global lock
                      blocks on device lock
wakes up
blocks on global lock

To trigger this you can for instance do:

ID=`docker run -d fedora sleep 5`
cd /var/lib/docker/devicemapper/mnt/$ID
docker wait $ID
docker rm $ID &
docker rm $ID

The unmount will fail due to the mount being busy thus causing the
timeout and the second rm will then trigger the deadlock.

We fix this by adding a lock ordering such that the device locks
are always grabbed before the global lock. This is safe since the
device lookups now have a separate lock.

Docker-DCO-1.1-Signed-off-by: Alexander Larsson <alexl@redhat.com> (github: alexlarsson)
Alexander Larsson 11 年之前
父節點
當前提交
2ffef1b7eb
共有 1 個文件被更改,包括 35 次插入23 次删除
  1. 35 23
      runtime/graphdriver/devmapper/deviceset.go

+ 35 - 23
runtime/graphdriver/devmapper/deviceset.go

@@ -47,6 +47,11 @@ type DevInfo struct {
 	// sometimes release that lock while sleeping. In that case
 	// sometimes release that lock while sleeping. In that case
 	// this per-device lock is still held, protecting against
 	// this per-device lock is still held, protecting against
 	// other accesses to the device that we're doing the wait on.
 	// other accesses to the device that we're doing the wait on.
+	//
+	// WARNING: In order to avoid AB-BA deadlocks when releasing
+	// the global lock while holding the per-device locks all
+	// device locks must be aquired *before* the device lock, and
+	// multiple device locks should be aquired parent before child.
 	lock sync.Mutex `json:"-"`
 	lock sync.Mutex `json:"-"`
 }
 }
 
 
@@ -580,13 +585,6 @@ func (devices *DeviceSet) initDevmapper(doInit bool) error {
 }
 }
 
 
 func (devices *DeviceSet) AddDevice(hash, baseHash string) error {
 func (devices *DeviceSet) AddDevice(hash, baseHash string) error {
-	devices.Lock()
-	defer devices.Unlock()
-
-	if info, _ := devices.lookupDevice(hash); info != nil {
-		return fmt.Errorf("device %s already exists", hash)
-	}
-
 	baseInfo, err := devices.lookupDevice(baseHash)
 	baseInfo, err := devices.lookupDevice(baseHash)
 	if err != nil {
 	if err != nil {
 		return err
 		return err
@@ -595,6 +593,13 @@ func (devices *DeviceSet) AddDevice(hash, baseHash string) error {
 	baseInfo.lock.Lock()
 	baseInfo.lock.Lock()
 	defer baseInfo.lock.Unlock()
 	defer baseInfo.lock.Unlock()
 
 
+	devices.Lock()
+	defer devices.Unlock()
+
+	if info, _ := devices.lookupDevice(hash); info != nil {
+		return fmt.Errorf("device %s already exists", hash)
+	}
+
 	deviceId := devices.allocateDeviceId()
 	deviceId := devices.allocateDeviceId()
 
 
 	if err := devices.createSnapDevice(devices.getPoolDevName(), deviceId, baseInfo.Name(), baseInfo.DeviceId); err != nil {
 	if err := devices.createSnapDevice(devices.getPoolDevName(), deviceId, baseInfo.Name(), baseInfo.DeviceId); err != nil {
@@ -658,9 +663,6 @@ func (devices *DeviceSet) deleteDevice(info *DevInfo) error {
 }
 }
 
 
 func (devices *DeviceSet) DeleteDevice(hash string) error {
 func (devices *DeviceSet) DeleteDevice(hash string) error {
-	devices.Lock()
-	defer devices.Unlock()
-
 	info, err := devices.lookupDevice(hash)
 	info, err := devices.lookupDevice(hash)
 	if err != nil {
 	if err != nil {
 		return err
 		return err
@@ -669,6 +671,9 @@ func (devices *DeviceSet) DeleteDevice(hash string) error {
 	info.lock.Lock()
 	info.lock.Lock()
 	defer info.lock.Unlock()
 	defer info.lock.Unlock()
 
 
+	devices.Lock()
+	defer devices.Unlock()
+
 	return devices.deleteDevice(info)
 	return devices.deleteDevice(info)
 }
 }
 
 
@@ -802,8 +807,6 @@ func (devices *DeviceSet) waitClose(info *DevInfo) error {
 }
 }
 
 
 func (devices *DeviceSet) Shutdown() error {
 func (devices *DeviceSet) Shutdown() error {
-	devices.Lock()
-	defer devices.Unlock()
 
 
 	utils.Debugf("[deviceset %s] shutdown()", devices.devicePrefix)
 	utils.Debugf("[deviceset %s] shutdown()", devices.devicePrefix)
 	utils.Debugf("[devmapper] Shutting down DeviceSet: %s", devices.root)
 	utils.Debugf("[devmapper] Shutting down DeviceSet: %s", devices.root)
@@ -827,31 +830,36 @@ func (devices *DeviceSet) Shutdown() error {
 				utils.Debugf("Shutdown unmounting %s, error: %s\n", info.mountPath, err)
 				utils.Debugf("Shutdown unmounting %s, error: %s\n", info.mountPath, err)
 			}
 			}
 
 
+			devices.Lock()
 			if err := devices.deactivateDevice(info); err != nil {
 			if err := devices.deactivateDevice(info); err != nil {
 				utils.Debugf("Shutdown deactivate %s , error: %s\n", info.Hash, err)
 				utils.Debugf("Shutdown deactivate %s , error: %s\n", info.Hash, err)
 			}
 			}
+			devices.Unlock()
 		}
 		}
 		info.lock.Unlock()
 		info.lock.Unlock()
 	}
 	}
 
 
 	info, _ := devices.lookupDevice("")
 	info, _ := devices.lookupDevice("")
 	if info != nil {
 	if info != nil {
+		info.lock.Lock()
+		devices.Lock()
 		if err := devices.deactivateDevice(info); err != nil {
 		if err := devices.deactivateDevice(info); err != nil {
 			utils.Debugf("Shutdown deactivate base , error: %s\n", err)
 			utils.Debugf("Shutdown deactivate base , error: %s\n", err)
 		}
 		}
+		devices.Unlock()
+		info.lock.Unlock()
 	}
 	}
 
 
+	devices.Lock()
 	if err := devices.deactivatePool(); err != nil {
 	if err := devices.deactivatePool(); err != nil {
 		utils.Debugf("Shutdown deactivate pool , error: %s\n", err)
 		utils.Debugf("Shutdown deactivate pool , error: %s\n", err)
 	}
 	}
+	devices.Unlock()
 
 
 	return nil
 	return nil
 }
 }
 
 
 func (devices *DeviceSet) MountDevice(hash, path string, mountLabel string) error {
 func (devices *DeviceSet) MountDevice(hash, path string, mountLabel string) error {
-	devices.Lock()
-	defer devices.Unlock()
-
 	info, err := devices.lookupDevice(hash)
 	info, err := devices.lookupDevice(hash)
 	if err != nil {
 	if err != nil {
 		return err
 		return err
@@ -860,6 +868,9 @@ func (devices *DeviceSet) MountDevice(hash, path string, mountLabel string) erro
 	info.lock.Lock()
 	info.lock.Lock()
 	defer info.lock.Unlock()
 	defer info.lock.Unlock()
 
 
+	devices.Lock()
+	defer devices.Unlock()
+
 	if info.mountCount > 0 {
 	if info.mountCount > 0 {
 		if path != info.mountPath {
 		if path != info.mountPath {
 			return fmt.Errorf("Trying to mount devmapper device in multple places (%s, %s)", info.mountPath, path)
 			return fmt.Errorf("Trying to mount devmapper device in multple places (%s, %s)", info.mountPath, path)
@@ -900,8 +911,6 @@ func (devices *DeviceSet) MountDevice(hash, path string, mountLabel string) erro
 func (devices *DeviceSet) UnmountDevice(hash string, mode UnmountMode) error {
 func (devices *DeviceSet) UnmountDevice(hash string, mode UnmountMode) error {
 	utils.Debugf("[devmapper] UnmountDevice(hash=%s, mode=%d)", hash, mode)
 	utils.Debugf("[devmapper] UnmountDevice(hash=%s, mode=%d)", hash, mode)
 	defer utils.Debugf("[devmapper] UnmountDevice END")
 	defer utils.Debugf("[devmapper] UnmountDevice END")
-	devices.Lock()
-	defer devices.Unlock()
 
 
 	info, err := devices.lookupDevice(hash)
 	info, err := devices.lookupDevice(hash)
 	if err != nil {
 	if err != nil {
@@ -911,6 +920,9 @@ func (devices *DeviceSet) UnmountDevice(hash string, mode UnmountMode) error {
 	info.lock.Lock()
 	info.lock.Lock()
 	defer info.lock.Unlock()
 	defer info.lock.Unlock()
 
 
+	devices.Lock()
+	defer devices.Unlock()
+
 	if mode == UnmountFloat {
 	if mode == UnmountFloat {
 		if info.floating {
 		if info.floating {
 			return fmt.Errorf("UnmountDevice: can't float floating reference %s\n", hash)
 			return fmt.Errorf("UnmountDevice: can't float floating reference %s\n", hash)
@@ -971,9 +983,6 @@ func (devices *DeviceSet) HasInitializedDevice(hash string) bool {
 }
 }
 
 
 func (devices *DeviceSet) HasActivatedDevice(hash string) bool {
 func (devices *DeviceSet) HasActivatedDevice(hash string) bool {
-	devices.Lock()
-	defer devices.Unlock()
-
 	info, _ := devices.lookupDevice(hash)
 	info, _ := devices.lookupDevice(hash)
 	if info == nil {
 	if info == nil {
 		return false
 		return false
@@ -982,6 +991,9 @@ func (devices *DeviceSet) HasActivatedDevice(hash string) bool {
 	info.lock.Lock()
 	info.lock.Lock()
 	defer info.lock.Unlock()
 	defer info.lock.Unlock()
 
 
+	devices.Lock()
+	defer devices.Unlock()
+
 	devinfo, _ := getInfo(info.Name())
 	devinfo, _ := getInfo(info.Name())
 	return devinfo != nil && devinfo.Exists != 0
 	return devinfo != nil && devinfo.Exists != 0
 }
 }
@@ -1026,9 +1038,6 @@ func (devices *DeviceSet) deviceStatus(devName string) (sizeInSectors, mappedSec
 }
 }
 
 
 func (devices *DeviceSet) GetDeviceStatus(hash string) (*DevStatus, error) {
 func (devices *DeviceSet) GetDeviceStatus(hash string) (*DevStatus, error) {
-	devices.Lock()
-	defer devices.Unlock()
-
 	info, err := devices.lookupDevice(hash)
 	info, err := devices.lookupDevice(hash)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
@@ -1037,6 +1046,9 @@ func (devices *DeviceSet) GetDeviceStatus(hash string) (*DevStatus, error) {
 	info.lock.Lock()
 	info.lock.Lock()
 	defer info.lock.Unlock()
 	defer info.lock.Unlock()
 
 
+	devices.Lock()
+	defer devices.Unlock()
+
 	status := &DevStatus{
 	status := &DevStatus{
 		DeviceId:      info.DeviceId,
 		DeviceId:      info.DeviceId,
 		Size:          info.Size,
 		Size:          info.Size,