Browse Source

Merge pull request #23497 from shishir-a412ed/dm_task_run_failed

Fixes Issue # 23418: Race condition between device deferred removal and resume device.
Sebastiaan van Stijn 9 years ago
parent
commit
87e48ecd04
2 changed files with 105 additions and 48 deletions
  1. 81 24
      daemon/graphdriver/devmapper/deviceset.go
  2. 24 24
      pkg/devicemapper/devmapper.go

+ 81 - 24
daemon/graphdriver/devmapper/deviceset.go

@@ -528,7 +528,7 @@ func (devices *DeviceSet) activateDeviceIfNeeded(info *devInfo, ignoreDeleted bo
 
 	// Make sure deferred removal on device is canceled, if one was
 	// scheduled.
-	if err := devices.cancelDeferredRemoval(info); err != nil {
+	if err := devices.cancelDeferredRemovalIfNeeded(info); err != nil {
 		return fmt.Errorf("devmapper: Device Deferred Removal Cancellation Failed: %s", err)
 	}
 
@@ -841,11 +841,56 @@ func (devices *DeviceSet) createRegisterDevice(hash string) (*devInfo, error) {
 	return info, nil
 }
 
-func (devices *DeviceSet) createRegisterSnapDevice(hash string, baseInfo *devInfo, size uint64) error {
-	if err := devices.poolHasFreeSpace(); err != nil {
+func (devices *DeviceSet) takeSnapshot(hash string, baseInfo *devInfo, size uint64) error {
+	var (
+		devinfo *devicemapper.Info
+		err     error
+	)
+
+	if err = devices.poolHasFreeSpace(); err != nil {
+		return err
+	}
+
+	if devices.deferredRemove {
+		devinfo, err = devicemapper.GetInfoWithDeferred(baseInfo.Name())
+		if err != nil {
+			return err
+		}
+		if devinfo != nil && devinfo.DeferredRemove != 0 {
+			err = devices.cancelDeferredRemoval(baseInfo)
+			if err != nil {
+				// If Error is ErrEnxio. Device is probably already gone. Continue.
+				if err != devicemapper.ErrEnxio {
+					return err
+				}
+			} else {
+				defer devices.deactivateDevice(baseInfo)
+			}
+		}
+	} else {
+		devinfo, err = devicemapper.GetInfo(baseInfo.Name())
+		if err != nil {
+			return err
+		}
+	}
+
+	doSuspend := devinfo != nil && devinfo.Exists != 0
+
+	if doSuspend {
+		if err = devicemapper.SuspendDevice(baseInfo.Name()); err != nil {
+			return err
+		}
+		defer devicemapper.ResumeDevice(baseInfo.Name())
+	}
+
+	if err = devices.createRegisterSnapDevice(hash, baseInfo, size); err != nil {
 		return err
 	}
 
+	return nil
+}
+
+func (devices *DeviceSet) createRegisterSnapDevice(hash string, baseInfo *devInfo, size uint64) error {
 	deviceID, err := devices.getNextFreeDeviceID()
 	if err != nil {
 		return err
@@ -858,7 +903,7 @@ func (devices *DeviceSet) createRegisterSnapDevice(hash string, baseInfo *devInf
 	}
 
 	for {
-		if err := devicemapper.CreateSnapDevice(devices.getPoolDevName(), deviceID, baseInfo.Name(), baseInfo.DeviceID); err != nil {
+		if err := devicemapper.CreateSnapDeviceRaw(devices.getPoolDevName(), deviceID, baseInfo.DeviceID); err != nil {
 			if devicemapper.DeviceIDExists(err) {
 				// Device ID already exists. This should not
 				// happen. Now we have a mechanism to find
@@ -1886,7 +1931,7 @@ func (devices *DeviceSet) AddDevice(hash, baseHash string, storageOpt map[string
 		return fmt.Errorf("devmapper: Container size cannot be smaller than %s", units.HumanSize(float64(baseInfo.Size)))
 	}
 
-	if err := devices.createRegisterSnapDevice(hash, baseInfo, size); err != nil {
+	if err := devices.takeSnapshot(hash, baseInfo, size); err != nil {
 		return err
 	}
 
@@ -2128,41 +2173,53 @@ func (devices *DeviceSet) removeDevice(devname string) error {
 	return err
 }
 
-func (devices *DeviceSet) cancelDeferredRemoval(info *devInfo) error {
+func (devices *DeviceSet) cancelDeferredRemovalIfNeeded(info *devInfo) error {
 	if !devices.deferredRemove {
 		return nil
 	}
 
-	logrus.Debugf("devmapper: cancelDeferredRemoval START(%s)", info.Name())
-	defer logrus.Debugf("devmapper: cancelDeferredRemoval END(%s)", info.Name())
+	logrus.Debugf("devmapper: cancelDeferredRemovalIfNeeded START(%s)", info.Name())
+	defer logrus.Debugf("devmapper: cancelDeferredRemovalIfNeeded END(%s)", info.Name())
 
 	devinfo, err := devicemapper.GetInfoWithDeferred(info.Name())
+	if err != nil {
+		return err
+	}
 
 	if devinfo != nil && devinfo.DeferredRemove == 0 {
 		return nil
 	}
 
 	// Cancel deferred remove
-	for i := 0; i < 100; i++ {
-		err = devicemapper.CancelDeferredRemove(info.Name())
-		if err == nil {
-			break
+	if err := devices.cancelDeferredRemoval(info); err != nil {
+		// If Error is ErrEnxio. Device is probably already gone. Continue.
+		if err != devicemapper.ErrEnxio {
+			return err
 		}
+	}
+	return nil
+}
 
-		if err == devicemapper.ErrEnxio {
-			// Device is probably already gone. Return success.
-			return nil
-		}
+func (devices *DeviceSet) cancelDeferredRemoval(info *devInfo) error {
+	logrus.Debugf("devmapper: cancelDeferredRemoval START(%s)", info.Name())
+	defer logrus.Debugf("devmapper: cancelDeferredRemoval END(%s)", info.Name())
 
-		if err != devicemapper.ErrBusy {
-			return err
-		}
+	var err error
 
-		// If we see EBUSY it may be a transient error,
-		// sleep a bit a retry a few times.
-		devices.Unlock()
-		time.Sleep(100 * time.Millisecond)
-		devices.Lock()
+	// Cancel deferred remove
+	for i := 0; i < 100; i++ {
+		err = devicemapper.CancelDeferredRemove(info.Name())
+		if err != nil {
+			if err == devicemapper.ErrBusy {
+				// If we see EBUSY it may be a transient error,
+				// sleep a bit a retry a few times.
+				devices.Unlock()
+				time.Sleep(100 * time.Millisecond)
+				devices.Lock()
+				continue
+			}
+		}
+		break
 	}
 	return err
 }

+ 24 - 24
pkg/devicemapper/devmapper.go

@@ -750,51 +750,51 @@ func activateDevice(poolName string, name string, deviceID int, size uint64, ext
 	return nil
 }
 
-// CreateSnapDevice creates a snapshot based on the device identified by the baseName and baseDeviceId,
-func CreateSnapDevice(poolName string, deviceID int, baseName string, baseDeviceID int) error {
-	devinfo, _ := GetInfo(baseName)
-	doSuspend := devinfo != nil && devinfo.Exists != 0
-
-	if doSuspend {
-		if err := SuspendDevice(baseName); err != nil {
-			return err
-		}
-	}
-
+// CreateSnapDeviceRaw creates a snapshot device. Caller needs to suspend and resume the origin device if it is active.
+func CreateSnapDeviceRaw(poolName string, deviceID int, baseDeviceID int) error {
 	task, err := TaskCreateNamed(deviceTargetMsg, poolName)
 	if task == nil {
-		if doSuspend {
-			ResumeDevice(baseName)
-		}
 		return err
 	}
 
 	if err := task.setSector(0); err != nil {
-		if doSuspend {
-			ResumeDevice(baseName)
-		}
 		return fmt.Errorf("devicemapper: Can't set sector %s", err)
 	}
 
 	if err := task.setMessage(fmt.Sprintf("create_snap %d %d", deviceID, baseDeviceID)); err != nil {
-		if doSuspend {
-			ResumeDevice(baseName)
-		}
 		return fmt.Errorf("devicemapper: Can't set message %s", err)
 	}
 
 	dmSawExist = false // reset before the task is run
 	if err := task.run(); err != nil {
-		if doSuspend {
-			ResumeDevice(baseName)
-		}
 		// Caller wants to know about ErrDeviceIDExists so that it can try with a different device id.
 		if dmSawExist {
 			return ErrDeviceIDExists
 		}
-
 		return fmt.Errorf("devicemapper: Error running deviceCreate (createSnapDevice) %s", err)
+	}
+
+	return nil
+}
+
+// CreateSnapDevice creates a snapshot based on the device identified by the baseName and baseDeviceId,
+func CreateSnapDevice(poolName string, deviceID int, baseName string, baseDeviceID int) error {
+	devinfo, _ := GetInfo(baseName)
+	doSuspend := devinfo != nil && devinfo.Exists != 0
 
+	if doSuspend {
+		if err := SuspendDevice(baseName); err != nil {
+			return err
+		}
+	}
+
+	if err := CreateSnapDeviceRaw(poolName, deviceID, baseDeviceID); err != nil {
+		if doSuspend {
+			if err2 := ResumeDevice(baseName); err2 != nil {
+				return fmt.Errorf("CreateSnapDeviceRaw Error: (%v): ResumeDevice Error: (%v)", err, err2)
+			}
+		}
+		return err
 	}
 
 	if doSuspend {