ソースを参照

Merge pull request #22544 from Microsoft/jjh/terminate

Windows: Terminate on failed shutdown, fixes dockerd deadlock
Alexander Morozov 9 年前
コミット
e811e9784f
2 ファイル変更、19 行追加、2 行削除
  1. 13 1
      libcontainerd/client_windows.go
  2. 6 1
      libcontainerd/container_windows.go

+ 13 - 1
libcontainerd/client_windows.go

@@ -362,7 +362,19 @@ func (clnt *client) Signal(containerID string, sig int) error {
 		}
 
 		// Shutdown the compute system
-		if err := hcsshim.ShutdownComputeSystem(containerID, hcsshim.TimeoutInfinite, context); err != nil {
+		const shutdownTimeout = 5 * 60 * 1000 // 5 minutes
+		if err := hcsshim.ShutdownComputeSystem(containerID, shutdownTimeout, context); err != nil {
+			if herr, ok := err.(*hcsshim.HcsError); !ok ||
+				(herr.Err != hcsshim.ERROR_SHUTDOWN_IN_PROGRESS &&
+					herr.Err != ErrorBadPathname &&
+					herr.Err != syscall.ERROR_PATH_NOT_FOUND) {
+				logrus.Debugf("signal - error from ShutdownComputeSystem %v on %s. Calling TerminateComputeSystem", err, containerID)
+				if err := hcsshim.TerminateComputeSystem(containerID, shutdownTimeout, "signal"); err != nil {
+					logrus.Debugf("signal - ignoring error from TerminateComputeSystem on %s %v", containerID, err)
+				} else {
+					logrus.Debugf("Successful TerminateComputeSystem after failed ShutdownComputeSystem on %s during signal %v", containerID, sig)
+				}
+			}
 			logrus.Errorf("Failed to shutdown %s - %q", containerID, err)
 		}
 	}

+ 6 - 1
libcontainerd/container_windows.go

@@ -185,7 +185,12 @@ func (ctr *container) waitExit(pid uint32, processFriendlyName string, isFirstPr
 				(herr.Err != hcsshim.ERROR_SHUTDOWN_IN_PROGRESS &&
 					herr.Err != ErrorBadPathname &&
 					herr.Err != syscall.ERROR_PATH_NOT_FOUND) {
-				logrus.Warnf("Ignoring error from ShutdownComputeSystem %s", err)
+				logrus.Debugf("waitExit - error from ShutdownComputeSystem on %s %v. Calling TerminateComputeSystem", ctr.containerCommon, err)
+				if err := hcsshim.TerminateComputeSystem(ctr.containerID, shutdownTimeout, "waitExit"); err != nil {
+					logrus.Debugf("waitExit - ignoring error from TerminateComputeSystem %s %v", ctr.containerID, err)
+				} else {
+					logrus.Debugf("Successful TerminateComputeSystem after failed ShutdownComputeSystem on %s in waitExit", ctr.containerID)
+				}
 			}
 		} else {
 			logrus.Debugf("Completed shutting down container %s", ctr.containerID)