Ver Fonte

daemon: fix tsk.Delete blocking during container exit

A call to tsk.Delete() in handleContainerExit can block
indefinitely due to blocked output streams of the container. To fix
that, close the container's output streams if the Delete call does not
return within 3 seconds (and continue waiting for Delete to
complete after that).

Signed-off-by: Daniil Sigalov <asterite@seclab.cs.msu.ru>
Daniil Sigalov há 1 ano atrás
pai
commit
bc906c8a12
1 ficheiros alterados com 34 adições e 13 exclusões
  1. 34 13
      daemon/monitor.go

+ 34 - 13
daemon/monitor.go

@@ -27,7 +27,10 @@ func (daemon *Daemon) setStateCounter(c *container.Container) {
 }
 }
 
 
 func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontainerdtypes.EventInfo) error {
 func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontainerdtypes.EventInfo) error {
-	var exitStatus container.ExitStatus
+	var (
+		exitStatus       container.ExitStatus
+		taskDeletionDone chan struct{}
+	)
 	c.Lock()
 	c.Lock()
 
 
 	cfg := daemon.config()
 	cfg := daemon.config()
@@ -38,19 +41,33 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
 
 
 	tsk, ok := c.Task()
 	tsk, ok := c.Task()
 	if ok {
 	if ok {
-		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
-		es, err := tsk.Delete(ctx)
-		cancel()
-		if err != nil {
-			log.G(ctx).WithFields(log.Fields{
-				"error":     err,
-				"container": c.ID,
-			}).Warn("failed to delete container from containerd")
-		} else {
-			exitStatus = container.ExitStatus{
-				ExitCode: int(es.ExitCode()),
-				ExitedAt: es.ExitTime(),
+		taskDeletionDone = make(chan struct{})
+		go func() {
+			defer close(taskDeletionDone)
+			ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+			es, err := tsk.Delete(ctx)
+			cancel()
+			if err != nil {
+				log.G(ctx).WithFields(log.Fields{
+					"error":     err,
+					"container": c.ID,
+				}).Warn("failed to delete container from containerd")
+			} else {
+				exitStatus = container.ExitStatus{
+					ExitCode: int(es.ExitCode()),
+					ExitedAt: es.ExitTime(),
+				}
 			}
 			}
+		}()
+
+		deletionIOCloseTimeout := time.NewTimer(3 * time.Second)
+		select {
+		case <-taskDeletionDone:
+			deletionIOCloseTimeout.Stop()
+		case <-deletionIOCloseTimeout.C:
+			// if tsk.Delete(ctx) did not exit after 3 seconds, try to close IO
+			// streams - they may be blocking the deletion - and continue
+			// waiting after that
 		}
 		}
 	}
 	}
 
 
@@ -62,6 +79,10 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
 	c.Reset(ctx, false)
 	c.Reset(ctx, false)
 	cancel()
 	cancel()
 
 
+	if taskDeletionDone != nil {
+		<-taskDeletionDone
+	}
+
 	if e != nil {
 	if e != nil {
 		exitStatus.ExitCode = int(e.ExitCode)
 		exitStatus.ExitCode = int(e.ExitCode)
 		exitStatus.ExitedAt = e.ExitedAt
 		exitStatus.ExitedAt = e.ExitedAt