Quellcode durchsuchen

daemon: fix tsk.Delete blocking during container exit

A call to tsk.Delete() in handleContainerExit can block
indefinitely due to blocked output streams of the container. To fix
that, close the container's output streams if the Delete call has not
returned within 3 seconds (and continue waiting for Delete to
complete after that).

Signed-off-by: Daniil Sigalov <asterite@seclab.cs.msu.ru>
Daniil Sigalov vor 1 Jahr
Ursprung
Commit
bc906c8a12
1 geänderte Dateien mit 34 neuen und 13 gelöschten Zeilen
  1. 34 13
      daemon/monitor.go

+ 34 - 13
daemon/monitor.go

@@ -27,7 +27,10 @@ func (daemon *Daemon) setStateCounter(c *container.Container) {
 }
 
 func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontainerdtypes.EventInfo) error {
-	var exitStatus container.ExitStatus
+	var (
+		exitStatus       container.ExitStatus
+		taskDeletionDone chan struct{}
+	)
 	c.Lock()
 
 	cfg := daemon.config()
@@ -38,19 +41,33 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
 
 	tsk, ok := c.Task()
 	if ok {
-		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
-		es, err := tsk.Delete(ctx)
-		cancel()
-		if err != nil {
-			log.G(ctx).WithFields(log.Fields{
-				"error":     err,
-				"container": c.ID,
-			}).Warn("failed to delete container from containerd")
-		} else {
-			exitStatus = container.ExitStatus{
-				ExitCode: int(es.ExitCode()),
-				ExitedAt: es.ExitTime(),
+		taskDeletionDone = make(chan struct{})
+		go func() {
+			defer close(taskDeletionDone)
+			ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+			es, err := tsk.Delete(ctx)
+			cancel()
+			if err != nil {
+				log.G(ctx).WithFields(log.Fields{
+					"error":     err,
+					"container": c.ID,
+				}).Warn("failed to delete container from containerd")
+			} else {
+				exitStatus = container.ExitStatus{
+					ExitCode: int(es.ExitCode()),
+					ExitedAt: es.ExitTime(),
+				}
 			}
+		}()
+
+		deletionIOCloseTimeout := time.NewTimer(3 * time.Second)
+		select {
+		case <-taskDeletionDone:
+			deletionIOCloseTimeout.Stop()
+		case <-deletionIOCloseTimeout.C:
+			// if tsk.Delete(ctx) did not exit after 3 seconds, try to close IO
+			// streams - they may be blocking the deletion - and continue
+			// waiting after that
 		}
 	}
 
@@ -62,6 +79,10 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
 	c.Reset(ctx, false)
 	cancel()
 
+	if taskDeletionDone != nil {
+		<-taskDeletionDone
+	}
+
 	if e != nil {
 		exitStatus.ExitCode = int(e.ExitCode)
 		exitStatus.ExitedAt = e.ExitedAt