daemon: fix tsk.Delete blocking during container exit
A call to tsk.Delete() in handleContainerExit can block indefinitely when the container's output streams are blocked. To fix that, close the container's output streams if the Delete call has not returned within 3 seconds (and continue waiting for Delete to complete after that). Signed-off-by: Daniil Sigalov <asterite@seclab.cs.msu.ru>
This commit is contained in:
parent
695ebbafd8
commit
bc906c8a12
1 changed file with 34 additions and 13 deletions
|
@ -27,7 +27,10 @@ func (daemon *Daemon) setStateCounter(c *container.Container) {
|
|||
}
|
||||
|
||||
func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontainerdtypes.EventInfo) error {
|
||||
var exitStatus container.ExitStatus
|
||||
var (
|
||||
exitStatus container.ExitStatus
|
||||
taskDeletionDone chan struct{}
|
||||
)
|
||||
c.Lock()
|
||||
|
||||
cfg := daemon.config()
|
||||
|
@ -38,19 +41,33 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
|
|||
|
||||
tsk, ok := c.Task()
|
||||
if ok {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
es, err := tsk.Delete(ctx)
|
||||
cancel()
|
||||
if err != nil {
|
||||
log.G(ctx).WithFields(log.Fields{
|
||||
"error": err,
|
||||
"container": c.ID,
|
||||
}).Warn("failed to delete container from containerd")
|
||||
} else {
|
||||
exitStatus = container.ExitStatus{
|
||||
ExitCode: int(es.ExitCode()),
|
||||
ExitedAt: es.ExitTime(),
|
||||
taskDeletionDone = make(chan struct{})
|
||||
go func() {
|
||||
defer close(taskDeletionDone)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
es, err := tsk.Delete(ctx)
|
||||
cancel()
|
||||
if err != nil {
|
||||
log.G(ctx).WithFields(log.Fields{
|
||||
"error": err,
|
||||
"container": c.ID,
|
||||
}).Warn("failed to delete container from containerd")
|
||||
} else {
|
||||
exitStatus = container.ExitStatus{
|
||||
ExitCode: int(es.ExitCode()),
|
||||
ExitedAt: es.ExitTime(),
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
deletionIOCloseTimeout := time.NewTimer(3 * time.Second)
|
||||
select {
|
||||
case <-taskDeletionDone:
|
||||
deletionIOCloseTimeout.Stop()
|
||||
case <-deletionIOCloseTimeout.C:
|
||||
// if tsk.Delete(ctx) did not exit after 3 seconds, try to close IO
|
||||
// streams - they may be blocking the deletion - and continue
|
||||
// waiting after that
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -62,6 +79,10 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
|
|||
c.Reset(ctx, false)
|
||||
cancel()
|
||||
|
||||
if taskDeletionDone != nil {
|
||||
<-taskDeletionDone
|
||||
}
|
||||
|
||||
if e != nil {
|
||||
exitStatus.ExitCode = int(e.ExitCode)
|
||||
exitStatus.ExitedAt = e.ExitedAt
|
||||
|
|
Loading…
Reference in a new issue