container: add a timeout for deletion
On a heavily loaded host, we were experiencing long container(d) deletion times: containerd[8907]: time="2024-03-25T13:47:45.938479195Z" level=debug msg="event forwarded" ns=moby topic=/tasks/exit type=containerd.events.TaskExit # our control plane logic deletes the successfully exited container via # the docker API, and... containerd[8907]: time="2024-03-25T13:47:47.202055216Z" level=debug msg="failed to delete task" error="context deadline exceeded" id=a618057629b35e3bfea82d5ce4cbb057ba979498496428dfe6935a1322b94add Before4bafaa00aa
("Refactor libcontainerd to minimize c8d RPCs") when this happens, the docker API reports a 255 exit code and no error: 0a7ddd027c0497d5a titus-executor-[900884]: Processing msg from a docker: main container exited with code 255 which is especially confusing. After4bafaa00aa
, the behavior has changed to report the container's real exit code, although there is still a hard coded timeout after which containerd will (try to) stop cleaning up. We would like to wait for this cleanup, so let's add a user configurable DeleteTimeout here. Reported-by: Hechao Li <hli@netflix.com> Signed-off-by: Tycho Andersen <tandersen@netflix.com>
This commit is contained in:
parent
8383c487c6
commit
988e94868a
5 changed files with 51 additions and 9 deletions
api/types/container
container
daemon
image/cache
|
@ -73,10 +73,11 @@ type Config struct {
|
|||
// Mac Address of the container.
|
||||
//
|
||||
// Deprecated: this field is deprecated since API v1.44. Use EndpointSettings.MacAddress instead.
|
||||
MacAddress string `json:",omitempty"`
|
||||
OnBuild []string // ONBUILD metadata that were defined on the image Dockerfile
|
||||
Labels map[string]string // List of labels set to this container
|
||||
StopSignal string `json:",omitempty"` // Signal to stop a container
|
||||
StopTimeout *int `json:",omitempty"` // Timeout (in seconds) to stop a container
|
||||
Shell strslice.StrSlice `json:",omitempty"` // Shell for shell-form of RUN, CMD, ENTRYPOINT
|
||||
MacAddress string `json:",omitempty"`
|
||||
OnBuild []string // ONBUILD metadata that were defined on the image Dockerfile
|
||||
Labels map[string]string // List of labels set to this container
|
||||
StopSignal string `json:",omitempty"` // Signal to stop a container
|
||||
StopTimeout *int `json:",omitempty"` // Timeout (in seconds) to stop a container
|
||||
DeleteTimeout *int `json:",omitempty"` // Timeout (in seconds) to wait for a container to be deleted
|
||||
Shell strslice.StrSlice `json:",omitempty"` // Shell for shell-form of RUN, CMD, ENTRYPOINT
|
||||
}
|
||||
|
|
|
@ -45,8 +45,9 @@ import (
|
|||
)
|
||||
|
||||
const (
|
||||
configFileName = "config.v2.json"
|
||||
hostConfigFileName = "hostconfig.json"
|
||||
configFileName = "config.v2.json"
|
||||
hostConfigFileName = "hostconfig.json"
|
||||
defaultDeleteTimeout = 30
|
||||
)
|
||||
|
||||
// ExitStatus provides exit reasons for a container.
|
||||
|
@ -562,6 +563,15 @@ func (container *Container) StopTimeout() int {
|
|||
return defaultStopTimeout
|
||||
}
|
||||
|
||||
// DeleteTimeout returns the timeout (in seconds) to wait for a container to be deleted.
|
||||
func (container *Container) DeleteTimeout() int {
|
||||
if container.Config.DeleteTimeout != nil {
|
||||
return *container.Config.DeleteTimeout
|
||||
}
|
||||
|
||||
return defaultDeleteTimeout
|
||||
}
|
||||
|
||||
// InitDNSHostConfig ensures that the dns fields are never nil.
|
||||
// New containers don't ever have those fields nil,
|
||||
// but pre created containers can still have those nil values.
|
||||
|
|
|
@ -57,6 +57,26 @@ func TestContainerStopTimeout(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestContainerDeleteTimeout(t *testing.T) {
|
||||
c := &Container{
|
||||
Config: &container.Config{},
|
||||
}
|
||||
|
||||
s := c.DeleteTimeout()
|
||||
if s != defaultDeleteTimeout {
|
||||
t.Fatalf("Expected %v, got %v", defaultDeleteTimeout, s)
|
||||
}
|
||||
|
||||
deleteTimeout := 15
|
||||
c = &Container{
|
||||
Config: &container.Config{DeleteTimeout: &deleteTimeout},
|
||||
}
|
||||
s = c.DeleteTimeout()
|
||||
if s != deleteTimeout {
|
||||
t.Fatalf("Expected %v, got %v", deleteTimeout, s)
|
||||
}
|
||||
}
|
||||
|
||||
func TestContainerSecretReferenceDestTarget(t *testing.T) {
|
||||
ref := &swarmtypes.SecretReference{
|
||||
File: &swarmtypes.SecretReferenceFileTarget{
|
||||
|
|
|
@ -39,7 +39,8 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
|
|||
|
||||
tsk, ok := c.Task()
|
||||
if ok {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
deleteTimeout := time.Duration(c.DeleteTimeout()) * time.Second
|
||||
ctx, cancel := context.WithTimeout(context.Background(), deleteTimeout)
|
||||
es, err := tsk.Delete(ctx)
|
||||
cancel()
|
||||
if err != nil {
|
||||
|
|
10
image/cache/compare.go
vendored
10
image/cache/compare.go
vendored
|
@ -149,6 +149,16 @@ func compare(a, b *container.Config) bool {
|
|||
return false
|
||||
}
|
||||
}
|
||||
|
||||
if (a.DeleteTimeout == nil) != (b.DeleteTimeout == nil) {
|
||||
return false
|
||||
}
|
||||
if a.DeleteTimeout != nil && b.DeleteTimeout != nil {
|
||||
if *a.DeleteTimeout != *b.DeleteTimeout {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
if (a.Healthcheck == nil) != (b.Healthcheck == nil) {
|
||||
return false
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue