Merge pull request #22566 from yongtang/22471-daemon-shutdown-timeout

Add config parameter to change per-container stop timeout during daemon shutdown
This commit is contained in:
Vincent Demeester 2016-10-18 14:33:09 +02:00 committed by GitHub
commit dad8cbfc2d
20 changed files with 172 additions and 27 deletions

View file

@ -37,10 +37,10 @@ type stateBackend interface {
ContainerPause(name string) error
ContainerRename(oldName, newName string) error
ContainerResize(name string, height, width int) error
ContainerRestart(name string, seconds int) error
ContainerRestart(name string, seconds *int) error
ContainerRm(name string, config *types.ContainerRmConfig) error
ContainerStart(name string, hostConfig *container.HostConfig, validateHostname bool, checkpoint string) error
ContainerStop(name string, seconds int) error
ContainerStop(name string, seconds *int) error
ContainerUnpause(name string) error
ContainerUpdate(name string, hostConfig *container.HostConfig, validateHostname bool) (types.ContainerUpdateResponse, error)
ContainerWait(name string, timeout time.Duration) (int, error)

View file

@ -169,7 +169,14 @@ func (s *containerRouter) postContainersStop(ctx context.Context, w http.Respons
return err
}
seconds, _ := strconv.Atoi(r.Form.Get("t"))
var seconds *int
if tmpSeconds := r.Form.Get("t"); tmpSeconds != "" {
valSeconds, err := strconv.Atoi(tmpSeconds)
if err != nil {
return err
}
seconds = &valSeconds
}
if err := s.backend.ContainerStop(vars["name"], seconds); err != nil {
return err
@ -223,9 +230,16 @@ func (s *containerRouter) postContainersRestart(ctx context.Context, w http.Resp
return err
}
timeout, _ := strconv.Atoi(r.Form.Get("t"))
var seconds *int
if tmpSeconds := r.Form.Get("t"); tmpSeconds != "" {
valSeconds, err := strconv.Atoi(tmpSeconds)
if err != nil {
return err
}
seconds = &valSeconds
}
if err := s.backend.ContainerRestart(vars["name"], timeout); err != nil {
if err := s.backend.ContainerRestart(vars["name"], seconds); err != nil {
return err
}

View file

@ -13,7 +13,8 @@ import (
)
type restartOptions struct {
nSeconds int
nSeconds int
nSecondsChanged bool
containers []string
}
@ -28,6 +29,7 @@ func NewRestartCommand(dockerCli *command.DockerCli) *cobra.Command {
Args: cli.RequiresMinArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
opts.containers = args
opts.nSecondsChanged = cmd.Flags().Changed("time")
return runRestart(dockerCli, &opts)
},
}
@ -40,9 +42,14 @@ func NewRestartCommand(dockerCli *command.DockerCli) *cobra.Command {
func runRestart(dockerCli *command.DockerCli, opts *restartOptions) error {
ctx := context.Background()
var errs []string
var timeout *time.Duration
if opts.nSecondsChanged {
timeoutValue := time.Duration(opts.nSeconds) * time.Second
timeout = &timeoutValue
}
for _, name := range opts.containers {
timeout := time.Duration(opts.nSeconds) * time.Second
if err := dockerCli.Client().ContainerRestart(ctx, name, &timeout); err != nil {
if err := dockerCli.Client().ContainerRestart(ctx, name, timeout); err != nil {
errs = append(errs, err.Error())
} else {
fmt.Fprintf(dockerCli.Out(), "%s\n", name)

View file

@ -13,7 +13,8 @@ import (
)
type stopOptions struct {
time int
time int
timeChanged bool
containers []string
}
@ -28,6 +29,7 @@ func NewStopCommand(dockerCli *command.DockerCli) *cobra.Command {
Args: cli.RequiresMinArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
opts.containers = args
opts.timeChanged = cmd.Flags().Changed("time")
return runStop(dockerCli, &opts)
},
}
@ -39,12 +41,17 @@ func NewStopCommand(dockerCli *command.DockerCli) *cobra.Command {
func runStop(dockerCli *command.DockerCli, opts *stopOptions) error {
ctx := context.Background()
timeout := time.Duration(opts.time) * time.Second
var timeout *time.Duration
if opts.timeChanged {
timeoutValue := time.Duration(opts.time) * time.Second
timeout = &timeoutValue
}
var errs []string
errChan := parallelOperation(ctx, opts.containers, func(ctx context.Context, id string) error {
return dockerCli.Client().ContainerStop(ctx, id, &timeout)
return dockerCli.Client().ContainerStop(ctx, id, timeout)
})
for _, container := range opts.containers {
if err := <-errChan; err != nil {

View file

@ -296,7 +296,7 @@ func (cli *DaemonCli) start(opts daemonOptions) (err error) {
// Wait for serve API to complete
errAPI := <-serveAPIWait
c.Cleanup()
shutdownDaemon(d, 15)
shutdownDaemon(d)
containerdRemote.Cleanup()
if errAPI != nil {
return fmt.Errorf("Shutting down due to ServeAPI error: %v", errAPI)
@ -342,16 +342,22 @@ func (cli *DaemonCli) stop() {
// shutdownDaemon just wraps daemon.Shutdown() to handle a timeout in case
// d.Shutdown() is waiting too long to kill container or worst it's
// blocked there
func shutdownDaemon(d *daemon.Daemon, timeout time.Duration) {
func shutdownDaemon(d *daemon.Daemon) {
shutdownTimeout := d.ShutdownTimeout()
ch := make(chan struct{})
go func() {
d.Shutdown()
close(ch)
}()
if shutdownTimeout < 0 {
<-ch
logrus.Debug("Clean shutdown succeeded")
return
}
select {
case <-ch:
logrus.Debug("Clean shutdown succeeded")
case <-time.After(timeout * time.Second):
case <-time.After(time.Duration(shutdownTimeout) * time.Second):
logrus.Error("Force shutdown daemon")
}
}

View file

@ -44,6 +44,11 @@ import (
const configFileName = "config.v2.json"
const (
// DefaultStopTimeout is the timeout (in seconds) for the syscall signal used to stop a container.
DefaultStopTimeout = 10
)
var (
errInvalidEndpoint = fmt.Errorf("invalid endpoint while building port map info")
errInvalidNetwork = fmt.Errorf("invalid network settings while building port map info")
@ -578,6 +583,14 @@ func (container *Container) StopSignal() int {
return int(stopSignal)
}
// StopTimeout returns the timeout (in seconds) used to stop the container.
func (container *Container) StopTimeout() int {
if container.Config.StopTimeout != nil {
return *container.Config.StopTimeout
}
return DefaultStopTimeout
}
// InitDNSHostConfig ensures that the dns fields are never nil.
// New containers don't ever have those fields nil,
// but pre created containers can still have those nil values.

View file

@ -34,3 +34,27 @@ func TestContainerStopSignal(t *testing.T) {
t.Fatalf("Expected 9, got %v", s)
}
}
func TestContainerStopTimeout(t *testing.T) {
c := &Container{
CommonContainer: CommonContainer{
Config: &container.Config{},
},
}
s := c.StopTimeout()
if s != DefaultStopTimeout {
t.Fatalf("Expected %v, got %v", DefaultStopTimeout, s)
}
stopTimeout := 15
c = &Container{
CommonContainer: CommonContainer{
Config: &container.Config{StopTimeout: &stopTimeout},
},
}
s = c.StopSignal()
if s != 15 {
t.Fatalf("Expected 15, got %v", s)
}
}

View file

@ -25,7 +25,7 @@ type Backend interface {
PullImage(ctx context.Context, image, tag string, metaHeaders map[string][]string, authConfig *types.AuthConfig, outStream io.Writer) error
CreateManagedContainer(config types.ContainerCreateConfig, validateHostname bool) (types.ContainerCreateResponse, error)
ContainerStart(name string, hostConfig *container.HostConfig, validateHostname bool, checkpoint string) error
ContainerStop(name string, seconds int) error
ContainerStop(name string, seconds *int) error
ConnectContainerToNetwork(containerName, networkName string, endpointConfig *network.EndpointSettings) error
UpdateContainerServiceConfig(containerName string, serviceConfig *clustertypes.ServiceConfig) error
ContainerInspectCurrent(name string, size bool) (*types.ContainerJSON, error)

View file

@ -279,11 +279,12 @@ func (c *containerAdapter) wait(ctx context.Context) error {
}
func (c *containerAdapter) shutdown(ctx context.Context) error {
// Default stop grace period to 10s.
stopgrace := 10
// Default stop grace period to nil (daemon will use the stopTimeout of the container)
var stopgrace *int
spec := c.container.spec()
if spec.StopGracePeriod != nil {
stopgrace = int(spec.StopGracePeriod.Seconds)
stopgraceValue := int(spec.StopGracePeriod.Seconds)
stopgrace = &stopgraceValue
}
return c.backend.ContainerStop(c.container.name(), stopgrace)
}

View file

@ -692,6 +692,8 @@ func NewDaemon(config *Config, registryService registry.Service, containerdRemot
}
func (daemon *Daemon) shutdownContainer(c *container.Container) error {
stopTimeout := c.StopTimeout()
// TODO(windows): Handle docker restart with paused containers
if c.IsPaused() {
// To terminate a process in freezer cgroup, we should send
@ -708,8 +710,8 @@ func (daemon *Daemon) shutdownContainer(c *container.Container) error {
if err := daemon.containerUnpause(c); err != nil {
return fmt.Errorf("Failed to unpause container %s with error: %v", c.ID, err)
}
if _, err := c.WaitStop(10 * time.Second); err != nil {
logrus.Debugf("container %s failed to exit in 10 seconds of SIGTERM, sending SIGKILL to force", c.ID)
if _, err := c.WaitStop(time.Duration(stopTimeout) * time.Second); err != nil {
logrus.Debugf("container %s failed to exit in %d second of SIGTERM, sending SIGKILL to force", c.ID, stopTimeout)
sig, ok := signal.SignalMap["KILL"]
if !ok {
return fmt.Errorf("System does not support SIGKILL")
@ -721,8 +723,8 @@ func (daemon *Daemon) shutdownContainer(c *container.Container) error {
return err
}
}
// If container failed to exit in 10 seconds of SIGTERM, then using the force
if err := daemon.containerStop(c, 10); err != nil {
// If container failed to exit in stopTimeout seconds of SIGTERM, then using the force
if err := daemon.containerStop(c, stopTimeout); err != nil {
return fmt.Errorf("Failed to stop container %s with error: %v", c.ID, err)
}
@ -730,6 +732,29 @@ func (daemon *Daemon) shutdownContainer(c *container.Container) error {
return nil
}
// ShutdownTimeout returns the shutdown timeout based on the max stopTimeout of the containers
func (daemon *Daemon) ShutdownTimeout() int {
// By default we use container.DefaultStopTimeout + 5s, which is 15s.
// TODO (yongtang): Will need to allow shutdown-timeout once #23036 is in place.
graceTimeout := 5
shutdownTimeout := container.DefaultStopTimeout + graceTimeout
if daemon.containers != nil {
for _, c := range daemon.containers.List() {
if shutdownTimeout >= 0 {
stopTimeout := c.StopTimeout()
if stopTimeout < 0 {
shutdownTimeout = -1
} else {
if stopTimeout+graceTimeout > shutdownTimeout {
shutdownTimeout = stopTimeout + graceTimeout
}
}
}
}
}
return shutdownTimeout
}
// Shutdown stops the daemon.
func (daemon *Daemon) Shutdown() error {
daemon.shutdown = true

View file

@ -13,15 +13,20 @@ import (
// timeout, ContainerRestart will wait forever until a graceful
// stop. Returns an error if the container cannot be found, or if
// there is an underlying error at any stage of the restart.
func (daemon *Daemon) ContainerRestart(name string, seconds int) error {
func (daemon *Daemon) ContainerRestart(name string, seconds *int) error {
container, err := daemon.GetContainer(name)
if err != nil {
return err
}
if err := daemon.containerRestart(container, seconds); err != nil {
if seconds == nil {
stopTimeout := container.StopTimeout()
seconds = &stopTimeout
}
if err := daemon.containerRestart(container, *seconds); err != nil {
return fmt.Errorf("Cannot restart container %s: %v", name, err)
}
return nil
}
// containerRestart attempts to gracefully stop and then start the

View file

@ -16,7 +16,7 @@ import (
// will wait for a graceful termination. An error is returned if the
// container is not found, is already stopped, or if there is a
// problem stopping the container.
func (daemon *Daemon) ContainerStop(name string, seconds int) error {
func (daemon *Daemon) ContainerStop(name string, seconds *int) error {
container, err := daemon.GetContainer(name)
if err != nil {
return err
@ -25,7 +25,11 @@ func (daemon *Daemon) ContainerStop(name string, seconds int) error {
err := fmt.Errorf("Container %s is already stopped", name)
return errors.NewErrorWithStatusCode(err, http.StatusNotModified)
}
if err := daemon.containerStop(container, seconds); err != nil {
if seconds == nil {
stopTimeout := container.StopTimeout()
seconds = &stopTimeout
}
if err := daemon.containerStop(container, *seconds); err != nil {
return fmt.Errorf("Cannot stop container %s: %v", name, err)
}
return nil

View file

@ -128,6 +128,7 @@ This section lists each version from latest to oldest. Each listing includes a
* `DELETE /containers/(name)` endpoint now returns an error of `removal of container name is already in progress` with status code of 400, when container name is in a state of removal in progress.
* `GET /containers/json` now supports a `is-task` filter to filter
containers that are tasks (part of a service in swarm mode).
* `POST /containers/create` now takes `StopTimeout` field.
### v1.24 API changes

View file

@ -284,6 +284,7 @@ Create a container
"22/tcp": {}
},
"StopSignal": "SIGTERM",
"StopTimeout": 10,
"HostConfig": {
"Binds": ["/tmp:/tmp"],
"Links": ["redis3:redis"],
@ -391,6 +392,7 @@ Create a container
- **ExposedPorts** - An object mapping ports to an empty object in the form of:
`"ExposedPorts": { "<port>/<tcp|udp>: {}" }`
- **StopSignal** - Signal to stop a container as a string or unsigned integer. `SIGTERM` by default.
- **StopTimeout** - Timeout (in seconds) to stop a container. 10 by default.
- **HostConfig**
- **Binds** A list of volume bindings for this container. Each volume binding is a string in one of these forms:
+ `host-src:container-dest` to bind-mount a host path into the
@ -580,7 +582,8 @@ Return low-level information on the container `id`
"/volumes/data": {}
},
"WorkingDir": "",
"StopSignal": "SIGTERM"
"StopSignal": "SIGTERM",
"StopTimeout": 10
},
"Created": "2015-01-06T15:47:31.485331387Z",
"Driver": "devicemapper",

View file

@ -94,6 +94,7 @@ Options:
Unit is optional and can be `b` (bytes), `k` (kilobytes), `m` (megabytes),
or `g` (gigabytes). If you omit the unit, the system uses bytes.
--stop-signal string Signal to stop a container, SIGTERM by default (default "SIGTERM")
--stop-timeout=10 Timeout (in seconds) to stop a container
--storage-opt value Storage driver options for the container (default [])
--sysctl value Sysctl options (default map[])
--tmpfs value Mount a tmpfs directory (default [])

View file

@ -101,6 +101,7 @@ Options:
or `g` (gigabytes). If you omit the unit, the system uses bytes.
--sig-proxy Proxy received signals to the process (default true)
--stop-signal string Signal to stop a container, SIGTERM by default (default "SIGTERM")
--stop-timeout=10 Timeout (in seconds) to stop a container
--storage-opt value Storage driver options for the container (default [])
--sysctl value Sysctl options (default map[])
--tmpfs value Mount a tmpfs directory (default [])
@ -620,6 +621,11 @@ or a signal name in the format SIGNAME, for instance SIGKILL.
On Windows, this flag can be used to specify the `credentialspec` option.
The `credentialspec` must be in the format `file://spec.txt` or `registry://keyname`.
### Stop container with timeout (--stop-timeout)
The `--stop-timeout` flag sets the the timeout (in seconds) that a pre-defined (see `--stop-signal`) system call
signal that will be sent to the container to exit. After timeout elapses the container will be killed with SIGKILL.
### Specify isolation technology for container (--isolation)
This option is useful in situations where you are running Docker containers on

View file

@ -496,3 +496,18 @@ exec "$@"`,
out, _ = dockerCmd(c, "start", "-a", id)
c.Assert(strings.TrimSpace(out), check.Equals, "foo")
}
// #22471
func (s *DockerSuite) TestCreateStopTimeout(c *check.C) {
name1 := "test_create_stop_timeout_1"
dockerCmd(c, "create", "--name", name1, "--stop-timeout", "15", "busybox")
res := inspectFieldJSON(c, name1, "Config.StopTimeout")
c.Assert(res, checker.Contains, "15")
name2 := "test_create_stop_timeout_2"
dockerCmd(c, "create", "--name", name2, "busybox")
res = inspectFieldJSON(c, name2, "Config.StopTimeout")
c.Assert(res, checker.Contains, "null")
}

View file

@ -68,6 +68,7 @@ docker-create - Create a new container
[**--security-opt**[=*[]*]]
[**--storage-opt**[=*[]*]]
[**--stop-signal**[=*SIGNAL*]]
[**--stop-timeout**[=*TIMEOUT*]]
[**--shm-size**[=*[]*]]
[**--sysctl**[=*[]*]]
[**-t**|**--tty**]
@ -352,6 +353,9 @@ unit, `b` is used. Set LIMIT to `-1` to enable unlimited swap.
**--stop-signal**=*SIGTERM*
Signal to stop a container. Default is SIGTERM.
**--stop-timeout**=*10*
Timeout (in seconds) to stop a container. Default is 10.
**--sysctl**=SYSCTL
Configure namespaced kernel parameters at runtime

View file

@ -70,6 +70,7 @@ docker-run - Run a command in a new container
[**--security-opt**[=*[]*]]
[**--storage-opt**[=*[]*]]
[**--stop-signal**[=*SIGNAL*]]
[**--stop-timeout**[=*TIMEOUT*]]
[**--shm-size**[=*[]*]]
[**--sig-proxy**[=*true*]]
[**--sysctl**[=*[]*]]
@ -502,6 +503,9 @@ incompatible with any restart policy other than `none`.
**--stop-signal**=*SIGTERM*
Signal to stop a container. Default is SIGTERM.
**--stop-timeout**=*10*
Timeout (in seconds) to stop a container. Default is 10.
**--shm-size**=""
Size of `/dev/shm`. The format is `<number><unit>`.
`number` must be greater than `0`. Unit is optional and can be `b` (bytes), `k` (kilobytes), `m`(megabytes), or `g` (gigabytes).

View file

@ -94,6 +94,7 @@ type ContainerOptions struct {
cgroupParent string
volumeDriver string
stopSignal string
stopTimeout int
isolation string
shmSize string
noHealthcheck bool
@ -161,6 +162,7 @@ func AddFlags(flags *pflag.FlagSet) *ContainerOptions {
flags.BoolVar(&copts.readonlyRootfs, "read-only", false, "Mount the container's root filesystem as read only")
flags.StringVar(&copts.restartPolicy, "restart", "no", "Restart policy to apply when a container exits")
flags.StringVar(&copts.stopSignal, "stop-signal", signal.DefaultStopSignal, fmt.Sprintf("Signal to stop a container, %v by default", signal.DefaultStopSignal))
flags.IntVar(&copts.stopTimeout, "stop-timeout", 0, "Timeout (in seconds) to stop a container")
flags.Var(copts.sysctls, "sysctl", "Sysctl options")
flags.BoolVarP(&copts.tty, "tty", "t", false, "Allocate a pseudo-TTY")
flags.Var(copts.ulimits, "ulimit", "Ulimit options")
@ -558,6 +560,9 @@ func Parse(flags *pflag.FlagSet, copts *ContainerOptions) (*container.Config, *c
if flags.Changed("stop-signal") {
config.StopSignal = copts.stopSignal
}
if flags.Changed("stop-timeout") {
config.StopTimeout = &copts.stopTimeout
}
hostConfig := &container.HostConfig{
Binds: binds,