소스 검색

Merge pull request #22566 from yongtang/22471-daemon-shutdown-timeout

Add config parameter to change per-container stop timeout during daemon shutdown
Vincent Demeester 8 년 전
부모
커밋
dad8cbfc2d

+ 2 - 2
api/server/router/container/backend.go

@@ -37,10 +37,10 @@ type stateBackend interface {
 	ContainerPause(name string) error
 	ContainerRename(oldName, newName string) error
 	ContainerResize(name string, height, width int) error
-	ContainerRestart(name string, seconds int) error
+	ContainerRestart(name string, seconds *int) error
 	ContainerRm(name string, config *types.ContainerRmConfig) error
 	ContainerStart(name string, hostConfig *container.HostConfig, validateHostname bool, checkpoint string) error
-	ContainerStop(name string, seconds int) error
+	ContainerStop(name string, seconds *int) error
 	ContainerUnpause(name string) error
 	ContainerUpdate(name string, hostConfig *container.HostConfig, validateHostname bool) (types.ContainerUpdateResponse, error)
 	ContainerWait(name string, timeout time.Duration) (int, error)

+ 17 - 3
api/server/router/container/container_routes.go

@@ -169,7 +169,14 @@ func (s *containerRouter) postContainersStop(ctx context.Context, w http.Respons
 		return err
 	}
 
-	seconds, _ := strconv.Atoi(r.Form.Get("t"))
+	var seconds *int
+	if tmpSeconds := r.Form.Get("t"); tmpSeconds != "" {
+		valSeconds, err := strconv.Atoi(tmpSeconds)
+		if err != nil {
+			return err
+		}
+		seconds = &valSeconds
+	}
 
 	if err := s.backend.ContainerStop(vars["name"], seconds); err != nil {
 		return err
@@ -223,9 +230,16 @@ func (s *containerRouter) postContainersRestart(ctx context.Context, w http.Resp
 		return err
 	}
 
-	timeout, _ := strconv.Atoi(r.Form.Get("t"))
+	var seconds *int
+	if tmpSeconds := r.Form.Get("t"); tmpSeconds != "" {
+		valSeconds, err := strconv.Atoi(tmpSeconds)
+		if err != nil {
+			return err
+		}
+		seconds = &valSeconds
+	}
 
-	if err := s.backend.ContainerRestart(vars["name"], timeout); err != nil {
+	if err := s.backend.ContainerRestart(vars["name"], seconds); err != nil {
 		return err
 	}
 

+ 10 - 3
cli/command/container/restart.go

@@ -13,7 +13,8 @@ import (
 )
 
 type restartOptions struct {
-	nSeconds int
+	nSeconds        int
+	nSecondsChanged bool
 
 	containers []string
 }
@@ -28,6 +29,7 @@ func NewRestartCommand(dockerCli *command.DockerCli) *cobra.Command {
 		Args:  cli.RequiresMinArgs(1),
 		RunE: func(cmd *cobra.Command, args []string) error {
 			opts.containers = args
+			opts.nSecondsChanged = cmd.Flags().Changed("time")
 			return runRestart(dockerCli, &opts)
 		},
 	}
@@ -40,9 +42,14 @@ func NewRestartCommand(dockerCli *command.DockerCli) *cobra.Command {
 func runRestart(dockerCli *command.DockerCli, opts *restartOptions) error {
 	ctx := context.Background()
 	var errs []string
+	var timeout *time.Duration
+	if opts.nSecondsChanged {
+		timeoutValue := time.Duration(opts.nSeconds) * time.Second
+		timeout = &timeoutValue
+	}
+
 	for _, name := range opts.containers {
-		timeout := time.Duration(opts.nSeconds) * time.Second
-		if err := dockerCli.Client().ContainerRestart(ctx, name, &timeout); err != nil {
+		if err := dockerCli.Client().ContainerRestart(ctx, name, timeout); err != nil {
 			errs = append(errs, err.Error())
 		} else {
 			fmt.Fprintf(dockerCli.Out(), "%s\n", name)

+ 10 - 3
cli/command/container/stop.go

@@ -13,7 +13,8 @@ import (
 )
 
 type stopOptions struct {
-	time int
+	time        int
+	timeChanged bool
 
 	containers []string
 }
@@ -28,6 +29,7 @@ func NewStopCommand(dockerCli *command.DockerCli) *cobra.Command {
 		Args:  cli.RequiresMinArgs(1),
 		RunE: func(cmd *cobra.Command, args []string) error {
 			opts.containers = args
+			opts.timeChanged = cmd.Flags().Changed("time")
 			return runStop(dockerCli, &opts)
 		},
 	}
@@ -39,12 +41,17 @@ func NewStopCommand(dockerCli *command.DockerCli) *cobra.Command {
 
 func runStop(dockerCli *command.DockerCli, opts *stopOptions) error {
 	ctx := context.Background()
-	timeout := time.Duration(opts.time) * time.Second
+
+	var timeout *time.Duration
+	if opts.timeChanged {
+		timeoutValue := time.Duration(opts.time) * time.Second
+		timeout = &timeoutValue
+	}
 
 	var errs []string
 
 	errChan := parallelOperation(ctx, opts.containers, func(ctx context.Context, id string) error {
-		return dockerCli.Client().ContainerStop(ctx, id, &timeout)
+		return dockerCli.Client().ContainerStop(ctx, id, timeout)
 	})
 	for _, container := range opts.containers {
 		if err := <-errChan; err != nil {

+ 9 - 3
cmd/dockerd/daemon.go

@@ -296,7 +296,7 @@ func (cli *DaemonCli) start(opts daemonOptions) (err error) {
 	// Wait for serve API to complete
 	errAPI := <-serveAPIWait
 	c.Cleanup()
-	shutdownDaemon(d, 15)
+	shutdownDaemon(d)
 	containerdRemote.Cleanup()
 	if errAPI != nil {
 		return fmt.Errorf("Shutting down due to ServeAPI error: %v", errAPI)
@@ -342,16 +342,22 @@ func (cli *DaemonCli) stop() {
 // shutdownDaemon just wraps daemon.Shutdown() to handle a timeout in case
 // d.Shutdown() is waiting too long to kill container or worst it's
 // blocked there
-func shutdownDaemon(d *daemon.Daemon, timeout time.Duration) {
+func shutdownDaemon(d *daemon.Daemon) {
+	shutdownTimeout := d.ShutdownTimeout()
 	ch := make(chan struct{})
 	go func() {
 		d.Shutdown()
 		close(ch)
 	}()
+	if shutdownTimeout < 0 {
+		<-ch
+		logrus.Debug("Clean shutdown succeeded")
+		return
+	}
 	select {
 	case <-ch:
 		logrus.Debug("Clean shutdown succeeded")
-	case <-time.After(timeout * time.Second):
+	case <-time.After(time.Duration(shutdownTimeout) * time.Second):
 		logrus.Error("Force shutdown daemon")
 	}
 }

+ 13 - 0
container/container.go

@@ -44,6 +44,11 @@ import (
 
 const configFileName = "config.v2.json"
 
+const (
+	// DefaultStopTimeout is the timeout (in seconds) for the syscall signal used to stop a container.
+	DefaultStopTimeout = 10
+)
+
 var (
 	errInvalidEndpoint = fmt.Errorf("invalid endpoint while building port map info")
 	errInvalidNetwork  = fmt.Errorf("invalid network settings while building port map info")
@@ -578,6 +583,14 @@ func (container *Container) StopSignal() int {
 	return int(stopSignal)
 }
 
+// StopTimeout returns the timeout (in seconds) used to stop the container.
+func (container *Container) StopTimeout() int {
+	if container.Config.StopTimeout != nil {
+		return *container.Config.StopTimeout
+	}
+	return DefaultStopTimeout
+}
+
 // InitDNSHostConfig ensures that the dns fields are never nil.
 // New containers don't ever have those fields nil,
 // but pre created containers can still have those nil values.

+ 24 - 0
container/container_unit_test.go

@@ -34,3 +34,27 @@ func TestContainerStopSignal(t *testing.T) {
 		t.Fatalf("Expected 9, got %v", s)
 	}
 }
+
+func TestContainerStopTimeout(t *testing.T) {
+	c := &Container{
+		CommonContainer: CommonContainer{
+			Config: &container.Config{},
+		},
+	}
+
+	s := c.StopTimeout()
+	if s != DefaultStopTimeout {
+		t.Fatalf("Expected %v, got %v", DefaultStopTimeout, s)
+	}
+
+	stopTimeout := 15
+	c = &Container{
+		CommonContainer: CommonContainer{
+			Config: &container.Config{StopTimeout: &stopTimeout},
+		},
+	}
+	s = c.StopSignal()
+	if s != 15 {
+		t.Fatalf("Expected 15, got %v", s)
+	}
+}

+ 1 - 1
daemon/cluster/executor/backend.go

@@ -25,7 +25,7 @@ type Backend interface {
 	PullImage(ctx context.Context, image, tag string, metaHeaders map[string][]string, authConfig *types.AuthConfig, outStream io.Writer) error
 	CreateManagedContainer(config types.ContainerCreateConfig, validateHostname bool) (types.ContainerCreateResponse, error)
 	ContainerStart(name string, hostConfig *container.HostConfig, validateHostname bool, checkpoint string) error
-	ContainerStop(name string, seconds int) error
+	ContainerStop(name string, seconds *int) error
 	ConnectContainerToNetwork(containerName, networkName string, endpointConfig *network.EndpointSettings) error
 	UpdateContainerServiceConfig(containerName string, serviceConfig *clustertypes.ServiceConfig) error
 	ContainerInspectCurrent(name string, size bool) (*types.ContainerJSON, error)

+ 4 - 3
daemon/cluster/executor/container/adapter.go

@@ -279,11 +279,12 @@ func (c *containerAdapter) wait(ctx context.Context) error {
 }
 
 func (c *containerAdapter) shutdown(ctx context.Context) error {
-	// Default stop grace period to 10s.
-	stopgrace := 10
+	// Default stop grace period to nil (daemon will use the stopTimeout of the container)
+	var stopgrace *int
 	spec := c.container.spec()
 	if spec.StopGracePeriod != nil {
-		stopgrace = int(spec.StopGracePeriod.Seconds)
+		stopgraceValue := int(spec.StopGracePeriod.Seconds)
+		stopgrace = &stopgraceValue
 	}
 	return c.backend.ContainerStop(c.container.name(), stopgrace)
 }

+ 29 - 4
daemon/daemon.go

@@ -692,6 +692,8 @@ func NewDaemon(config *Config, registryService registry.Service, containerdRemot
 }
 
 func (daemon *Daemon) shutdownContainer(c *container.Container) error {
+	stopTimeout := c.StopTimeout()
+
 	// TODO(windows): Handle docker restart with paused containers
 	if c.IsPaused() {
 		// To terminate a process in freezer cgroup, we should send
@@ -708,8 +710,8 @@ func (daemon *Daemon) shutdownContainer(c *container.Container) error {
 		if err := daemon.containerUnpause(c); err != nil {
 			return fmt.Errorf("Failed to unpause container %s with error: %v", c.ID, err)
 		}
-		if _, err := c.WaitStop(10 * time.Second); err != nil {
-			logrus.Debugf("container %s failed to exit in 10 seconds of SIGTERM, sending SIGKILL to force", c.ID)
+		if _, err := c.WaitStop(time.Duration(stopTimeout) * time.Second); err != nil {
+			logrus.Debugf("container %s failed to exit in %d second of SIGTERM, sending SIGKILL to force", c.ID, stopTimeout)
 			sig, ok := signal.SignalMap["KILL"]
 			if !ok {
 				return fmt.Errorf("System does not support SIGKILL")
@@ -721,8 +723,8 @@ func (daemon *Daemon) shutdownContainer(c *container.Container) error {
 			return err
 		}
 	}
-	// If container failed to exit in 10 seconds of SIGTERM, then using the force
-	if err := daemon.containerStop(c, 10); err != nil {
+	// If container failed to exit in stopTimeout seconds of SIGTERM, then using the force
+	if err := daemon.containerStop(c, stopTimeout); err != nil {
 		return fmt.Errorf("Failed to stop container %s with error: %v", c.ID, err)
 	}
 
@@ -730,6 +732,29 @@ func (daemon *Daemon) shutdownContainer(c *container.Container) error {
 	return nil
 }
 
+// ShutdownTimeout returns the shutdown timeout based on the max stopTimeout of the containers
+func (daemon *Daemon) ShutdownTimeout() int {
+	// By default we use container.DefaultStopTimeout + 5s, which is 15s.
+	// TODO (yongtang): Will need to allow shutdown-timeout once #23036 is in place.
+	graceTimeout := 5
+	shutdownTimeout := container.DefaultStopTimeout + graceTimeout
+	if daemon.containers != nil {
+		for _, c := range daemon.containers.List() {
+			if shutdownTimeout >= 0 {
+				stopTimeout := c.StopTimeout()
+				if stopTimeout < 0 {
+					shutdownTimeout = -1
+				} else {
+					if stopTimeout+graceTimeout > shutdownTimeout {
+						shutdownTimeout = stopTimeout + graceTimeout
+					}
+				}
+			}
+		}
+	}
+	return shutdownTimeout
+}
+
 // Shutdown stops the daemon.
 func (daemon *Daemon) Shutdown() error {
 	daemon.shutdown = true

+ 7 - 2
daemon/restart.go

@@ -13,15 +13,20 @@ import (
 // timeout, ContainerRestart will wait forever until a graceful
 // stop. Returns an error if the container cannot be found, or if
 // there is an underlying error at any stage of the restart.
-func (daemon *Daemon) ContainerRestart(name string, seconds int) error {
+func (daemon *Daemon) ContainerRestart(name string, seconds *int) error {
 	container, err := daemon.GetContainer(name)
 	if err != nil {
 		return err
 	}
-	if err := daemon.containerRestart(container, seconds); err != nil {
+	if seconds == nil {
+		stopTimeout := container.StopTimeout()
+		seconds = &stopTimeout
+	}
+	if err := daemon.containerRestart(container, *seconds); err != nil {
 		return fmt.Errorf("Cannot restart container %s: %v", name, err)
 	}
 	return nil
+
 }
 
 // containerRestart attempts to gracefully stop and then start the

+ 6 - 2
daemon/stop.go

@@ -16,7 +16,7 @@ import (
 // will wait for a graceful termination. An error is returned if the
 // container is not found, is already stopped, or if there is a
 // problem stopping the container.
-func (daemon *Daemon) ContainerStop(name string, seconds int) error {
+func (daemon *Daemon) ContainerStop(name string, seconds *int) error {
 	container, err := daemon.GetContainer(name)
 	if err != nil {
 		return err
@@ -25,7 +25,11 @@ func (daemon *Daemon) ContainerStop(name string, seconds int) error {
 		err := fmt.Errorf("Container %s is already stopped", name)
 		return errors.NewErrorWithStatusCode(err, http.StatusNotModified)
 	}
-	if err := daemon.containerStop(container, seconds); err != nil {
+	if seconds == nil {
+		stopTimeout := container.StopTimeout()
+		seconds = &stopTimeout
+	}
+	if err := daemon.containerStop(container, *seconds); err != nil {
 		return fmt.Errorf("Cannot stop container %s: %v", name, err)
 	}
 	return nil

+ 1 - 0
docs/reference/api/docker_remote_api.md

@@ -128,6 +128,7 @@ This section lists each version from latest to oldest.  Each listing includes a
 * `DELETE /containers/(name)` endpoint now returns an error of `removal of container name is already in progress` with status code of 400, when container name is in a state of removal in progress.
 * `GET /containers/json` now supports a `is-task` filter to filter
   containers that are tasks (part of a service in swarm mode).
+* `POST /containers/create` now takes `StopTimeout` field.
 
 ### v1.24 API changes
 

+ 4 - 1
docs/reference/api/docker_remote_api_v1.25.md

@@ -284,6 +284,7 @@ Create a container
                    "22/tcp": {}
            },
            "StopSignal": "SIGTERM",
+           "StopTimeout": 10,
            "HostConfig": {
              "Binds": ["/tmp:/tmp"],
              "Links": ["redis3:redis"],
@@ -391,6 +392,7 @@ Create a container
 -   **ExposedPorts** - An object mapping ports to an empty object in the form of:
       `"ExposedPorts": { "<port>/<tcp|udp>: {}" }`
 -   **StopSignal** - Signal to stop a container as a string or unsigned integer. `SIGTERM` by default.
+-   **StopTimeout** - Timeout (in seconds) to stop a container. 10 by default.
 -   **HostConfig**
     -   **Binds** – A list of volume bindings for this container. Each volume binding is a string in one of these forms:
            + `host-src:container-dest` to bind-mount a host path into the
@@ -580,7 +582,8 @@ Return low-level information on the container `id`
 				"/volumes/data": {}
 			},
 			"WorkingDir": "",
-			"StopSignal": "SIGTERM"
+			"StopSignal": "SIGTERM",
+			"StopTimeout": 10
 		},
 		"Created": "2015-01-06T15:47:31.485331387Z",
 		"Driver": "devicemapper",

+ 1 - 0
docs/reference/commandline/create.md

@@ -94,6 +94,7 @@ Options:
                                     Unit is optional and can be `b` (bytes), `k` (kilobytes), `m` (megabytes),
                                     or `g` (gigabytes). If you omit the unit, the system uses bytes.
       --stop-signal string          Signal to stop a container, SIGTERM by default (default "SIGTERM")
+      --stop-timeout=10             Timeout (in seconds) to stop a container
       --storage-opt value           Storage driver options for the container (default [])
       --sysctl value                Sysctl options (default map[])
       --tmpfs value                 Mount a tmpfs directory (default [])

+ 6 - 0
docs/reference/commandline/run.md

@@ -101,6 +101,7 @@ Options:
                                     or `g` (gigabytes). If you omit the unit, the system uses bytes.
       --sig-proxy                   Proxy received signals to the process (default true)
       --stop-signal string          Signal to stop a container, SIGTERM by default (default "SIGTERM")
+      --stop-timeout=10             Timeout (in seconds) to stop a container
       --storage-opt value           Storage driver options for the container (default [])
       --sysctl value                Sysctl options (default map[])
       --tmpfs value                 Mount a tmpfs directory (default [])
@@ -620,6 +621,11 @@ or a signal name in the format SIGNAME, for instance SIGKILL.
 On Windows, this flag can be used to specify the `credentialspec` option. 
 The `credentialspec` must be in the format `file://spec.txt` or `registry://keyname`. 
 
+### Stop container with timeout (--stop-timeout)
+
+The `--stop-timeout` flag sets the the timeout (in seconds) that a pre-defined (see `--stop-signal`) system call
+signal that will be sent to the container to exit. After timeout elapses the container will be killed with SIGKILL.
+
 ### Specify isolation technology for container (--isolation)
 
 This option is useful in situations where you are running Docker containers on

+ 15 - 0
integration-cli/docker_cli_create_test.go

@@ -496,3 +496,18 @@ exec "$@"`,
 	out, _ = dockerCmd(c, "start", "-a", id)
 	c.Assert(strings.TrimSpace(out), check.Equals, "foo")
 }
+
+// #22471
+func (s *DockerSuite) TestCreateStopTimeout(c *check.C) {
+	name1 := "test_create_stop_timeout_1"
+	dockerCmd(c, "create", "--name", name1, "--stop-timeout", "15", "busybox")
+
+	res := inspectFieldJSON(c, name1, "Config.StopTimeout")
+	c.Assert(res, checker.Contains, "15")
+
+	name2 := "test_create_stop_timeout_2"
+	dockerCmd(c, "create", "--name", name2, "busybox")
+
+	res = inspectFieldJSON(c, name2, "Config.StopTimeout")
+	c.Assert(res, checker.Contains, "null")
+}

+ 4 - 0
man/docker-create.1.md

@@ -68,6 +68,7 @@ docker-create - Create a new container
 [**--security-opt**[=*[]*]]
 [**--storage-opt**[=*[]*]]
 [**--stop-signal**[=*SIGNAL*]]
+[**--stop-timeout**[=*TIMEOUT*]]
 [**--shm-size**[=*[]*]]
 [**--sysctl**[=*[]*]]
 [**-t**|**--tty**]
@@ -352,6 +353,9 @@ unit, `b` is used. Set LIMIT to `-1` to enable unlimited swap.
 **--stop-signal**=*SIGTERM*
   Signal to stop a container. Default is SIGTERM.
 
+**--stop-timeout**=*10*
+  Timeout (in seconds) to stop a container. Default is 10.
+
 **--sysctl**=SYSCTL
   Configure namespaced kernel parameters at runtime
 

+ 4 - 0
man/docker-run.1.md

@@ -70,6 +70,7 @@ docker-run - Run a command in a new container
 [**--security-opt**[=*[]*]]
 [**--storage-opt**[=*[]*]]
 [**--stop-signal**[=*SIGNAL*]]
+[**--stop-timeout**[=*TIMEOUT*]]
 [**--shm-size**[=*[]*]]
 [**--sig-proxy**[=*true*]]
 [**--sysctl**[=*[]*]]
@@ -502,6 +503,9 @@ incompatible with any restart policy other than `none`.
 **--stop-signal**=*SIGTERM*
   Signal to stop a container. Default is SIGTERM.
 
+**--stop-timeout**=*10*
+  Timeout (in seconds) to stop a container. Default is 10.
+
 **--shm-size**=""
    Size of `/dev/shm`. The format is `<number><unit>`.
    `number` must be greater than `0`.  Unit is optional and can be `b` (bytes), `k` (kilobytes), `m`(megabytes), or `g` (gigabytes).

+ 5 - 0
runconfig/opts/parse.go

@@ -94,6 +94,7 @@ type ContainerOptions struct {
 	cgroupParent      string
 	volumeDriver      string
 	stopSignal        string
+	stopTimeout       int
 	isolation         string
 	shmSize           string
 	noHealthcheck     bool
@@ -161,6 +162,7 @@ func AddFlags(flags *pflag.FlagSet) *ContainerOptions {
 	flags.BoolVar(&copts.readonlyRootfs, "read-only", false, "Mount the container's root filesystem as read only")
 	flags.StringVar(&copts.restartPolicy, "restart", "no", "Restart policy to apply when a container exits")
 	flags.StringVar(&copts.stopSignal, "stop-signal", signal.DefaultStopSignal, fmt.Sprintf("Signal to stop a container, %v by default", signal.DefaultStopSignal))
+	flags.IntVar(&copts.stopTimeout, "stop-timeout", 0, "Timeout (in seconds) to stop a container")
 	flags.Var(copts.sysctls, "sysctl", "Sysctl options")
 	flags.BoolVarP(&copts.tty, "tty", "t", false, "Allocate a pseudo-TTY")
 	flags.Var(copts.ulimits, "ulimit", "Ulimit options")
@@ -558,6 +560,9 @@ func Parse(flags *pflag.FlagSet, copts *ContainerOptions) (*container.Config, *c
 	if flags.Changed("stop-signal") {
 		config.StopSignal = copts.stopSignal
 	}
+	if flags.Changed("stop-timeout") {
+		config.StopTimeout = &copts.stopTimeout
+	}
 
 	hostConfig := &container.HostConfig{
 		Binds:           binds,