Decouple daemon and container to stop and kill containers.
Signed-off-by: David Calavera <david.calavera@gmail.com>
This commit is contained in:
parent
581380cc6c
commit
4f2a5ba360
12 changed files with 177 additions and 149 deletions
|
@ -128,6 +128,8 @@ type Docker interface {
|
|||
// Release releases a list of images that were retained for the time of a build.
|
||||
// TODO: remove
|
||||
Release(sessionID string, activeImages []string)
|
||||
// Kill stops the container execution abruptly.
|
||||
Kill(c *daemon.Container) error
|
||||
}
|
||||
|
||||
// ImageCache abstracts an image cache store.
|
||||
|
|
|
@ -559,7 +559,7 @@ func (b *Builder) run(c *daemon.Container) error {
|
|||
select {
|
||||
case <-b.cancelled:
|
||||
logrus.Debugln("Build cancelled, killing and removing container:", c.ID)
|
||||
c.Kill()
|
||||
b.docker.Kill(c)
|
||||
b.removeContainer(c.ID)
|
||||
case <-finished:
|
||||
}
|
||||
|
|
|
@ -337,51 +337,10 @@ func (container *Container) cleanup() {
|
|||
}
|
||||
}
|
||||
|
||||
// killSig sends the container the given signal. This wrapper for the
|
||||
// host specific kill command prepares the container before attempting
|
||||
// to send the signal. An error is returned if the container is paused
|
||||
// or not running, or if there is a problem returned from the
|
||||
// underlying kill command.
|
||||
func (container *Container) killSig(sig int) error {
|
||||
logrus.Debugf("Sending %d to %s", sig, container.ID)
|
||||
container.Lock()
|
||||
defer container.Unlock()
|
||||
|
||||
// We could unpause the container for them rather than returning this error
|
||||
if container.Paused {
|
||||
return derr.ErrorCodeUnpauseContainer.WithArgs(container.ID)
|
||||
}
|
||||
|
||||
if !container.Running {
|
||||
return derr.ErrorCodeNotRunning.WithArgs(container.ID)
|
||||
}
|
||||
|
||||
// signal to the monitor that it should not restart the container
|
||||
// after we send the kill signal
|
||||
// ExitOnNext signals to the monitor that it should not restart the container
|
||||
// after we send the kill signal.
|
||||
func (container *Container) ExitOnNext() {
|
||||
container.monitor.ExitOnNext()
|
||||
|
||||
// if the container is currently restarting we do not need to send the signal
|
||||
// to the process. Telling the monitor that it should exit on it's next event
|
||||
// loop is enough
|
||||
if container.Restarting {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := container.daemon.kill(container, sig); err != nil {
|
||||
return err
|
||||
}
|
||||
container.logEvent("kill")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Wrapper aroung killSig() suppressing "no such process" error.
|
||||
func (container *Container) killPossiblyDeadProcess(sig int) error {
|
||||
err := container.killSig(sig)
|
||||
if err == syscall.ESRCH {
|
||||
logrus.Debugf("Cannot kill process (pid=%d) with signal %d: no such process.", container.getPID(), sig)
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (container *Container) pause() error {
|
||||
|
@ -428,98 +387,6 @@ func (container *Container) unpause() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// Kill forcefully terminates a container.
|
||||
func (container *Container) Kill() error {
|
||||
if !container.IsRunning() {
|
||||
return derr.ErrorCodeNotRunning.WithArgs(container.ID)
|
||||
}
|
||||
|
||||
// 1. Send SIGKILL
|
||||
if err := container.killPossiblyDeadProcess(int(syscall.SIGKILL)); err != nil {
|
||||
// While normally we might "return err" here we're not going to
|
||||
// because if we can't stop the container by this point then
|
||||
// its probably because its already stopped. Meaning, between
|
||||
// the time of the IsRunning() call above and now it stopped.
|
||||
// Also, since the err return will be exec driver specific we can't
|
||||
// look for any particular (common) error that would indicate
|
||||
// that the process is already dead vs something else going wrong.
|
||||
// So, instead we'll give it up to 2 more seconds to complete and if
|
||||
// by that time the container is still running, then the error
|
||||
// we got is probably valid and so we return it to the caller.
|
||||
|
||||
if container.IsRunning() {
|
||||
container.WaitStop(2 * time.Second)
|
||||
if container.IsRunning() {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Wait for the process to die, in last resort, try to kill the process directly
|
||||
if err := killProcessDirectly(container); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
container.WaitStop(-1 * time.Second)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Stop halts a container by sending a stop signal, waiting for the given
|
||||
// duration in seconds, and then calling SIGKILL and waiting for the
|
||||
// process to exit. If a negative duration is given, Stop will wait
|
||||
// for the initial signal forever. If the container is not running Stop returns
|
||||
// immediately.
|
||||
func (container *Container) Stop(seconds int) error {
|
||||
if !container.IsRunning() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// 1. Send a SIGTERM
|
||||
if err := container.killPossiblyDeadProcess(container.stopSignal()); err != nil {
|
||||
logrus.Infof("Failed to send SIGTERM to the process, force killing")
|
||||
if err := container.killPossiblyDeadProcess(9); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Wait for the process to exit on its own
|
||||
if _, err := container.WaitStop(time.Duration(seconds) * time.Second); err != nil {
|
||||
logrus.Infof("Container %v failed to exit within %d seconds of SIGTERM - using the force", container.ID, seconds)
|
||||
// 3. If it doesn't, then send SIGKILL
|
||||
if err := container.Kill(); err != nil {
|
||||
container.WaitStop(-1 * time.Second)
|
||||
logrus.Warn(err) // Don't return error because we only care that container is stopped, not what function stopped it
|
||||
}
|
||||
}
|
||||
|
||||
container.logEvent("stop")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Restart attempts to gracefully stop and then start the
|
||||
// container. When stopping, wait for the given duration in seconds to
|
||||
// gracefully stop, before forcefully terminating the container. If
|
||||
// given a negative duration, wait forever for a graceful stop.
|
||||
func (container *Container) Restart(seconds int) error {
|
||||
// Avoid unnecessarily unmounting and then directly mounting
|
||||
// the container when the container stops and then starts
|
||||
// again
|
||||
if err := container.Mount(); err == nil {
|
||||
defer container.Unmount()
|
||||
}
|
||||
|
||||
if err := container.Stop(seconds); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := container.Start(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
container.logEvent("restart")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Resize changes the TTY of the process running inside the container
|
||||
// to the given height and width. The container must be running.
|
||||
func (container *Container) Resize(h, w int) error {
|
||||
|
|
|
@ -64,7 +64,7 @@ type Container struct {
|
|||
func killProcessDirectly(container *Container) error {
|
||||
if _, err := container.WaitStop(10 * time.Second); err != nil {
|
||||
// Ensure that we don't kill ourselves
|
||||
if pid := container.getPID(); pid != 0 {
|
||||
if pid := container.GetPID(); pid != 0 {
|
||||
logrus.Infof("Container %s failed to exit within 10 seconds of kill - trying direct SIGKILL", stringid.TruncateID(container.ID))
|
||||
if err := syscall.Kill(pid, 9); err != nil {
|
||||
if err != syscall.ESRCH {
|
||||
|
|
|
@ -838,7 +838,7 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
|
|||
return d, nil
|
||||
}
|
||||
|
||||
func stopContainer(c *Container) error {
|
||||
func (daemon *Daemon) shutdownContainer(c *Container) error {
|
||||
// TODO(windows): Handle docker restart with paused containers
|
||||
if c.isPaused() {
|
||||
// To terminate a process in freezer cgroup, we should send
|
||||
|
@ -869,7 +869,7 @@ func stopContainer(c *Container) error {
|
|||
}
|
||||
}
|
||||
// If container failed to exit in 10 seconds of SIGTERM, then using the force
|
||||
if err := c.Stop(10); err != nil {
|
||||
if err := daemon.containerStop(c, 10); err != nil {
|
||||
return fmt.Errorf("Stop container %s with error: %v", c.ID, err)
|
||||
}
|
||||
|
||||
|
@ -891,7 +891,7 @@ func (daemon *Daemon) Shutdown() error {
|
|||
group.Add(1)
|
||||
go func(c *Container) {
|
||||
defer group.Done()
|
||||
if err := stopContainer(c); err != nil {
|
||||
if err := daemon.shutdownContainer(c); err != nil {
|
||||
logrus.Errorf("Stop container error: %v", err)
|
||||
return
|
||||
}
|
||||
|
|
|
@ -205,6 +205,11 @@ func (d Docker) GetCachedImage(imgID string, cfg *runconfig.Config) (string, err
|
|||
return cache.ID, nil
|
||||
}
|
||||
|
||||
// Kill stops the container execution abruptly.
|
||||
func (d Docker) Kill(container *daemon.Container) error {
|
||||
return d.Daemon.Kill(container)
|
||||
}
|
||||
|
||||
// Following is specific to builder contexts
|
||||
|
||||
// DetectContextFromRemoteURL returns a context and in certain cases the name of the dockerfile to be used
|
||||
|
|
|
@ -71,7 +71,7 @@ func (daemon *Daemon) rm(container *Container, forceRemove bool) (err error) {
|
|||
if !forceRemove {
|
||||
return derr.ErrorCodeRmRunning
|
||||
}
|
||||
if err := container.Kill(); err != nil {
|
||||
if err := daemon.Kill(container); err != nil {
|
||||
return derr.ErrorCodeRmFailed.WithArgs(err)
|
||||
}
|
||||
}
|
||||
|
@ -90,7 +90,7 @@ func (daemon *Daemon) rm(container *Container, forceRemove bool) (err error) {
|
|||
// if stats are currently getting collected.
|
||||
daemon.statsCollector.stopCollection(container)
|
||||
|
||||
if err = container.Stop(3); err != nil {
|
||||
if err = daemon.containerStop(container, 3); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
|
10
daemon/events.go
Normal file
10
daemon/events.go
Normal file
|
@ -0,0 +1,10 @@
|
|||
package daemon
|
||||
|
||||
// logContainerEvent generates an event related to a container.
|
||||
func (daemon *Daemon) logContainerEvent(container *Container, action string) {
|
||||
daemon.EventsService.Log(
|
||||
action,
|
||||
container.ID,
|
||||
container.Config.Image,
|
||||
)
|
||||
}
|
|
@ -4,7 +4,10 @@ import (
|
|||
"fmt"
|
||||
"runtime"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
derr "github.com/docker/docker/errors"
|
||||
"github.com/docker/docker/pkg/signal"
|
||||
)
|
||||
|
||||
|
@ -24,14 +27,96 @@ func (daemon *Daemon) ContainerKill(name string, sig uint64) error {
|
|||
|
||||
// If no signal is passed, or SIGKILL, perform regular Kill (SIGKILL + wait())
|
||||
if sig == 0 || syscall.Signal(sig) == syscall.SIGKILL {
|
||||
if err := container.Kill(); err != nil {
|
||||
if err := daemon.Kill(container); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// Otherwise, just send the requested signal
|
||||
if err := container.killSig(int(sig)); err != nil {
|
||||
if err := daemon.killWithSignal(container, int(sig)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// killWithSignal sends the container the given signal. This wrapper for the
|
||||
// host specific kill command prepares the container before attempting
|
||||
// to send the signal. An error is returned if the container is paused
|
||||
// or not running, or if there is a problem returned from the
|
||||
// underlying kill command.
|
||||
func (daemon *Daemon) killWithSignal(container *Container, sig int) error {
|
||||
logrus.Debugf("Sending %d to %s", sig, container.ID)
|
||||
container.Lock()
|
||||
defer container.Unlock()
|
||||
|
||||
// We could unpause the container for them rather than returning this error
|
||||
if container.Paused {
|
||||
return derr.ErrorCodeUnpauseContainer.WithArgs(container.ID)
|
||||
}
|
||||
|
||||
if !container.Running {
|
||||
return derr.ErrorCodeNotRunning.WithArgs(container.ID)
|
||||
}
|
||||
|
||||
container.ExitOnNext()
|
||||
|
||||
// if the container is currently restarting we do not need to send the signal
|
||||
// to the process. Telling the monitor that it should exit on it's next event
|
||||
// loop is enough
|
||||
if container.Restarting {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := daemon.kill(container, sig); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
daemon.logContainerEvent(container, "kill")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Kill forcefully terminates a container.
|
||||
func (daemon *Daemon) Kill(container *Container) error {
|
||||
if !container.IsRunning() {
|
||||
return derr.ErrorCodeNotRunning.WithArgs(container.ID)
|
||||
}
|
||||
|
||||
// 1. Send SIGKILL
|
||||
if err := daemon.killPossiblyDeadProcess(container, int(syscall.SIGKILL)); err != nil {
|
||||
// While normally we might "return err" here we're not going to
|
||||
// because if we can't stop the container by this point then
|
||||
// its probably because its already stopped. Meaning, between
|
||||
// the time of the IsRunning() call above and now it stopped.
|
||||
// Also, since the err return will be exec driver specific we can't
|
||||
// look for any particular (common) error that would indicate
|
||||
// that the process is already dead vs something else going wrong.
|
||||
// So, instead we'll give it up to 2 more seconds to complete and if
|
||||
// by that time the container is still running, then the error
|
||||
// we got is probably valid and so we return it to the caller.
|
||||
|
||||
if container.IsRunning() {
|
||||
container.WaitStop(2 * time.Second)
|
||||
if container.IsRunning() {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Wait for the process to die, in last resort, try to kill the process directly
|
||||
if err := killProcessDirectly(container); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
container.WaitStop(-1 * time.Second)
|
||||
return nil
|
||||
}
|
||||
|
||||
// killPossibleDeadProcess is a wrapper aroung killSig() suppressing "no such process" error.
|
||||
func (daemon *Daemon) killPossiblyDeadProcess(container *Container, sig int) error {
|
||||
err := daemon.killWithSignal(container, sig)
|
||||
if err == syscall.ESRCH {
|
||||
logrus.Debugf("Cannot kill process (pid=%d) with signal %d: no such process.", container.GetPID(), sig)
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -15,8 +15,32 @@ func (daemon *Daemon) ContainerRestart(name string, seconds int) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := container.Restart(seconds); err != nil {
|
||||
if err := daemon.containerRestart(container, seconds); err != nil {
|
||||
return derr.ErrorCodeCantRestart.WithArgs(name, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// containerRestart attempts to gracefully stop and then start the
|
||||
// container. When stopping, wait for the given duration in seconds to
|
||||
// gracefully stop, before forcefully terminating the container. If
|
||||
// given a negative duration, wait forever for a graceful stop.
|
||||
func (daemon *Daemon) containerRestart(container *Container, seconds int) error {
|
||||
// Avoid unnecessarily unmounting and then directly mounting
|
||||
// the container when the container stops and then starts
|
||||
// again
|
||||
if err := container.Mount(); err == nil {
|
||||
defer container.Unmount()
|
||||
}
|
||||
|
||||
if err := daemon.containerStop(container, seconds); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := container.Start(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
daemon.logContainerEvent(container, "restart")
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -134,7 +134,7 @@ func (s *State) waitRunning(timeout time.Duration) (int, error) {
|
|||
if err := wait(waitChan, timeout); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
return s.getPID(), nil
|
||||
return s.GetPID(), nil
|
||||
}
|
||||
|
||||
// WaitStop waits until state is stopped. If state already stopped it returns
|
||||
|
@ -164,7 +164,7 @@ func (s *State) IsRunning() bool {
|
|||
}
|
||||
|
||||
// GetPID holds the process id of a container.
|
||||
func (s *State) getPID() int {
|
||||
func (s *State) GetPID() int {
|
||||
s.Lock()
|
||||
res := s.Pid
|
||||
s.Unlock()
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
package daemon
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
derr "github.com/docker/docker/errors"
|
||||
)
|
||||
|
||||
|
@ -18,8 +21,40 @@ func (daemon *Daemon) ContainerStop(name string, seconds int) error {
|
|||
if !container.IsRunning() {
|
||||
return derr.ErrorCodeStopped
|
||||
}
|
||||
if err := container.Stop(seconds); err != nil {
|
||||
if err := daemon.containerStop(container, seconds); err != nil {
|
||||
return derr.ErrorCodeCantStop.WithArgs(name, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// containerStop halts a container by sending a stop signal, waiting for the given
|
||||
// duration in seconds, and then calling SIGKILL and waiting for the
|
||||
// process to exit. If a negative duration is given, Stop will wait
|
||||
// for the initial signal forever. If the container is not running Stop returns
|
||||
// immediately.
|
||||
func (daemon *Daemon) containerStop(container *Container, seconds int) error {
|
||||
if !container.IsRunning() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// 1. Send a SIGTERM
|
||||
if err := daemon.killPossiblyDeadProcess(container, container.stopSignal()); err != nil {
|
||||
logrus.Infof("Failed to send SIGTERM to the process, force killing")
|
||||
if err := daemon.killPossiblyDeadProcess(container, 9); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Wait for the process to exit on its own
|
||||
if _, err := container.WaitStop(time.Duration(seconds) * time.Second); err != nil {
|
||||
logrus.Infof("Container %v failed to exit within %d seconds of SIGTERM - using the force", container.ID, seconds)
|
||||
// 3. If it doesn't, then send SIGKILL
|
||||
if err := daemon.Kill(container); err != nil {
|
||||
container.WaitStop(-1 * time.Second)
|
||||
logrus.Warn(err) // Don't return error because we only care that container is stopped, not what function stopped it
|
||||
}
|
||||
}
|
||||
|
||||
daemon.logContainerEvent(container, "stop")
|
||||
return nil
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue