Decouple daemon and container to stop and kill containers.

Signed-off-by: David Calavera <david.calavera@gmail.com>
This commit is contained in:
David Calavera 2015-11-02 18:25:26 -05:00
parent 581380cc6c
commit 4f2a5ba360
12 changed files with 177 additions and 149 deletions

View file

@ -128,6 +128,8 @@ type Docker interface {
// Release releases a list of images that were retained for the time of a build. // Release releases a list of images that were retained for the time of a build.
// TODO: remove // TODO: remove
Release(sessionID string, activeImages []string) Release(sessionID string, activeImages []string)
// Kill stops the container execution abruptly.
Kill(c *daemon.Container) error
} }
// ImageCache abstracts an image cache store. // ImageCache abstracts an image cache store.

View file

@ -559,7 +559,7 @@ func (b *Builder) run(c *daemon.Container) error {
select { select {
case <-b.cancelled: case <-b.cancelled:
logrus.Debugln("Build cancelled, killing and removing container:", c.ID) logrus.Debugln("Build cancelled, killing and removing container:", c.ID)
c.Kill() b.docker.Kill(c)
b.removeContainer(c.ID) b.removeContainer(c.ID)
case <-finished: case <-finished:
} }

View file

@ -337,51 +337,10 @@ func (container *Container) cleanup() {
} }
} }
// killSig sends the container the given signal. This wrapper for the // ExitOnNext signals to the monitor that it should not restart the container
// host specific kill command prepares the container before attempting // after we send the kill signal.
// to send the signal. An error is returned if the container is paused func (container *Container) ExitOnNext() {
// or not running, or if there is a problem returned from the
// underlying kill command.
func (container *Container) killSig(sig int) error {
logrus.Debugf("Sending %d to %s", sig, container.ID)
container.Lock()
defer container.Unlock()
// We could unpause the container for them rather than returning this error
if container.Paused {
return derr.ErrorCodeUnpauseContainer.WithArgs(container.ID)
}
if !container.Running {
return derr.ErrorCodeNotRunning.WithArgs(container.ID)
}
// signal to the monitor that it should not restart the container
// after we send the kill signal
container.monitor.ExitOnNext() container.monitor.ExitOnNext()
// if the container is currently restarting we do not need to send the signal
// to the process. Telling the monitor that it should exit on it's next event
// loop is enough
if container.Restarting {
return nil
}
if err := container.daemon.kill(container, sig); err != nil {
return err
}
container.logEvent("kill")
return nil
}
// Wrapper aroung killSig() suppressing "no such process" error.
func (container *Container) killPossiblyDeadProcess(sig int) error {
err := container.killSig(sig)
if err == syscall.ESRCH {
logrus.Debugf("Cannot kill process (pid=%d) with signal %d: no such process.", container.getPID(), sig)
return nil
}
return err
} }
func (container *Container) pause() error { func (container *Container) pause() error {
@ -428,98 +387,6 @@ func (container *Container) unpause() error {
return nil return nil
} }
// Kill forcefully terminates a container. It returns ErrorCodeNotRunning
// when the container is not running at the time of the call.
func (container *Container) Kill() error {
	if !container.IsRunning() {
		return derr.ErrorCodeNotRunning.WithArgs(container.ID)
	}

	// Step 1: send SIGKILL.
	//
	// A failure here is not returned right away: the container may simply
	// have stopped between the IsRunning() check above and the signal, and
	// the error is exec-driver specific, so there is no common value to
	// match that would tell "already dead" apart from a real failure.
	// Instead the container gets up to 2 more seconds to stop; only if it is
	// still running after that is the error treated as genuine and returned.
	killErr := container.killPossiblyDeadProcess(int(syscall.SIGKILL))
	if killErr != nil && container.IsRunning() {
		container.WaitStop(2 * time.Second)
		if container.IsRunning() {
			return killErr
		}
	}

	// Step 2: wait for the process to die and, as a last resort, try to
	// kill the process directly.
	if directErr := killProcessDirectly(container); directErr != nil {
		return directErr
	}

	container.WaitStop(-1 * time.Second)
	return nil
}
// Stop halts a container: it sends the container's stop signal, waits up to
// the given number of seconds for it to exit, and falls back to Kill if it
// has not exited by then. If a negative duration is given, Stop will wait
// for the initial signal forever. If the container is not running, Stop
// returns nil immediately.
func (container *Container) Stop(seconds int) error {
	if !container.IsRunning() {
		return nil
	}

	// Step 1: send the stop signal; if that fails, fall back to signal 9.
	stopErr := container.killPossiblyDeadProcess(container.stopSignal())
	if stopErr != nil {
		logrus.Infof("Failed to send SIGTERM to the process, force killing")
		if forceErr := container.killPossiblyDeadProcess(9); forceErr != nil {
			return forceErr
		}
	}

	// Step 2: give the process the requested time to exit on its own.
	gracePeriod := time.Duration(seconds) * time.Second
	if _, waitErr := container.WaitStop(gracePeriod); waitErr != nil {
		logrus.Infof("Container %v failed to exit within %d seconds of SIGTERM - using the force", container.ID, seconds)
		// Step 3: it did not exit in time, so kill it.
		if killErr := container.Kill(); killErr != nil {
			container.WaitStop(-1 * time.Second)
			// The error is only logged: what matters is that the
			// container is stopped, not which call stopped it.
			logrus.Warn(killErr)
		}
	}

	container.logEvent("stop")
	return nil
}
// Restart attempts to gracefully stop and then start the container. The
// seconds argument is forwarded to Stop as the grace period before the
// container is forcefully terminated; a negative value waits forever for a
// graceful stop. A "restart" event is logged once the container has started
// again.
func (container *Container) Restart(seconds int) error {
	// Hold a mount for the duration of the restart so the container is not
	// needlessly unmounted on stop and mounted again on start. A failed
	// Mount is ignored and simply skips the deferred Unmount.
	mountErr := container.Mount()
	if mountErr == nil {
		defer container.Unmount()
	}

	stopErr := container.Stop(seconds)
	if stopErr != nil {
		return stopErr
	}

	startErr := container.Start()
	if startErr != nil {
		return startErr
	}

	container.logEvent("restart")
	return nil
}
// Resize changes the TTY of the process running inside the container // Resize changes the TTY of the process running inside the container
// to the given height and width. The container must be running. // to the given height and width. The container must be running.
func (container *Container) Resize(h, w int) error { func (container *Container) Resize(h, w int) error {

View file

@ -64,7 +64,7 @@ type Container struct {
func killProcessDirectly(container *Container) error { func killProcessDirectly(container *Container) error {
if _, err := container.WaitStop(10 * time.Second); err != nil { if _, err := container.WaitStop(10 * time.Second); err != nil {
// Ensure that we don't kill ourselves // Ensure that we don't kill ourselves
if pid := container.getPID(); pid != 0 { if pid := container.GetPID(); pid != 0 {
logrus.Infof("Container %s failed to exit within 10 seconds of kill - trying direct SIGKILL", stringid.TruncateID(container.ID)) logrus.Infof("Container %s failed to exit within 10 seconds of kill - trying direct SIGKILL", stringid.TruncateID(container.ID))
if err := syscall.Kill(pid, 9); err != nil { if err := syscall.Kill(pid, 9); err != nil {
if err != syscall.ESRCH { if err != syscall.ESRCH {

View file

@ -838,7 +838,7 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
return d, nil return d, nil
} }
func stopContainer(c *Container) error { func (daemon *Daemon) shutdownContainer(c *Container) error {
// TODO(windows): Handle docker restart with paused containers // TODO(windows): Handle docker restart with paused containers
if c.isPaused() { if c.isPaused() {
// To terminate a process in freezer cgroup, we should send // To terminate a process in freezer cgroup, we should send
@ -869,7 +869,7 @@ func stopContainer(c *Container) error {
} }
} }
// If container failed to exit in 10 seconds of SIGTERM, then using the force // If container failed to exit in 10 seconds of SIGTERM, then using the force
if err := c.Stop(10); err != nil { if err := daemon.containerStop(c, 10); err != nil {
return fmt.Errorf("Stop container %s with error: %v", c.ID, err) return fmt.Errorf("Stop container %s with error: %v", c.ID, err)
} }
@ -891,7 +891,7 @@ func (daemon *Daemon) Shutdown() error {
group.Add(1) group.Add(1)
go func(c *Container) { go func(c *Container) {
defer group.Done() defer group.Done()
if err := stopContainer(c); err != nil { if err := daemon.shutdownContainer(c); err != nil {
logrus.Errorf("Stop container error: %v", err) logrus.Errorf("Stop container error: %v", err)
return return
} }

View file

@ -205,6 +205,11 @@ func (d Docker) GetCachedImage(imgID string, cfg *runconfig.Config) (string, err
return cache.ID, nil return cache.ID, nil
} }
// Kill stops the container execution abruptly by delegating to the Kill
// method of the wrapped Daemon and returning its result unchanged.
func (d Docker) Kill(container *daemon.Container) error {
	err := d.Daemon.Kill(container)
	return err
}
// Following is specific to builder contexts // Following is specific to builder contexts
// DetectContextFromRemoteURL returns a context and in certain cases the name of the dockerfile to be used // DetectContextFromRemoteURL returns a context and in certain cases the name of the dockerfile to be used

View file

@ -71,7 +71,7 @@ func (daemon *Daemon) rm(container *Container, forceRemove bool) (err error) {
if !forceRemove { if !forceRemove {
return derr.ErrorCodeRmRunning return derr.ErrorCodeRmRunning
} }
if err := container.Kill(); err != nil { if err := daemon.Kill(container); err != nil {
return derr.ErrorCodeRmFailed.WithArgs(err) return derr.ErrorCodeRmFailed.WithArgs(err)
} }
} }
@ -90,7 +90,7 @@ func (daemon *Daemon) rm(container *Container, forceRemove bool) (err error) {
// if stats are currently getting collected. // if stats are currently getting collected.
daemon.statsCollector.stopCollection(container) daemon.statsCollector.stopCollection(container)
if err = container.Stop(3); err != nil { if err = daemon.containerStop(container, 3); err != nil {
return err return err
} }

10
daemon/events.go Normal file
View file

@ -0,0 +1,10 @@
package daemon
// logContainerEvent records action in the daemon's events service, passing
// along the container's ID and the image named in its configuration.
func (daemon *Daemon) logContainerEvent(container *Container, action string) {
	daemon.EventsService.Log(action, container.ID, container.Config.Image)
}

View file

@ -4,7 +4,10 @@ import (
"fmt" "fmt"
"runtime" "runtime"
"syscall" "syscall"
"time"
"github.com/Sirupsen/logrus"
derr "github.com/docker/docker/errors"
"github.com/docker/docker/pkg/signal" "github.com/docker/docker/pkg/signal"
) )
@ -24,14 +27,96 @@ func (daemon *Daemon) ContainerKill(name string, sig uint64) error {
// If no signal is passed, or SIGKILL, perform regular Kill (SIGKILL + wait()) // If no signal is passed, or SIGKILL, perform regular Kill (SIGKILL + wait())
if sig == 0 || syscall.Signal(sig) == syscall.SIGKILL { if sig == 0 || syscall.Signal(sig) == syscall.SIGKILL {
if err := container.Kill(); err != nil { if err := daemon.Kill(container); err != nil {
return err return err
} }
} else { } else {
// Otherwise, just send the requested signal // Otherwise, just send the requested signal
if err := container.killSig(int(sig)); err != nil { if err := daemon.killWithSignal(container, int(sig)); err != nil {
return err return err
} }
} }
return nil return nil
} }
// killWithSignal sends the signal sig to the container through the daemon's
// host specific kill command, holding the container lock for the whole
// operation.
//
// It returns ErrorCodeUnpauseContainer if the container is paused,
// ErrorCodeNotRunning if it is not running, or the error reported by the
// underlying kill command. A "kill" event is logged only after the signal
// has been sent successfully.
func (daemon *Daemon) killWithSignal(container *Container, sig int) error {
	logrus.Debugf("Sending %d to %s", sig, container.ID)

	container.Lock()
	defer container.Unlock()

	switch {
	case container.Paused:
		// We could unpause the container for the caller rather than
		// returning this error.
		return derr.ErrorCodeUnpauseContainer.WithArgs(container.ID)
	case !container.Running:
		return derr.ErrorCodeNotRunning.WithArgs(container.ID)
	}

	// Tell the monitor up front that it should not restart the container
	// after the signal is delivered.
	container.ExitOnNext()

	// A container that is currently restarting does not need the signal:
	// telling the monitor to exit on its next event loop is enough.
	if container.Restarting {
		return nil
	}

	sendErr := daemon.kill(container, sig)
	if sendErr != nil {
		return sendErr
	}

	daemon.logContainerEvent(container, "kill")
	return nil
}
// Kill forcefully terminates a container. It returns ErrorCodeNotRunning
// when the container is not running at the time of the call.
func (daemon *Daemon) Kill(container *Container) error {
	if !container.IsRunning() {
		return derr.ErrorCodeNotRunning.WithArgs(container.ID)
	}

	// Step 1: send SIGKILL.
	//
	// A failure here is not returned right away: the container may simply
	// have stopped between the IsRunning() check above and the signal, and
	// the error is exec-driver specific, so there is no common value to
	// match that would tell "already dead" apart from a real failure.
	// Instead the container gets up to 2 more seconds to stop; only if it is
	// still running after that is the error treated as genuine and returned.
	killErr := daemon.killPossiblyDeadProcess(container, int(syscall.SIGKILL))
	if killErr != nil && container.IsRunning() {
		container.WaitStop(2 * time.Second)
		if container.IsRunning() {
			return killErr
		}
	}

	// Step 2: wait for the process to die and, as a last resort, try to
	// kill the process directly.
	if directErr := killProcessDirectly(container); directErr != nil {
		return directErr
	}

	container.WaitStop(-1 * time.Second)
	return nil
}
// killPossiblyDeadProcess is a wrapper around killWithSignal() that
// suppresses the "no such process" error: when killWithSignal returns
// syscall.ESRCH the condition is logged at debug level and nil is returned.
// Any other result is passed through unchanged.
func (daemon *Daemon) killPossiblyDeadProcess(container *Container, sig int) error {
	err := daemon.killWithSignal(container, sig)
	if err != syscall.ESRCH {
		return err
	}

	logrus.Debugf("Cannot kill process (pid=%d) with signal %d: no such process.", container.GetPID(), sig)
	return nil
}

View file

@ -15,8 +15,32 @@ func (daemon *Daemon) ContainerRestart(name string, seconds int) error {
if err != nil { if err != nil {
return err return err
} }
if err := container.Restart(seconds); err != nil { if err := daemon.containerRestart(container, seconds); err != nil {
return derr.ErrorCodeCantRestart.WithArgs(name, err) return derr.ErrorCodeCantRestart.WithArgs(name, err)
} }
return nil return nil
} }
// containerRestart attempts to gracefully stop and then start the container.
// The seconds argument is forwarded to containerStop as the grace period
// before the container is forcefully terminated; a negative value waits
// forever for a graceful stop. A "restart" event is logged once the
// container has started again.
func (daemon *Daemon) containerRestart(container *Container, seconds int) error {
	// Hold a mount for the duration of the restart so the container is not
	// needlessly unmounted on stop and mounted again on start. A failed
	// Mount is ignored and simply skips the deferred Unmount.
	mountErr := container.Mount()
	if mountErr == nil {
		defer container.Unmount()
	}

	stopErr := daemon.containerStop(container, seconds)
	if stopErr != nil {
		return stopErr
	}

	startErr := container.Start()
	if startErr != nil {
		return startErr
	}

	daemon.logContainerEvent(container, "restart")
	return nil
}

View file

@ -134,7 +134,7 @@ func (s *State) waitRunning(timeout time.Duration) (int, error) {
if err := wait(waitChan, timeout); err != nil { if err := wait(waitChan, timeout); err != nil {
return -1, err return -1, err
} }
return s.getPID(), nil return s.GetPID(), nil
} }
// WaitStop waits until state is stopped. If state already stopped it returns // WaitStop waits until state is stopped. If state already stopped it returns
@ -164,7 +164,7 @@ func (s *State) IsRunning() bool {
} }
// GetPID returns the process id of a container. // GetPID returns the process id of a container.
func (s *State) getPID() int { func (s *State) GetPID() int {
s.Lock() s.Lock()
res := s.Pid res := s.Pid
s.Unlock() s.Unlock()

View file

@ -1,6 +1,9 @@
package daemon package daemon
import ( import (
"time"
"github.com/Sirupsen/logrus"
derr "github.com/docker/docker/errors" derr "github.com/docker/docker/errors"
) )
@ -18,8 +21,40 @@ func (daemon *Daemon) ContainerStop(name string, seconds int) error {
if !container.IsRunning() { if !container.IsRunning() {
return derr.ErrorCodeStopped return derr.ErrorCodeStopped
} }
if err := container.Stop(seconds); err != nil { if err := daemon.containerStop(container, seconds); err != nil {
return derr.ErrorCodeCantStop.WithArgs(name, err) return derr.ErrorCodeCantStop.WithArgs(name, err)
} }
return nil return nil
} }
// containerStop halts a container: it sends the container's stop signal,
// waits up to the given number of seconds for it to exit, and falls back to
// Kill if it has not exited by then. If a negative duration is given,
// containerStop will wait for the initial signal forever. If the container
// is not running, containerStop returns nil immediately.
func (daemon *Daemon) containerStop(container *Container, seconds int) error {
	if !container.IsRunning() {
		return nil
	}

	// Step 1: send the stop signal; if that fails, fall back to signal 9.
	stopErr := daemon.killPossiblyDeadProcess(container, container.stopSignal())
	if stopErr != nil {
		logrus.Infof("Failed to send SIGTERM to the process, force killing")
		if forceErr := daemon.killPossiblyDeadProcess(container, 9); forceErr != nil {
			return forceErr
		}
	}

	// Step 2: give the process the requested time to exit on its own.
	gracePeriod := time.Duration(seconds) * time.Second
	if _, waitErr := container.WaitStop(gracePeriod); waitErr != nil {
		logrus.Infof("Container %v failed to exit within %d seconds of SIGTERM - using the force", container.ID, seconds)
		// Step 3: it did not exit in time, so kill it.
		if killErr := daemon.Kill(container); killErr != nil {
			container.WaitStop(-1 * time.Second)
			// The error is only logged: what matters is that the
			// container is stopped, not which call stopped it.
			logrus.Warn(killErr)
		}
	}

	daemon.logContainerEvent(container, "stop")
	return nil
}