From 740e26f384fe4fe70b5dd2ec5ef80cfdbafac177 Mon Sep 17 00:00:00 2001 From: Darren Stahl Date: Mon, 15 Aug 2016 16:51:45 -0700 Subject: [PATCH] Lock all calls to hcsshim to prevent close races Signed-off-by: Darren Stahl --- daemon/kill.go | 7 ++-- daemon/monitor.go | 8 +++-- daemon/stop.go | 18 +++++++++-- libcontainerd/client.go | 4 +-- libcontainerd/client_windows.go | 15 ++++++--- libcontainerd/container_windows.go | 52 +++++++++++++++++++++++------- libcontainerd/process_windows.go | 38 +++++++++++----------- 7 files changed, 95 insertions(+), 47 deletions(-) diff --git a/daemon/kill.go b/daemon/kill.go index 8ccbd0ade9..84186a9390 100644 --- a/daemon/kill.go +++ b/daemon/kill.go @@ -121,11 +121,8 @@ func (daemon *Daemon) Kill(container *container.Container) error { return nil } - if container.IsRunning() { - container.WaitStop(2 * time.Second) - if container.IsRunning() { - return err - } + if _, err2 := container.WaitStop(2 * time.Second); err2 != nil { + return err } } diff --git a/daemon/monitor.go b/daemon/monitor.go index aec771fd17..ce9fbab023 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -162,7 +162,9 @@ func (daemon *Daemon) AttachStreams(id string, iop libcontainerd.IOPipe) error { if iop.Stdin != nil { go func() { io.Copy(iop.Stdin, stdin) - iop.Stdin.Close() + if err := iop.Stdin.Close(); err != nil { + logrus.Error(err) + } }() } } else { @@ -172,7 +174,9 @@ func (daemon *Daemon) AttachStreams(id string, iop libcontainerd.IOPipe) error { if (c != nil && !c.Config.Tty) || (ec != nil && !ec.Tty && runtime.GOOS == "windows") { // tty is enabled, so dont close containerd's iopipe stdin. if iop.Stdin != nil { - iop.Stdin.Close() + if err := iop.Stdin.Close(); err != nil { + logrus.Error(err) + } } } } diff --git a/daemon/stop.go b/daemon/stop.go index 6faafa41cf..90b8898b24 100644 --- a/daemon/stop.go +++ b/daemon/stop.go @@ -46,9 +46,21 @@ func (daemon *Daemon) containerStop(container *container.Container, seconds int) stopSignal := container.StopSignal() // 1. Send a stop signal if err := daemon.killPossiblyDeadProcess(container, stopSignal); err != nil { - logrus.Infof("Failed to send signal %d to the process, force killing", stopSignal) - if err := daemon.killPossiblyDeadProcess(container, 9); err != nil { - return err + // While normally we might "return err" here we're not going to + // because if we can't stop the container by this point then + // its probably because its already stopped. Meaning, between + // the time of the IsRunning() call above and now it stopped. + // Also, since the err return will be environment specific we can't + // look for any particular (common) error that would indicate + // that the process is already dead vs something else going wrong. + // So, instead we'll give it up to 2 more seconds to complete and if + // by that time the container is still running, then the error + // we got is probably valid and so we force kill it. + if _, err := container.WaitStop(2 * time.Second); err != nil { + logrus.Infof("Container failed to stop after sending signal %d to the process, force killing", stopSignal) + if err := daemon.killPossiblyDeadProcess(container, 9); err != nil { + return err + } } } diff --git a/libcontainerd/client.go b/libcontainerd/client.go index 7e8e47bcfa..c14c1c5e46 100644 --- a/libcontainerd/client.go +++ b/libcontainerd/client.go @@ -29,9 +29,9 @@ func (clnt *client) appendContainer(cont *container) { clnt.containers[cont.containerID] = cont clnt.mapMutex.Unlock() } -func (clnt *client) deleteContainer(friendlyName string) { +func (clnt *client) deleteContainer(containerID string) { clnt.mapMutex.Lock() - delete(clnt.containers, friendlyName) + delete(clnt.containers, containerID) clnt.mapMutex.Unlock() } diff --git a/libcontainerd/client_windows.go b/libcontainerd/client_windows.go index 07cf17abe3..f0da3e2262 100644 --- a/libcontainerd/client_windows.go +++ b/libcontainerd/client_windows.go @@ -38,6 +38,8 @@ const defaultOwner = "docker" // Create is the entrypoint to create a container from a spec, and if successfully // created, start it too. func (clnt *client) Create(containerID string, checkpoint string, checkpointDir string, spec Spec, options ...CreateOption) error { + clnt.lock(containerID) + defer clnt.unlock(containerID) logrus.Debugln("libcontainerd: client.Create() with spec", spec) configuration := &hcsshim.ContainerConfig{ @@ -220,6 +222,13 @@ func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendly return err } + pid := newProcess.Pid() + openedProcess, err := container.hcsContainer.OpenProcess(pid) + if err != nil { + logrus.Errorf("AddProcess %s OpenProcess() failed %s", containerID, err) + return err + } + stdin, stdout, stderr, err = newProcess.Stdio() if err != nil { logrus.Errorf("libcontainerd: %s getting std pipes failed %s", containerID, err) @@ -237,8 +246,6 @@ func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendly iopipe.Stderr = openReaderFromPipe(stderr) } - pid := newProcess.Pid() - proc := &process{ processCommon: processCommon{ containerID: containerID, @@ -247,7 +254,7 @@ func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendly systemPid: uint32(pid), }, commandLine: createProcessParms.CommandLine, - hcsProcess: newProcess, + hcsProcess: openedProcess, } // Add the process to the container's list of processes @@ -279,7 +286,7 @@ func (clnt *client) Signal(containerID string, sig int) error { err error ) - // Get the container as we need it to find the pid of the process. + // Get the container as we need it to get the container handle. clnt.lock(containerID) defer clnt.unlock(containerID) if cont, err = clnt.getContainer(containerID); err != nil { diff --git a/libcontainerd/container_windows.go b/libcontainerd/container_windows.go index f3214d8a77..fdc6fa9915 100644 --- a/libcontainerd/container_windows.go +++ b/libcontainerd/container_windows.go @@ -35,6 +35,8 @@ func (ctr *container) newProcess(friendlyName string) *process { } } +// start starts a created container. +// Caller needs to lock container ID before calling this method. func (ctr *container) start() error { var err error isServicing := false @@ -77,7 +79,7 @@ func (ctr *container) start() error { createProcessParms.CommandLine = strings.Join(ctr.ociSpec.Process.Args, " ") // Start the command running in the container. - hcsProcess, err := ctr.hcsContainer.CreateProcess(createProcessParms) + newProcess, err := ctr.hcsContainer.CreateProcess(createProcessParms) if err != nil { logrus.Errorf("libcontainerd: CreateProcess() failed %s", err) if err := ctr.terminate(); err != nil { @@ -89,10 +91,21 @@ func (ctr *container) start() error { } ctr.startedAt = time.Now() + pid := newProcess.Pid() + openedProcess, err := ctr.hcsContainer.OpenProcess(pid) + if err != nil { + logrus.Errorf("OpenProcess() failed %s", err) + if err := ctr.terminate(); err != nil { + logrus.Errorf("Failed to cleanup after a failed OpenProcess. %s", err) + } else { + logrus.Debugln("Cleaned up after failed OpenProcess by calling Terminate") + } + return err + } + // Save the hcs Process and PID ctr.process.friendlyName = InitFriendlyName - pid := hcsProcess.Pid() - ctr.process.hcsProcess = hcsProcess + ctr.process.hcsProcess = openedProcess // If this is a servicing container, wait on the process synchronously here and // if it succeeds, wait for it cleanly shutdown and merge into the parent container. @@ -109,7 +122,7 @@ func (ctr *container) start() error { var stdout, stderr io.ReadCloser var stdin io.WriteCloser - stdin, stdout, stderr, err = hcsProcess.Stdio() + stdin, stdout, stderr, err = newProcess.Stdio() if err != nil { logrus.Errorf("libcontainerd: failed to get stdio pipes: %s", err) if err := ctr.terminate(); err != nil { @@ -120,7 +133,7 @@ func (ctr *container) start() error { iopipe := &IOPipe{Terminal: ctr.ociSpec.Process.Terminal} - iopipe.Stdin = createStdInCloser(stdin, hcsProcess) + iopipe.Stdin = createStdInCloser(stdin, newProcess) // Convert io.ReadClosers to io.Readers if stdout != nil { @@ -150,6 +163,7 @@ func (ctr *container) start() error { State: StateStart, Pid: ctr.systemPid, // Not sure this is needed? Double-check monitor.go in daemon BUGBUG @jhowardmsft }} + logrus.Debugf("libcontainerd: start() completed OK, %+v", si) return ctr.client.backend.StateChanged(ctr.containerID, si) } @@ -181,10 +195,6 @@ func (ctr *container) waitProcessExitCode(process *process) int { // has exited to avoid a container being dropped on the floor. } - if err := process.hcsProcess.Close(); err != nil { - logrus.Errorf("libcontainerd: hcsProcess.Close(): %v", err) - } - return exitCode } @@ -196,6 +206,8 @@ func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) err logrus.Debugln("libcontainerd: waitExit() on pid", process.systemPid) exitCode := ctr.waitProcessExitCode(process) + // Lock the container while shutting down + ctr.client.lock(ctr.containerID) // Assume the container has exited si := StateInfo{ @@ -211,6 +223,7 @@ func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) err // But it could have been an exec'd process which exited if !isFirstProcessToStart { si.State = StateExitProcess + ctr.cleanProcess(process.friendlyName) } else { updatePending, err := ctr.hcsContainer.HasPendingUpdates() if err != nil { @@ -236,6 +249,7 @@ func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) err } else if restart { si.State = StateRestart ctr.restarting = true + ctr.client.deleteContainer(ctr.containerID) waitRestart = wait } } @@ -243,10 +257,17 @@ func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) err // Remove process from list if we have exited // We need to do so here in case the Message Handler decides to restart it. if si.State == StateExit { - ctr.client.deleteContainer(ctr.friendlyName) + ctr.client.deleteContainer(ctr.containerID) } } + if err := process.hcsProcess.Close(); err != nil { + logrus.Errorf("libcontainerd: hcsProcess.Close(): %v", err) + } + + // Unlock here before we call back into the daemon to update state + ctr.client.unlock(ctr.containerID) + // Call into the backend to notify it of the state change. logrus.Debugf("libcontainerd: waitExit() calling backend.StateChanged %+v", si) if err := ctr.client.backend.StateChanged(ctr.containerID, si); err != nil { @@ -256,7 +277,6 @@ func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) err go func() { err := <-waitRestart ctr.restarting = false - ctr.client.deleteContainer(ctr.friendlyName) if err == nil { if err = ctr.client.Create(ctr.containerID, "", "", ctr.ociSpec, ctr.options...); err != nil { logrus.Errorf("libcontainerd: error restarting %v", err) @@ -275,6 +295,14 @@ func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) err return nil } +// cleanProcess removes process from the map. +// Caller needs to lock container ID before calling this method. +func (ctr *container) cleanProcess(id string) { + delete(ctr.processes, id) +} + +// shutdown shuts down the container in HCS +// Caller needs to lock container ID before calling this method. func (ctr *container) shutdown() error { const shutdownTimeout = time.Minute * 5 err := ctr.hcsContainer.Shutdown() @@ -296,6 +324,8 @@ func (ctr *container) shutdown() error { return nil } +// terminate terminates the container in HCS +// Caller needs to lock container ID before calling this method. func (ctr *container) terminate() error { const terminateTimeout = time.Minute * 5 err := ctr.hcsContainer.Terminate() diff --git a/libcontainerd/process_windows.go b/libcontainerd/process_windows.go index d783c2761a..038cbde0ec 100644 --- a/libcontainerd/process_windows.go +++ b/libcontainerd/process_windows.go @@ -4,6 +4,7 @@ import ( "io" "github.com/Microsoft/hcsshim" + "github.com/docker/docker/pkg/ioutils" ) // process keeps the state for both main container process and exec process. @@ -29,26 +30,23 @@ func openReaderFromPipe(p io.ReadCloser) io.Reader { return r } -type stdInCloser struct { - io.WriteCloser - hcsshim.Process -} +func createStdInCloser(pipe io.WriteCloser, process hcsshim.Process) io.WriteCloser { + return ioutils.NewWriteCloserWrapper(pipe, func() error { + if err := pipe.Close(); err != nil { + return err + } -func createStdInCloser(pipe io.WriteCloser, process hcsshim.Process) *stdInCloser { - return &stdInCloser{ - WriteCloser: pipe, - Process: process, - } -} + // We do not need to lock container ID here, even though + // we are calling into hcsshim. This is safe, because the + // only place that closes this process handle is this method. + err := process.CloseStdin() + if err != nil && !hcsshim.IsNotExist(err) { + // This error will occur if the compute system is currently shutting down + if perr, ok := err.(*hcsshim.ProcessError); ok && perr.Err != hcsshim.ErrVmcomputeOperationInvalidState { + return err + } + } -func (stdin *stdInCloser) Close() error { - if err := stdin.WriteCloser.Close(); err != nil { - return err - } - - return stdin.Process.CloseStdin() -} - -func (stdin *stdInCloser) Write(p []byte) (n int, err error) { - return stdin.WriteCloser.Write(p) + return process.Close() + }) }