Fix critical bug: can't restart a restarting container
When user try to restart a restarting container, docker client report error: "container is already active", and container will be stopped instead be restarted which is seriously wrong. What's more critical is that when user try to start this container again, it will always fail. This error can also be reproduced with a `docker stop`+`docker start`. And this commit will fix the bug. Signed-off-by: Zhang Wei <zhangwei555@huawei.com>
This commit is contained in:
parent
3e14070c88
commit
a705e166cf
5 changed files with 41 additions and 3 deletions
|
@ -3,6 +3,7 @@ package daemon
|
|||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
|
@ -81,7 +82,14 @@ func (daemon *Daemon) killWithSignal(container *container.Container, sig int) er
|
|||
}
|
||||
|
||||
if err := daemon.kill(container, sig); err != nil {
|
||||
return fmt.Errorf("Cannot kill container %s: %s", container.ID, err)
|
||||
err = fmt.Errorf("Cannot kill container %s: %s", container.ID, err)
|
||||
// if container or process not exists, ignore the error
|
||||
if strings.Contains(err.Error(), "container not found") ||
|
||||
strings.Contains(err.Error(), "no such process") {
|
||||
logrus.Warnf("%s", err.Error())
|
||||
} else {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
attributes := map[string]string{
|
||||
|
|
|
@ -218,3 +218,32 @@ func (s *DockerSuite) TestRestartPolicyAfterRestart(c *check.C) {
|
|||
err = waitInspect(id, "{{.State.Status}}", "running", 30*time.Second)
|
||||
c.Assert(err, check.IsNil)
|
||||
}
|
||||
|
||||
func (s *DockerSuite) TestRestartContainerwithRestartPolicy(c *check.C) {
|
||||
out1, _ := dockerCmd(c, "run", "-d", "--restart=on-failure:3", "busybox", "false")
|
||||
out2, _ := dockerCmd(c, "run", "-d", "--restart=always", "busybox", "false")
|
||||
|
||||
id1 := strings.TrimSpace(string(out1))
|
||||
id2 := strings.TrimSpace(string(out2))
|
||||
err := waitInspect(id1, "{{ .State.Restarting }} {{ .State.Running }}", "false false", 30*time.Second)
|
||||
c.Assert(err, checker.IsNil)
|
||||
|
||||
// TODO: fix racey problem during restart:
|
||||
// https://jenkins.dockerproject.org/job/Docker-PRs-Win2Lin/24665/console
|
||||
// Error response from daemon: Cannot restart container 6655f620d90b390527db23c0a15b3e46d86a58ecec20a5697ab228d860174251: remove /var/run/docker/libcontainerd/6655f620d90b390527db23c0a15b3e46d86a58ecec20a5697ab228d860174251/rootfs: device or resource busy
|
||||
if _, _, err := dockerCmdWithError("restart", id1); err != nil {
|
||||
// if restart met racey problem, try again
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
dockerCmd(c, "restart", id1)
|
||||
}
|
||||
if _, _, err := dockerCmdWithError("restart", id2); err != nil {
|
||||
// if restart met racey problem, try again
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
dockerCmd(c, "restart", id2)
|
||||
}
|
||||
|
||||
dockerCmd(c, "stop", id1)
|
||||
dockerCmd(c, "stop", id2)
|
||||
dockerCmd(c, "start", id1)
|
||||
dockerCmd(c, "start", id2)
|
||||
}
|
||||
|
|
|
@ -134,7 +134,7 @@ func (clnt *client) Create(containerID string, spec Spec, options ...CreateOptio
|
|||
defer clnt.unlock(containerID)
|
||||
|
||||
if ctr, err := clnt.getContainer(containerID); err == nil {
|
||||
if ctr.restarting { // docker doesn't actually call start if restart is on atm, but probably should in the future
|
||||
if ctr.restarting {
|
||||
ctr.restartManager.Cancel()
|
||||
ctr.clean()
|
||||
} else {
|
||||
|
|
|
@ -124,6 +124,7 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
|
|||
} else if restart {
|
||||
st.State = StateRestart
|
||||
ctr.restarting = true
|
||||
ctr.client.deleteContainer(e.Id)
|
||||
go func() {
|
||||
err := <-wait
|
||||
ctr.restarting = false
|
||||
|
|
|
@ -51,7 +51,7 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro
|
|||
}()
|
||||
|
||||
if rm.canceled {
|
||||
return false, nil, nil
|
||||
return false, nil, fmt.Errorf("restartmanager canceled")
|
||||
}
|
||||
|
||||
if rm.active {
|
||||
|
|
Loading…
Add table
Reference in a new issue