فهرست منبع

Daemon Restart: attempt to wait for container deps

On daemon restart, make a best-effort attempt to wait for a container's
linked containers to come up before restarting it, instead of failing
because those linked containers are not running yet.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>
Brian Goff 9 سال پیش
والد
کامیت
19762da67e
2 فایل تغییر یافته به همراه 107 افزوده شده و 27 حذف شده
  1. 43 27
      daemon/daemon.go
  2. 64 0
      integration-cli/docker_cli_daemon_test.go

+ 43 - 27
daemon/daemon.go

@@ -351,41 +351,57 @@ func (daemon *Daemon) restore() error {
 		}
 	}
 
-	group := sync.WaitGroup{}
+	restartContainers := make(map[*container.Container]chan struct{})
 	for _, c := range containers {
-		group.Add(1)
-
-		go func(container *container.Container, registered bool) {
-			defer group.Done()
-
-			if !registered {
-				// Try to set the default name for a container if it exists prior to links
-				container.Name, err = daemon.generateNewName(container.ID)
-				if err != nil {
-					logrus.Debugf("Setting default id - %s", err)
-				}
+		if !c.registered {
+			// Try to set the default name for a container if it exists prior to links
+			c.container.Name, err = daemon.generateNewName(c.container.ID)
+			if err != nil {
+				logrus.Debugf("Setting default id - %s", err)
 			}
-			if err := daemon.registerName(container); err != nil {
-				logrus.Errorf("Failed to register container %s: %s", container.ID, err)
-				return
+			if err := daemon.registerName(c.container); err != nil {
+				logrus.Errorf("Failed to register container %s: %s", c.container.ID, err)
+				continue
 			}
+		}
 
-			if err := daemon.Register(container); err != nil {
-				logrus.Errorf("Failed to register container %s: %s", container.ID, err)
-				// The container register failed should not be started.
-				return
-			}
+		if err := daemon.Register(c.container); err != nil {
+			logrus.Errorf("Failed to register container %s: %s", c.container.ID, err)
+			continue
+		}
+		// get list of containers we need to restart
+		if daemon.configStore.AutoRestart && c.container.ShouldRestart() {
+			restartContainers[c.container] = make(chan struct{})
+		}
+	}
 
-			// check the restart policy on the containers and restart any container with
-			// the restart policy of "always"
-			if daemon.configStore.AutoRestart && container.ShouldRestart() {
-				logrus.Debugf("Starting container %s", container.ID)
+	group := sync.WaitGroup{}
+	for c, notifier := range restartContainers {
+		group.Add(1)
+		go func(container *container.Container, chNotify chan struct{}) {
+			defer group.Done()
+			logrus.Debugf("Starting container %s", container.ID)
 
-				if err := daemon.containerStart(container); err != nil {
-					logrus.Errorf("Failed to start container %s: %s", container.ID, err)
+			// ignore errors here as this is a best effort to wait for children to be
+			//   running before we try to start the container
+			children, err := daemon.children(container.Name)
+			if err != nil {
+				logrus.Warnf("error getting children for %s: %v", container.Name, err)
+			}
+			timeout := time.After(5 * time.Second)
+			for _, child := range children {
+				if notifier, exists := restartContainers[child]; exists {
+					select {
+					case <-notifier:
+					case <-timeout:
+					}
 				}
 			}
-		}(c.container, c.registered)
+			if err := daemon.containerStart(container); err != nil {
+				logrus.Errorf("Failed to start container %s: %s", container.ID, err)
+			}
+			close(chNotify)
+		}(c, notifier)
 	}
 	group.Wait()
 

+ 64 - 0
integration-cli/docker_cli_daemon_test.go

@@ -14,6 +14,7 @@ import (
 	"regexp"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/docker/docker/pkg/integration/checker"
@@ -1878,3 +1879,66 @@ func (s *DockerDaemonSuite) TestDaemonNoSpaceleftOnDeviceError(c *check.C) {
 	out, err := s.d.Cmd("pull", "registry:2")
 	c.Assert(out, check.Not(check.Equals), 1, check.Commentf("no space left on device"))
 }
+
+// Test daemon restart with container links + auto restart
+func (s *DockerDaemonSuite) TestDaemonRestartContainerLinksRestart(c *check.C) {
+	d := NewDaemon(c)
+	err := d.StartWithBusybox()
+	c.Assert(err, checker.IsNil)
+
+	parent1Args := []string{}
+	parent2Args := []string{}
+	wg := sync.WaitGroup{}
+	maxChildren := 10
+	chErr := make(chan error, maxChildren)
+
+	for i := 0; i < maxChildren; i++ {
+		wg.Add(1)
+		name := fmt.Sprintf("test%d", i)
+
+		if i < maxChildren/2 {
+			parent1Args = append(parent1Args, []string{"--link", name}...)
+		} else {
+			parent2Args = append(parent2Args, []string{"--link", name}...)
+		}
+
+		go func() {
+			_, err = d.Cmd("run", "-d", "--name", name, "--restart=always", "busybox", "top")
+			chErr <- err
+			wg.Done()
+		}()
+	}
+
+	wg.Wait()
+	close(chErr)
+	for err := range chErr {
+		c.Assert(err, check.IsNil)
+	}
+
+	parent1Args = append([]string{"run", "-d"}, parent1Args...)
+	parent1Args = append(parent1Args, []string{"--name=parent1", "--restart=always", "busybox", "top"}...)
+	parent2Args = append([]string{"run", "-d"}, parent2Args...)
+	parent2Args = append(parent2Args, []string{"--name=parent2", "--restart=always", "busybox", "top"}...)
+
+	_, err = d.Cmd(parent1Args[0], parent1Args[1:]...)
+	c.Assert(err, check.IsNil)
+	_, err = d.Cmd(parent2Args[0], parent2Args[1:]...)
+	c.Assert(err, check.IsNil)
+
+	err = d.Stop()
+	c.Assert(err, check.IsNil)
+	// clear the log file -- we don't need any of it but may for the next part
+	// can ignore the error here, this is just a cleanup
+	os.Truncate(d.LogfileName(), 0)
+	err = d.Start()
+	c.Assert(err, check.IsNil)
+
+	for _, num := range []string{"1", "2"} {
+		out, err := d.Cmd("inspect", "-f", "{{ .State.Running }}", "parent"+num)
+		c.Assert(err, check.IsNil)
+		if strings.TrimSpace(out) != "true" {
+			log, _ := ioutil.ReadFile(d.LogfileName())
+			c.Fatalf("parent container is not running\n%s", string(log))
+		}
+	}
+}