Explorar el Código

Merge pull request #26449 from mrjana/net

Fix autostart for swarm scope connected containers
Madhu Venugopal hace 8 años
padre
commit
1d76ab4f80

+ 5 - 0
cmd/dockerd/daemon.go

@@ -262,6 +262,11 @@ func (cli *DaemonCli) start(opts daemonOptions) (err error) {
 		logrus.Fatalf("Error creating cluster component: %v", err)
 	}
 
+	// Restart all autostart containers which have a swarm endpoint
+	// and are not yet running now that we have successfully
+	// initialized the cluster.
+	d.RestartSwarmContainers()
+
 	logrus.Info("Daemon has completed initialization")
 
 	logrus.WithFields(logrus.Fields{

+ 28 - 9
daemon/cluster/cluster.go

@@ -135,10 +135,11 @@ type Cluster struct {
 // helps in identifying the attachment ID via the taskID and the
 // corresponding attachment configuration obtained from the manager.
 type attacher struct {
-	taskID       string
-	config       *network.NetworkingConfig
-	attachWaitCh chan *network.NetworkingConfig
-	detachWaitCh chan struct{}
+	taskID           string
+	config           *network.NetworkingConfig
+	attachWaitCh     chan *network.NetworkingConfig
+	attachCompleteCh chan struct{}
+	detachWaitCh     chan struct{}
 }
 
 type node struct {
@@ -1262,12 +1263,24 @@ func (c *Cluster) WaitForDetachment(ctx context.Context, networkName, networkID,
 	agent := c.node.Agent()
 	c.RUnlock()
 
-	if ok && attacher != nil && attacher.detachWaitCh != nil {
+	if ok && attacher != nil &&
+		attacher.detachWaitCh != nil &&
+		attacher.attachCompleteCh != nil {
+		// Attachment may still be in progress, so wait for
+		// attachment to complete.
 		select {
-		case <-attacher.detachWaitCh:
+		case <-attacher.attachCompleteCh:
 		case <-ctx.Done():
 			return ctx.Err()
 		}
+
+		if attacher.taskID == taskID {
+			select {
+			case <-attacher.detachWaitCh:
+			case <-ctx.Done():
+				return ctx.Err()
+			}
+		}
 	}
 
 	return agent.ResourceAllocator().DetachNetwork(ctx, taskID)
@@ -1289,9 +1302,11 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s
 	agent := c.node.Agent()
 	attachWaitCh := make(chan *network.NetworkingConfig)
 	detachWaitCh := make(chan struct{})
+	attachCompleteCh := make(chan struct{})
 	c.attachers[aKey] = &attacher{
-		attachWaitCh: attachWaitCh,
-		detachWaitCh: detachWaitCh,
+		attachWaitCh:     attachWaitCh,
+		attachCompleteCh: attachCompleteCh,
+		detachWaitCh:     detachWaitCh,
 	}
 	c.Unlock()
 
@@ -1306,6 +1321,11 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s
 		return nil, fmt.Errorf("Could not attach to network %s: %v", target, err)
 	}
 
+	c.Lock()
+	c.attachers[aKey].taskID = taskID
+	close(attachCompleteCh)
+	c.Unlock()
+
 	logrus.Debugf("Successfully attached to network %s with tid %s", target, taskID)
 
 	var config *network.NetworkingConfig
@@ -1316,7 +1336,6 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s
 	}
 
 	c.Lock()
-	c.attachers[aKey].taskID = taskID
 	c.attachers[aKey].config = config
 	c.Unlock()
 	return config, nil

+ 5 - 0
daemon/container_operations.go

@@ -384,6 +384,9 @@ func (daemon *Daemon) findAndAttachNetwork(container *container.Container, idOrN
 		return nil, nil, err
 	}
 
+	// This container has an attachment to a swarm scope
+	// network. Update the container network settings accordingly.
+	container.NetworkSettings.HasSwarmEndpoint = true
 	return n, config, nil
 }
 
@@ -492,6 +495,7 @@ func (daemon *Daemon) allocateNetwork(container *container.Container) error {
 	// on first network connecting.
 	defaultNetName := runconfig.DefaultDaemonNetworkMode().NetworkName()
 	if nConf, ok := container.NetworkSettings.Networks[defaultNetName]; ok {
+		cleanOperationalData(nConf)
 		if err := daemon.connectToNetwork(container, defaultNetName, nConf.EndpointSettings, updateSettings); err != nil {
 			return err
 		}
@@ -512,6 +516,7 @@ func (daemon *Daemon) allocateNetwork(container *container.Container) error {
 	}
 
 	for i, epConf := range epConfigs {
+		cleanOperationalData(epConf)
 		if err := daemon.connectToNetwork(container, networks[i], epConf.EndpointSettings, updateSettings); err != nil {
 			return err
 		}

+ 31 - 1
daemon/daemon.go

@@ -206,7 +206,13 @@ func (daemon *Daemon) restore() error {
 			// fixme: only if not running
 			// get list of containers we need to restart
 			if !c.IsRunning() && !c.IsPaused() {
-				if daemon.configStore.AutoRestart && c.ShouldRestart() {
+				// Do not autostart containers which
+				// have endpoints in a swarm scope
+				// network yet since the cluster is
+				// not initialized yet. We will start
+				// them after the cluster is
+				// initialized.
+				if daemon.configStore.AutoRestart && c.ShouldRestart() && !c.NetworkSettings.HasSwarmEndpoint {
 					mapLock.Lock()
 					restartContainers[c] = make(chan struct{})
 					mapLock.Unlock()
@@ -350,6 +356,30 @@ func (daemon *Daemon) restore() error {
 	return nil
 }
 
+// RestartSwarmContainers concurrently starts each non-running, non-paused autostart
+// container that has a swarm endpoint, logs any start error, and waits for all starts.
+func (daemon *Daemon) RestartSwarmContainers() {
+	group := sync.WaitGroup{}
+	for _, c := range daemon.List() {
+		if !c.IsRunning() && !c.IsPaused() {
+			// Autostart every container that has a
+			// swarm endpoint now that the cluster is
+			// initialized.
+			if daemon.configStore.AutoRestart && c.ShouldRestart() && c.NetworkSettings.HasSwarmEndpoint {
+				group.Add(1)
+				go func(c *container.Container) {
+					defer group.Done()
+					if err := daemon.containerStart(c, ""); err != nil {
+						logrus.Error(err) // a failed start is only logged; nothing is returned to the caller
+					}
+				}(c)
+			}
+		}
+
+	}
+	group.Wait() // block until every start attempt has returned
+}
+
 // waitForNetworks is used during daemon initialization when starting up containers
 // It ensures that all of a container's networks are available before the daemon tries to start the container.
 // In practice it just makes sure the discovery service is available for containers which use a network that require discovery.

+ 1 - 0
daemon/network/settings.go

@@ -21,6 +21,7 @@ type Settings struct {
 	SecondaryIPAddresses   []networktypes.Address
 	SecondaryIPv6Addresses []networktypes.Address
 	IsAnonymousEndpoint    bool
+	HasSwarmEndpoint       bool
 }
 
 // EndpointSettings is a package local wrapper for

+ 22 - 0
integration-cli/docker_cli_swarm_test.go

@@ -242,3 +242,25 @@ func (s *DockerSwarmSuite) TestSwarmServiceWithGroup(c *check.C) {
 	c.Assert(err, checker.IsNil)
 	c.Assert(strings.TrimSpace(out), checker.Equals, "uid=0(root) gid=0(root) groups=10(wheel),29(audio),50(staff),777")
 }
+
+func (s *DockerSwarmSuite) TestSwarmContainerAutoStart(c *check.C) {
+	d := s.AddDaemon(c, true, true)
+
+	out, err := d.Cmd("network", "create", "--attachable", "-d", "overlay", "foo") // attachable overlay network "foo"
+	c.Assert(err, checker.IsNil)
+	c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
+
+	out, err = d.Cmd("run", "-id", "--restart=always", "--net=foo", "--name=test", "busybox", "top") // --restart=always container attached to "foo"
+	c.Assert(err, checker.IsNil)
+	c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
+
+	out, err = d.Cmd("ps", "-q") // "ps -q" must print something before the restart
+	c.Assert(err, checker.IsNil)
+	c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
+
+	d.Restart() // restart the daemon; the same "ps -q" check is repeated below
+
+	out, err = d.Cmd("ps", "-q") // output is asserted non-empty again after the restart
+	c.Assert(err, checker.IsNil)
+	c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
+}