Merge pull request #26449 from mrjana/net
Fix autostart for swarm scope connected containers
This commit is contained in:
commit
1d76ab4f80
6 changed files with 92 additions and 10 deletions
|
@ -262,6 +262,11 @@ func (cli *DaemonCli) start(opts daemonOptions) (err error) {
|
|||
logrus.Fatalf("Error creating cluster component: %v", err)
|
||||
}
|
||||
|
||||
// Restart all autostart containers which have a swarm endpoint
|
||||
// and are not yet running now that we have successfully
|
||||
// initialized the cluster.
|
||||
d.RestartSwarmContainers()
|
||||
|
||||
logrus.Info("Daemon has completed initialization")
|
||||
|
||||
logrus.WithFields(logrus.Fields{
|
||||
|
|
|
@ -135,10 +135,11 @@ type Cluster struct {
|
|||
// helps in identifying the attachment ID via the taskID and the
|
||||
// corresponding attachment configuration obtained from the manager.
|
||||
type attacher struct {
|
||||
taskID string
|
||||
config *network.NetworkingConfig
|
||||
attachWaitCh chan *network.NetworkingConfig
|
||||
detachWaitCh chan struct{}
|
||||
taskID string
|
||||
config *network.NetworkingConfig
|
||||
attachWaitCh chan *network.NetworkingConfig
|
||||
attachCompleteCh chan struct{}
|
||||
detachWaitCh chan struct{}
|
||||
}
|
||||
|
||||
type node struct {
|
||||
|
@ -1262,12 +1263,24 @@ func (c *Cluster) WaitForDetachment(ctx context.Context, networkName, networkID,
|
|||
agent := c.node.Agent()
|
||||
c.RUnlock()
|
||||
|
||||
if ok && attacher != nil && attacher.detachWaitCh != nil {
|
||||
if ok && attacher != nil &&
|
||||
attacher.detachWaitCh != nil &&
|
||||
attacher.attachCompleteCh != nil {
|
||||
// Attachment may be in progress still so wait for
|
||||
// attachment to complete.
|
||||
select {
|
||||
case <-attacher.detachWaitCh:
|
||||
case <-attacher.attachCompleteCh:
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
if attacher.taskID == taskID {
|
||||
select {
|
||||
case <-attacher.detachWaitCh:
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return agent.ResourceAllocator().DetachNetwork(ctx, taskID)
|
||||
|
@ -1289,9 +1302,11 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s
|
|||
agent := c.node.Agent()
|
||||
attachWaitCh := make(chan *network.NetworkingConfig)
|
||||
detachWaitCh := make(chan struct{})
|
||||
attachCompleteCh := make(chan struct{})
|
||||
c.attachers[aKey] = &attacher{
|
||||
attachWaitCh: attachWaitCh,
|
||||
detachWaitCh: detachWaitCh,
|
||||
attachWaitCh: attachWaitCh,
|
||||
attachCompleteCh: attachCompleteCh,
|
||||
detachWaitCh: detachWaitCh,
|
||||
}
|
||||
c.Unlock()
|
||||
|
||||
|
@ -1306,6 +1321,11 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s
|
|||
return nil, fmt.Errorf("Could not attach to network %s: %v", target, err)
|
||||
}
|
||||
|
||||
c.Lock()
|
||||
c.attachers[aKey].taskID = taskID
|
||||
close(attachCompleteCh)
|
||||
c.Unlock()
|
||||
|
||||
logrus.Debugf("Successfully attached to network %s with tid %s", target, taskID)
|
||||
|
||||
var config *network.NetworkingConfig
|
||||
|
@ -1316,7 +1336,6 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s
|
|||
}
|
||||
|
||||
c.Lock()
|
||||
c.attachers[aKey].taskID = taskID
|
||||
c.attachers[aKey].config = config
|
||||
c.Unlock()
|
||||
return config, nil
|
||||
|
|
|
@ -384,6 +384,9 @@ func (daemon *Daemon) findAndAttachNetwork(container *container.Container, idOrN
|
|||
return nil, nil, err
|
||||
}
|
||||
|
||||
// This container has an attachment to a swarm scope
|
||||
// network. Update the container network settings accordingly.
|
||||
container.NetworkSettings.HasSwarmEndpoint = true
|
||||
return n, config, nil
|
||||
}
|
||||
|
||||
|
@ -492,6 +495,7 @@ func (daemon *Daemon) allocateNetwork(container *container.Container) error {
|
|||
// on first network connecting.
|
||||
defaultNetName := runconfig.DefaultDaemonNetworkMode().NetworkName()
|
||||
if nConf, ok := container.NetworkSettings.Networks[defaultNetName]; ok {
|
||||
cleanOperationalData(nConf)
|
||||
if err := daemon.connectToNetwork(container, defaultNetName, nConf.EndpointSettings, updateSettings); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -512,6 +516,7 @@ func (daemon *Daemon) allocateNetwork(container *container.Container) error {
|
|||
}
|
||||
|
||||
for i, epConf := range epConfigs {
|
||||
cleanOperationalData(epConf)
|
||||
if err := daemon.connectToNetwork(container, networks[i], epConf.EndpointSettings, updateSettings); err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -206,7 +206,13 @@ func (daemon *Daemon) restore() error {
|
|||
// fixme: only if not running
|
||||
// get list of containers we need to restart
|
||||
if !c.IsRunning() && !c.IsPaused() {
|
||||
if daemon.configStore.AutoRestart && c.ShouldRestart() {
|
||||
// Do not autostart containers which
|
||||
// have endpoints in a swarm scope
|
||||
// network yet since the cluster is
|
||||
// not initialized yet. We will start
|
||||
// it after the cluster is
|
||||
// initialized.
|
||||
if daemon.configStore.AutoRestart && c.ShouldRestart() && !c.NetworkSettings.HasSwarmEndpoint {
|
||||
mapLock.Lock()
|
||||
restartContainers[c] = make(chan struct{})
|
||||
mapLock.Unlock()
|
||||
|
@ -350,6 +356,30 @@ func (daemon *Daemon) restore() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// RestartSwarmContainers restarts any autostart container which has a
|
||||
// swarm endpoint.
|
||||
func (daemon *Daemon) RestartSwarmContainers() {
|
||||
group := sync.WaitGroup{}
|
||||
for _, c := range daemon.List() {
|
||||
if !c.IsRunning() && !c.IsPaused() {
|
||||
// Autostart all the containers which has a
|
||||
// swarm endpoint now that the cluster is
|
||||
// initialized.
|
||||
if daemon.configStore.AutoRestart && c.ShouldRestart() && c.NetworkSettings.HasSwarmEndpoint {
|
||||
group.Add(1)
|
||||
go func(c *container.Container) {
|
||||
defer group.Done()
|
||||
if err := daemon.containerStart(c, ""); err != nil {
|
||||
logrus.Error(err)
|
||||
}
|
||||
}(c)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
group.Wait()
|
||||
}
|
||||
|
||||
// waitForNetworks is used during daemon initialization when starting up containers
|
||||
// It ensures that all of a container's networks are available before the daemon tries to start the container.
|
||||
// In practice it just makes sure the discovery service is available for containers which use a network that require discovery.
|
||||
|
|
|
@ -21,6 +21,7 @@ type Settings struct {
|
|||
SecondaryIPAddresses []networktypes.Address
|
||||
SecondaryIPv6Addresses []networktypes.Address
|
||||
IsAnonymousEndpoint bool
|
||||
HasSwarmEndpoint bool
|
||||
}
|
||||
|
||||
// EndpointSettings is a package local wrapper for
|
||||
|
|
|
@ -242,3 +242,25 @@ func (s *DockerSwarmSuite) TestSwarmServiceWithGroup(c *check.C) {
|
|||
c.Assert(err, checker.IsNil)
|
||||
c.Assert(strings.TrimSpace(out), checker.Equals, "uid=0(root) gid=0(root) groups=10(wheel),29(audio),50(staff),777")
|
||||
}
|
||||
|
||||
func (s *DockerSwarmSuite) TestSwarmContainerAutoStart(c *check.C) {
|
||||
d := s.AddDaemon(c, true, true)
|
||||
|
||||
out, err := d.Cmd("network", "create", "--attachable", "-d", "overlay", "foo")
|
||||
c.Assert(err, checker.IsNil)
|
||||
c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
|
||||
|
||||
out, err = d.Cmd("run", "-id", "--restart=always", "--net=foo", "--name=test", "busybox", "top")
|
||||
c.Assert(err, checker.IsNil)
|
||||
c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
|
||||
|
||||
out, err = d.Cmd("ps", "-q")
|
||||
c.Assert(err, checker.IsNil)
|
||||
c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
|
||||
|
||||
d.Restart()
|
||||
|
||||
out, err = d.Cmd("ps", "-q")
|
||||
c.Assert(err, checker.IsNil)
|
||||
c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue