Retry AttachNetwork when it fails to find network
When trying to attach to swarm scope network for an unmanaged container sometimes even if attaching to network succeeds, we may not find the network because some other container which was using the network went down and removed the network. So if it is not found, try to detach and reattach to re-download the network from the manager. Fixes #26588 Signed-off-by: Jana Radhakrishnan <mrjana@docker.com>
This commit is contained in:
parent
a13fc3fd36
commit
849e345e2c
1 changed files with 40 additions and 15 deletions
|
@ -357,26 +357,51 @@ func (daemon *Daemon) findAndAttachNetwork(container *container.Container, idOrN
|
|||
}
|
||||
}
|
||||
|
||||
// In all other cases, attempt to attach to the network to
|
||||
// trigger attachment in the swarm cluster manager.
|
||||
var config *networktypes.NetworkingConfig
|
||||
if daemon.clusterProvider != nil {
|
||||
var err error
|
||||
config, err = daemon.clusterProvider.AttachNetwork(idOrName, container.ID, addresses)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
var (
|
||||
config *networktypes.NetworkingConfig
|
||||
retryCount int
|
||||
)
|
||||
|
||||
n, err = daemon.FindNetwork(idOrName)
|
||||
if err != nil {
|
||||
for {
|
||||
// In all other cases, attempt to attach to the network to
|
||||
// trigger attachment in the swarm cluster manager.
|
||||
if daemon.clusterProvider != nil {
|
||||
if err := daemon.clusterProvider.DetachNetwork(idOrName, container.ID); err != nil {
|
||||
logrus.Warnf("Could not rollback attachment for container %s to network %s: %v", container.ID, idOrName, err)
|
||||
var err error
|
||||
config, err = daemon.clusterProvider.AttachNetwork(idOrName, container.ID, addresses)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil, err
|
||||
n, err = daemon.FindNetwork(idOrName)
|
||||
if err != nil {
|
||||
if daemon.clusterProvider != nil {
|
||||
if err := daemon.clusterProvider.DetachNetwork(idOrName, container.ID); err != nil {
|
||||
logrus.Warnf("Could not rollback attachment for container %s to network %s: %v", container.ID, idOrName, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Retry network attach again if we failed to
|
||||
// find the network after successfull
|
||||
// attachment because the only reason that
|
||||
// would happen is if some other container
|
||||
// attached to the swarm scope network went down
|
||||
// and removed the network while we were in
|
||||
// the process of attaching.
|
||||
if config != nil {
|
||||
if _, ok := err.(libnetwork.ErrNoSuchNetwork); ok {
|
||||
if retryCount >= 5 {
|
||||
return nil, nil, fmt.Errorf("could not find network %s after successful attachment", idOrName)
|
||||
}
|
||||
retryCount++
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
// This container has attachment to a swarm scope
|
||||
|
|
Loading…
Reference in a new issue