Browse Source

serfJoin doesn't happen if self notification comes later

With the recently introduced docker discovery, the self node discovery
notification can reach the overlay driver after the remote node
discovery notification. In scenarios such as a 2-node setup, this ordering
seems more likely. In those scenarios, serfJoin is not triggered and hence
the neighborship is not formed between the 2 nodes.

The fix is to retain the knowledge of the neighbor and reuse it
immediately after the serfInit is done. Since we do the serfJoin just
once, there is no harm in changing the neighIP to a new value even if it
is not used.

Signed-off-by: Madhu Venugopal <madhu@docker.com>
Madhu Venugopal 9 years ago
parent
commit
fac4e67c16
2 changed files with 15 additions and 11 deletions
  1. 4 4
      libnetwork/drivers/overlay/ov_serf.go
  2. 11 7
      libnetwork/drivers/overlay/overlay.go

+ 4 - 4
libnetwork/drivers/overlay/ov_serf.go

@@ -68,13 +68,13 @@ func (d *driver) serfInit() error {
 	return nil
 	return nil
 }
 }
 
 
-func (d *driver) serfJoin() error {
-	if d.neighIP == "" {
+func (d *driver) serfJoin(neighIP string) error {
+	if neighIP == "" {
 		return fmt.Errorf("no neighbor to join")
 		return fmt.Errorf("no neighbor to join")
 	}
 	}
-	if _, err := d.serfInstance.Join([]string{d.neighIP}, false); err != nil {
+	if _, err := d.serfInstance.Join([]string{neighIP}, false); err != nil {
 		return fmt.Errorf("Failed to join the cluster at neigh IP %s: %v",
 		return fmt.Errorf("Failed to join the cluster at neigh IP %s: %v",
-			d.neighIP, err)
+			neighIP, err)
 	}
 	}
 	return nil
 	return nil
 }
 }

+ 11 - 7
libnetwork/drivers/overlay/overlay.go

@@ -152,7 +152,7 @@ func (d *driver) Type() string {
 }
 }
 
 
 func (d *driver) nodeJoin(node string, self bool) {
 func (d *driver) nodeJoin(node string, self bool) {
-	if self && node != "" && !d.isSerfAlive() {
+	if self && !d.isSerfAlive() {
 		d.Lock()
 		d.Lock()
 		d.bindAddress = node
 		d.bindAddress = node
 		d.Unlock()
 		d.Unlock()
@@ -163,13 +163,17 @@ func (d *driver) nodeJoin(node string, self bool) {
 		}
 		}
 	}
 	}
 
 
-	if d.serfInstance != nil && !self && node != "" {
+	d.Lock()
+	if !self {
+		d.neighIP = node
+	}
+	neighIP := d.neighIP
+	d.Unlock()
+
+	if d.serfInstance != nil && neighIP != "" {
 		var err error
 		var err error
 		d.joinOnce.Do(func() {
 		d.joinOnce.Do(func() {
-			d.Lock()
-			d.neighIP = node
-			d.Unlock()
-			err = d.serfJoin()
+			err = d.serfJoin(neighIP)
 		})
 		})
 		if err != nil {
 		if err != nil {
 			logrus.Errorf("joining serf neighbor %s failed: %v", node, err)
 			logrus.Errorf("joining serf neighbor %s failed: %v", node, err)
@@ -185,7 +189,7 @@ func (d *driver) nodeJoin(node string, self bool) {
 func (d *driver) DiscoverNew(dType driverapi.DiscoveryType, data interface{}) error {
 func (d *driver) DiscoverNew(dType driverapi.DiscoveryType, data interface{}) error {
 	if dType == driverapi.NodeDiscovery {
 	if dType == driverapi.NodeDiscovery {
 		nodeData, ok := data.(driverapi.NodeDiscoveryData)
 		nodeData, ok := data.(driverapi.NodeDiscoveryData)
-		if !ok {
+		if !ok || nodeData.Address == "" {
 			return fmt.Errorf("invalid discovery data")
 			return fmt.Errorf("invalid discovery data")
 		}
 		}
 		d.nodeJoin(nodeData.Address, nodeData.Self)
 		d.nodeJoin(nodeData.Address, nodeData.Self)