Browse Source

Fix infinite loop in bulk sync

Due to a slice management logic error, the bulk sync for loop can go on
indefinitely, eventually leading to an OOM error. Fixed the logic so
that an infinite loop never occurs. Also changed the bulk sync wait
timeout to use a timer rather than time.After, as time.After is known
to consume a lot of memory when called in a tight loop.

Signed-off-by: Jana Radhakrishnan <mrjana@docker.com>
Jana Radhakrishnan 9 years ago
parent
commit
6034058dc3
1 changed file with 8 additions and 2 deletions
  1. 8 2
      libnetwork/networkdb/cluster.go

+ 8 - 2
libnetwork/networkdb/cluster.go

@@ -330,11 +330,15 @@ func (nDB *NetworkDB) bulkSyncTables() {
 		// successfully completed bulk sync in this iteration.
 		// successfully completed bulk sync in this iteration.
 		updatedNetworks := make([]string, 0, len(networks))
 		updatedNetworks := make([]string, 0, len(networks))
 		for _, nid := range networks {
 		for _, nid := range networks {
+			var found bool
 			for _, completedNid := range completed {
 			for _, completedNid := range completed {
 				if nid == completedNid {
 				if nid == completedNid {
-					continue
+					found = true
+					break
 				}
 				}
+			}
 
 
+			if !found {
 				updatedNetworks = append(updatedNetworks, nid)
 				updatedNetworks = append(updatedNetworks, nid)
 			}
 			}
 		}
 		}
@@ -449,8 +453,9 @@ func (nDB *NetworkDB) bulkSyncNode(networks []string, node string, unsolicited b
 	// Wait on a response only if it is unsolicited.
 	// Wait on a response only if it is unsolicited.
 	if unsolicited {
 	if unsolicited {
 		startTime := time.Now()
 		startTime := time.Now()
+		t := time.NewTimer(30 * time.Second)
 		select {
 		select {
-		case <-time.After(30 * time.Second):
+		case <-t.C:
 			logrus.Errorf("Bulk sync to node %s timed out", node)
 			logrus.Errorf("Bulk sync to node %s timed out", node)
 		case <-ch:
 		case <-ch:
 			nDB.Lock()
 			nDB.Lock()
@@ -459,6 +464,7 @@ func (nDB *NetworkDB) bulkSyncNode(networks []string, node string, unsolicited b
 
 
 			logrus.Debugf("%s: Bulk sync to node %s took %s", nDB.config.NodeName, node, time.Now().Sub(startTime))
 			logrus.Debugf("%s: Bulk sync to node %s took %s", nDB.config.NodeName, node, time.Now().Sub(startTime))
 		}
 		}
+		t.Stop()
 	}
 	}
 
 
 	return nil
 	return nil