Optimize networkDB queue
Added some optimizations to reduce the messages in the queue: 1) on join network the node execute a tcp sync with all the nodes that it is aware part of the specific network. During this time before the node was redistributing all the entries. This meant that if the network had 10K entries the queue of the joining node will jump to 10K. The fix adds a flag on the network that would avoid to insert any entry in the queue till the sync happens. Note that right now the flag is set in a best effort way, there is no real check if at least one of the nodes succeed. 2) limit the number of messages to redistribute coming from a TCP sync. Introduced a threshold that limit the number of messages that are propagated, this will disable this optimization in case of heavy load. Signed-off-by: Flavio Crisciani <flavio.crisciani@docker.com>
This commit is contained in:
parent
b09cb39fa5
commit
55e4cc7262
4 changed files with 51 additions and 17 deletions
|
@ -110,7 +110,6 @@ type tableEventMessage struct {
|
|||
tname string
|
||||
key string
|
||||
msg []byte
|
||||
node string
|
||||
}
|
||||
|
||||
func (m *tableEventMessage) Invalidates(other memberlist.Broadcast) bool {
|
||||
|
@ -168,7 +167,6 @@ func (nDB *NetworkDB) sendTableEvent(event TableEvent_Type, nid string, tname st
|
|||
id: nid,
|
||||
tname: tname,
|
||||
key: key,
|
||||
node: nDB.config.NodeID,
|
||||
})
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -24,6 +24,9 @@ const (
|
|||
retryInterval = 1 * time.Second
|
||||
nodeReapInterval = 24 * time.Hour
|
||||
nodeReapPeriod = 2 * time.Hour
|
||||
// considering a cluster with > 20 nodes and a drain speed of 100 msg/s
|
||||
// the following is roughly 1 minute
|
||||
maxQueueLenBroadcastOnSync = 500
|
||||
)
|
||||
|
||||
type logWriter struct{}
|
||||
|
@ -572,6 +575,7 @@ func (nDB *NetworkDB) bulkSync(nodes []string, all bool) ([]string, error) {
|
|||
|
||||
var err error
|
||||
var networks []string
|
||||
var success bool
|
||||
for _, node := range nodes {
|
||||
if node == nDB.config.NodeID {
|
||||
continue
|
||||
|
@ -579,21 +583,25 @@ func (nDB *NetworkDB) bulkSync(nodes []string, all bool) ([]string, error) {
|
|||
logrus.Debugf("%v(%v): Initiating bulk sync with node %v", nDB.config.Hostname, nDB.config.NodeID, node)
|
||||
networks = nDB.findCommonNetworks(node)
|
||||
err = nDB.bulkSyncNode(networks, node, true)
|
||||
// if its periodic bulksync stop after the first successful sync
|
||||
if !all && err == nil {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
err = fmt.Errorf("bulk sync to node %s failed: %v", node, err)
|
||||
logrus.Warn(err.Error())
|
||||
} else {
|
||||
// bulk sync succeeded
|
||||
success = true
|
||||
// if its periodic bulksync stop after the first successful sync
|
||||
if !all {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
if success {
|
||||
// if at least one node sync succeeded
|
||||
return networks, nil
|
||||
}
|
||||
|
||||
return networks, nil
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Bulk sync all the table entries belonging to a set of networks to a
|
||||
|
|
|
@ -142,7 +142,7 @@ func (nDB *NetworkDB) handleNetworkEvent(nEvent *NetworkEvent) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent) bool {
|
||||
func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent, isBulkSync bool) bool {
|
||||
// Update our local clock if the received messages has newer time.
|
||||
nDB.tableClock.Witness(tEvent.LTime)
|
||||
|
||||
|
@ -175,6 +175,14 @@ func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent) bool {
|
|||
nDB.Unlock()
|
||||
return false
|
||||
}
|
||||
} else if tEvent.Type == TableEventTypeDelete && !isBulkSync {
|
||||
nDB.Unlock()
|
||||
// We don't know the entry, the entry is being deleted and the message is an async message
|
||||
// In this case the safest approach is to ignore it, it is possible that the queue grew so much to
|
||||
// exceed the garbage collection time (the residual reap time that is in the message is not being
|
||||
// updated, to avoid inserting too many messages in the queue).
|
||||
// Instead the messages coming from TCP bulk sync are safe with the latest value for the garbage collection time
|
||||
return false
|
||||
}
|
||||
|
||||
e = &entry{
|
||||
|
@ -197,11 +205,17 @@ func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent) bool {
|
|||
nDB.Unlock()
|
||||
|
||||
if err != nil && tEvent.Type == TableEventTypeDelete {
|
||||
// If it is a delete event and we did not have a state for it, don't propagate to the application
|
||||
// Again we don't know the entry but this is coming from a TCP sync so the message body is up to date.
|
||||
// We had saved the state so to speed up convergence and be able to avoid accepting create events.
|
||||
// Now we will rebroadcast the message if 2 conditions are met:
|
||||
// 1) we had already synced this network (during the network join)
|
||||
// 2) the residual reapTime is higher than 1/6 of the total reapTime.
|
||||
// If the residual reapTime is lower or equal to 1/6 of the total reapTime don't bother broadcasting it around
|
||||
// most likely the cluster is already aware of it, if not who will sync with this node will catch the state too.
|
||||
// This also avoids that deletion of entries close to their garbage collection ends up circuling around forever
|
||||
return e.reapTime > nDB.config.reapEntryInterval/6
|
||||
// most likely the cluster is already aware of it
|
||||
// This also reduce the possibility that deletion of entries close to their garbage collection ends up circuling around
|
||||
// forever
|
||||
//logrus.Infof("exiting on delete not knowing the obj with rebroadcast:%t", network.inSync)
|
||||
return network.inSync && e.reapTime > nDB.config.reapEntryInterval/6
|
||||
}
|
||||
|
||||
var op opType
|
||||
|
@ -215,7 +229,7 @@ func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent) bool {
|
|||
}
|
||||
|
||||
nDB.broadcaster.Write(makeEvent(op, tEvent.TableName, tEvent.NetworkID, tEvent.Key, tEvent.Value))
|
||||
return true
|
||||
return network.inSync
|
||||
}
|
||||
|
||||
func (nDB *NetworkDB) handleCompound(buf []byte, isBulkSync bool) {
|
||||
|
@ -244,7 +258,7 @@ func (nDB *NetworkDB) handleTableMessage(buf []byte, isBulkSync bool) {
|
|||
return
|
||||
}
|
||||
|
||||
if rebroadcast := nDB.handleTableEvent(&tEvent); rebroadcast {
|
||||
if rebroadcast := nDB.handleTableEvent(&tEvent, isBulkSync); rebroadcast {
|
||||
var err error
|
||||
buf, err = encodeRawMessage(MessageTypeTableEvent, buf)
|
||||
if err != nil {
|
||||
|
@ -261,12 +275,16 @@ func (nDB *NetworkDB) handleTableMessage(buf []byte, isBulkSync bool) {
|
|||
return
|
||||
}
|
||||
|
||||
// if the queue is over the threshold, avoid distributing information coming from TCP sync
|
||||
if isBulkSync && n.tableBroadcasts.NumQueued() > maxQueueLenBroadcastOnSync {
|
||||
return
|
||||
}
|
||||
|
||||
n.tableBroadcasts.QueueBroadcast(&tableEventMessage{
|
||||
msg: buf,
|
||||
id: tEvent.NetworkID,
|
||||
tname: tEvent.TableName,
|
||||
key: tEvent.Key,
|
||||
node: tEvent.NodeName,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -130,6 +130,9 @@ type network struct {
|
|||
// Lamport time for the latest state of the entry.
|
||||
ltime serf.LamportTime
|
||||
|
||||
// Gets set to true after the first bulk sync happens
|
||||
inSync bool
|
||||
|
||||
// Node leave is in progress.
|
||||
leaving bool
|
||||
|
||||
|
@ -616,6 +619,7 @@ func (nDB *NetworkDB) JoinNetwork(nid string) error {
|
|||
}
|
||||
nDB.addNetworkNode(nid, nDB.config.NodeID)
|
||||
networkNodes := nDB.networkNodes[nid]
|
||||
n = nodeNetworks[nid]
|
||||
nDB.Unlock()
|
||||
|
||||
if err := nDB.sendNetworkEvent(nid, NetworkEventTypeJoin, ltime); err != nil {
|
||||
|
@ -627,6 +631,12 @@ func (nDB *NetworkDB) JoinNetwork(nid string) error {
|
|||
logrus.Errorf("Error bulk syncing while joining network %s: %v", nid, err)
|
||||
}
|
||||
|
||||
// Mark the network as being synced
|
||||
// note this is a best effort, we are not checking the result of the bulk sync
|
||||
nDB.Lock()
|
||||
n.inSync = true
|
||||
nDB.Unlock()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue