Reap failed nodes after 24 hours
Signed-off-by: Santhosh Manohar <santhosh@docker.com>
This commit is contained in:
parent
6b74a8d479
commit
e98b152bac
3 changed files with 23 additions and 3 deletions
|
@ -16,9 +16,11 @@ import (
|
|||
)
|
||||
|
||||
// Timing constants for the NetworkDB background tasks.
//
// Each name is declared exactly once; the stale pre-change copies of
// reapInterval, reapPeriod and retryInterval that were rendered next to the
// new ones would be redeclarations.
const (
	// reapInterval is the countdown NotifyLeave assigns to a node's reapTime
	// when the node is moved into failedNodes.
	reapInterval = 60 * time.Second

	// reapPeriod is the amount reapDeadNode subtracts from a failed node's
	// reapTime on every pass.
	reapPeriod = 5 * time.Second

	// retryInterval is the ticker interval at which clusterInit schedules
	// reconnectNode.
	retryInterval = 1 * time.Second

	// nodeReapInterval is not referenced by the code visible here.
	// NOTE(review): presumably the intended total time a failed node is kept
	// before being reaped (the change is titled "Reap failed nodes after 24
	// hours") — confirm whether NotifyLeave should use it.
	nodeReapInterval = 24 * time.Hour

	// nodeReapPeriod is the ticker interval at which clusterInit schedules
	// reapDeadNode.
	nodeReapPeriod = 2 * time.Hour
)
|
||||
|
||||
type logWriter struct{}
|
||||
|
@ -147,6 +149,7 @@ func (nDB *NetworkDB) clusterInit() error {
|
|||
{config.GossipInterval, nDB.gossip},
|
||||
{config.PushPullInterval, nDB.bulkSyncTables},
|
||||
{retryInterval, nDB.reconnectNode},
|
||||
{nodeReapPeriod, nDB.reapDeadNode},
|
||||
} {
|
||||
t := time.NewTicker(trigger.interval)
|
||||
go nDB.triggerFunc(trigger.interval, t.C, nDB.stopCh, trigger.fn)
|
||||
|
@ -234,6 +237,19 @@ func (nDB *NetworkDB) triggerFunc(stagger time.Duration, C <-chan time.Time, sto
|
|||
}
|
||||
}
|
||||
|
||||
func (nDB *NetworkDB) reapDeadNode() {
|
||||
nDB.Lock()
|
||||
defer nDB.Unlock()
|
||||
for id, n := range nDB.failedNodes {
|
||||
if n.reapTime > 0 {
|
||||
n.reapTime -= reapPeriod
|
||||
continue
|
||||
}
|
||||
logrus.Debugf("Removing failed node %v from gossip cluster", n.Name)
|
||||
delete(nDB.failedNodes, id)
|
||||
}
|
||||
}
|
||||
|
||||
func (nDB *NetworkDB) reconnectNode() {
|
||||
nDB.RLock()
|
||||
if len(nDB.failedNodes) == 0 {
|
||||
|
|
|
@ -29,6 +29,8 @@ func (e *eventDelegate) NotifyLeave(mn *memberlist.Node) {
|
|||
e.nDB.Lock()
|
||||
if n, ok := e.nDB.nodes[mn.Name]; ok {
|
||||
delete(e.nDB.nodes, mn.Name)
|
||||
|
||||
n.reapTime = reapInterval
|
||||
e.nDB.failedNodes[mn.Name] = n
|
||||
}
|
||||
e.nDB.Unlock()
|
||||
|
|
|
@ -94,6 +94,8 @@ type NetworkDB struct {
|
|||
// node embeds a memberlist.Node and adds a Lamport time and a reap countdown.
type node struct {
|
||||
memberlist.Node
|
||||
ltime serf.LamportTime
|
||||
// Remaining countdown (a time.Duration, not a count of hours) before reapDeadNode deletes the node from failedNodes.
|
||||
reapTime time.Duration
|
||||
}
|
||||
|
||||
// network describes the node/network attachment.
|
||||
|
|
Loading…
Reference in a new issue