nodes.go 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. package dispatcher
  2. import (
  3. "sync"
  4. "time"
  5. "google.golang.org/grpc"
  6. "google.golang.org/grpc/codes"
  7. "github.com/docker/swarmkit/api"
  8. "github.com/docker/swarmkit/identity"
  9. "github.com/docker/swarmkit/manager/dispatcher/heartbeat"
  10. )
  // rateLimitCount is the number of registration attempts CheckRateLimit
  // tolerates for a node before rejecting it. The per-node counter starts over
  // once more than rateLimitPeriod has passed since the node's last recorded
  // registration.
  const rateLimitCount = 3
  12. type registeredNode struct {
  13. SessionID string
  14. Heartbeat *heartbeat.Heartbeat
  15. Registered time.Time
  16. Attempts int
  17. Node *api.Node
  18. Disconnect chan struct{} // signal to disconnect
  19. mu sync.Mutex
  20. }
  21. // checkSessionID determines if the SessionID has changed and returns the
  22. // appropriate GRPC error code.
  23. //
  24. // This may not belong here in the future.
  25. func (rn *registeredNode) checkSessionID(sessionID string) error {
  26. rn.mu.Lock()
  27. defer rn.mu.Unlock()
  28. // Before each message send, we need to check the nodes sessionID hasn't
  29. // changed. If it has, we will the stream and make the node
  30. // re-register.
  31. if sessionID == "" || rn.SessionID != sessionID {
  32. return grpc.Errorf(codes.InvalidArgument, ErrSessionInvalid.Error())
  33. }
  34. return nil
  35. }
  36. type nodeStore struct {
  37. periodChooser *periodChooser
  38. gracePeriodMultiplierNormal time.Duration
  39. gracePeriodMultiplierUnknown time.Duration
  40. rateLimitPeriod time.Duration
  41. nodes map[string]*registeredNode
  42. mu sync.RWMutex
  43. }
  44. func newNodeStore(hbPeriod, hbEpsilon time.Duration, graceMultiplier int, rateLimitPeriod time.Duration) *nodeStore {
  45. return &nodeStore{
  46. nodes: make(map[string]*registeredNode),
  47. periodChooser: newPeriodChooser(hbPeriod, hbEpsilon),
  48. gracePeriodMultiplierNormal: time.Duration(graceMultiplier),
  49. gracePeriodMultiplierUnknown: time.Duration(graceMultiplier) * 2,
  50. rateLimitPeriod: rateLimitPeriod,
  51. }
  52. }
  53. func (s *nodeStore) updatePeriod(hbPeriod, hbEpsilon time.Duration, gracePeriodMultiplier int) {
  54. s.mu.Lock()
  55. s.periodChooser = newPeriodChooser(hbPeriod, hbEpsilon)
  56. s.gracePeriodMultiplierNormal = time.Duration(gracePeriodMultiplier)
  57. s.gracePeriodMultiplierUnknown = s.gracePeriodMultiplierNormal * 2
  58. s.mu.Unlock()
  59. }
  60. func (s *nodeStore) Len() int {
  61. s.mu.Lock()
  62. defer s.mu.Unlock()
  63. return len(s.nodes)
  64. }
  65. func (s *nodeStore) AddUnknown(n *api.Node, expireFunc func()) error {
  66. s.mu.Lock()
  67. defer s.mu.Unlock()
  68. rn := &registeredNode{
  69. Node: n,
  70. }
  71. s.nodes[n.ID] = rn
  72. rn.Heartbeat = heartbeat.New(s.periodChooser.Choose()*s.gracePeriodMultiplierUnknown, expireFunc)
  73. return nil
  74. }
  75. // CheckRateLimit returns error if node with specified id is allowed to re-register
  76. // again.
  77. func (s *nodeStore) CheckRateLimit(id string) error {
  78. s.mu.Lock()
  79. defer s.mu.Unlock()
  80. if existRn, ok := s.nodes[id]; ok {
  81. if time.Since(existRn.Registered) > s.rateLimitPeriod {
  82. existRn.Attempts = 0
  83. }
  84. existRn.Attempts++
  85. if existRn.Attempts > rateLimitCount {
  86. return grpc.Errorf(codes.Unavailable, "node %s exceeded rate limit count of registrations", id)
  87. }
  88. existRn.Registered = time.Now()
  89. }
  90. return nil
  91. }
  92. // Add adds new node and returns it, it replaces existing without notification.
  93. func (s *nodeStore) Add(n *api.Node, expireFunc func()) *registeredNode {
  94. s.mu.Lock()
  95. defer s.mu.Unlock()
  96. var attempts int
  97. var registered time.Time
  98. if existRn, ok := s.nodes[n.ID]; ok {
  99. attempts = existRn.Attempts
  100. registered = existRn.Registered
  101. existRn.Heartbeat.Stop()
  102. delete(s.nodes, n.ID)
  103. }
  104. if registered.IsZero() {
  105. registered = time.Now()
  106. }
  107. rn := &registeredNode{
  108. SessionID: identity.NewID(), // session ID is local to the dispatcher.
  109. Node: n,
  110. Registered: registered,
  111. Attempts: attempts,
  112. Disconnect: make(chan struct{}),
  113. }
  114. s.nodes[n.ID] = rn
  115. rn.Heartbeat = heartbeat.New(s.periodChooser.Choose()*s.gracePeriodMultiplierNormal, expireFunc)
  116. return rn
  117. }
  118. func (s *nodeStore) Get(id string) (*registeredNode, error) {
  119. s.mu.RLock()
  120. rn, ok := s.nodes[id]
  121. s.mu.RUnlock()
  122. if !ok {
  123. return nil, grpc.Errorf(codes.NotFound, ErrNodeNotRegistered.Error())
  124. }
  125. return rn, nil
  126. }
  127. func (s *nodeStore) GetWithSession(id, sid string) (*registeredNode, error) {
  128. s.mu.RLock()
  129. rn, ok := s.nodes[id]
  130. s.mu.RUnlock()
  131. if !ok {
  132. return nil, grpc.Errorf(codes.NotFound, ErrNodeNotRegistered.Error())
  133. }
  134. return rn, rn.checkSessionID(sid)
  135. }
  136. func (s *nodeStore) Heartbeat(id, sid string) (time.Duration, error) {
  137. rn, err := s.GetWithSession(id, sid)
  138. if err != nil {
  139. return 0, err
  140. }
  141. period := s.periodChooser.Choose() // base period for node
  142. grace := period * time.Duration(s.gracePeriodMultiplierNormal)
  143. rn.mu.Lock()
  144. rn.Heartbeat.Update(grace)
  145. rn.Heartbeat.Beat()
  146. rn.mu.Unlock()
  147. return period, nil
  148. }
  149. func (s *nodeStore) Delete(id string) *registeredNode {
  150. s.mu.Lock()
  151. var node *registeredNode
  152. if rn, ok := s.nodes[id]; ok {
  153. delete(s.nodes, id)
  154. rn.Heartbeat.Stop()
  155. node = rn
  156. }
  157. s.mu.Unlock()
  158. return node
  159. }
  160. func (s *nodeStore) Disconnect(id string) {
  161. s.mu.Lock()
  162. if rn, ok := s.nodes[id]; ok {
  163. close(rn.Disconnect)
  164. rn.Heartbeat.Stop()
  165. }
  166. s.mu.Unlock()
  167. }
  168. // Clean removes all nodes and stops their heartbeats.
  169. // It's equivalent to invalidate all sessions.
  170. func (s *nodeStore) Clean() {
  171. s.mu.Lock()
  172. for _, rn := range s.nodes {
  173. rn.Heartbeat.Stop()
  174. }
  175. s.nodes = make(map[string]*registeredNode)
  176. s.mu.Unlock()
  177. }