package cluster // import "github.com/docker/docker/daemon/cluster"

import (
	"context"
	"fmt"
	"path/filepath"
	"runtime"
	"strings"
	"sync"
	"time"

	types "github.com/docker/docker/api/types/swarm"
	"github.com/docker/docker/daemon/cluster/executor/container"
	lncluster "github.com/docker/libnetwork/cluster"
	swarmapi "github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/manager/allocator/cnmallocator"
	swarmnode "github.com/docker/swarmkit/node"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

// nodeRunner implements a manager for a continuously running swarmkit node,
// restarting it with backoff delays if needed.
type nodeRunner struct {
	nodeState
	mu              sync.RWMutex
	done            chan struct{} // closed when swarmNode exits
	ready           chan struct{} // closed when swarmNode becomes active
	reconnectDelay  time.Duration
	config          nodeStartConfig
	repeatedRun     bool
	cancelReconnect func()
	stopping        bool
	cluster         *Cluster // only for accessing config helpers, never call any methods. TODO: change to config struct
}

// nodeStartConfig holds configuration needed to start a new node. Exported
// fields of this structure are saved to disk as JSON. Unexported fields
// contain data that shouldn't be persisted between daemon reloads.
type nodeStartConfig struct {
	// LocalAddr is this machine's local IP or hostname, if specified.
	LocalAddr string
	// RemoteAddr is the address that was given to "swarm join". It is used
	// to find LocalAddr if necessary.
	RemoteAddr string
	// ListenAddr is the address we bind to, including a port.
	ListenAddr string
	// AdvertiseAddr is the address other nodes should connect to,
	// including a port.
	AdvertiseAddr string
	// DataPathAddr is the address that has to be used for the data path.
	DataPathAddr string
	// DefaultAddressPool contains a list of subnets.
	DefaultAddressPool []string
	// SubnetSize contains the subnet size of DefaultAddressPool.
	SubnetSize uint32
	// JoinInProgress is set to true if a join operation has started, but
	// not completed yet.
	JoinInProgress bool

	joinAddr        string
	forceNewCluster bool
	joinToken       string
	lockKey         []byte
	autolock        bool
	availability    types.NodeAvailability
}

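// Ready returns a channel that is closed once the node either becomes
// active or exits. If the node exits before becoming ready, its error
// (which may be nil) is sent on the channel before it is closed.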
func (n *nodeRunner) Ready() chan error {
	c := make(chan error, 1)
	n.mu.RLock()
	ready, done := n.ready, n.done
	n.mu.RUnlock()
	go func() {
		select {
		case <-ready:
		case <-done:
		}
		select {
		case <-ready:
		default:
			n.mu.RLock()
			c <- n.err
			n.mu.RUnlock()
		}
		close(c)
	}()
	return c
}

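// Start resets the reconnection backoff and starts the swarmkit node with
// the given configuration.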
func (n *nodeRunner) Start(conf nodeStartConfig) error {
	n.mu.Lock()
	defer n.mu.Unlock()

	n.reconnectDelay = initialReconnectDelay

	return n.start(conf)
}

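// start creates the swarmkit node from the supplied configuration, starts it,
// and launches the goroutines that track its exit, readiness, and control
// socket changes. Callers must hold n.mu.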
func (n *nodeRunner) start(conf nodeStartConfig) error {
	var control string
	if runtime.GOOS == "windows" {
		control = `\\.\pipe\` + controlSocket
	} else {
		control = filepath.Join(n.cluster.runtimeRoot, controlSocket)
	}

	joinAddr := conf.joinAddr
	if joinAddr == "" && conf.JoinInProgress {
		// We must have been restarted while trying to join a cluster.
		// Continue trying to join instead of forming our own cluster.
		joinAddr = conf.RemoteAddr
	}

	// Hostname is not set here. Instead, it is obtained from
	// the node description that is reported periodically
	swarmnodeConfig := swarmnode.Config{
		ForceNewCluster:    conf.forceNewCluster,
		ListenControlAPI:   control,
		ListenRemoteAPI:    conf.ListenAddr,
		AdvertiseRemoteAPI: conf.AdvertiseAddr,
		NetworkConfig: &cnmallocator.NetworkConfig{
			DefaultAddrPool: conf.DefaultAddressPool,
			SubnetSize:      conf.SubnetSize,
		},
		JoinAddr:  joinAddr,
		StateDir:  n.cluster.root,
		JoinToken: conf.joinToken,
		Executor: container.NewExecutor(
			n.cluster.config.Backend,
			n.cluster.config.PluginBackend,
			n.cluster.config.ImageBackend,
			n.cluster.config.VolumeBackend,
		),
		HeartbeatTick: n.cluster.config.RaftHeartbeatTick,
		// Recommended value in etcd/raft is 10 x (HeartbeatTick).
		// Lower values were seen to have caused instability because of
		// frequent leader elections when running on flakey networks.
		ElectionTick:     n.cluster.config.RaftElectionTick,
		UnlockKey:        conf.lockKey,
		AutoLockManagers: conf.autolock,
		PluginGetter:     n.cluster.config.Backend.PluginGetter(),
	}
	if conf.availability != "" {
		avail, ok := swarmapi.NodeSpec_Availability_value[strings.ToUpper(string(conf.availability))]
		if !ok {
			return fmt.Errorf("invalid Availability: %q", conf.availability)
		}
		swarmnodeConfig.Availability = swarmapi.NodeSpec_Availability(avail)
	}
	node, err := swarmnode.New(&swarmnodeConfig)
	if err != nil {
		return err
	}
	if err := node.Start(context.Background()); err != nil {
		return err
	}

	n.done = make(chan struct{})
	n.ready = make(chan struct{})
	n.swarmNode = node
	if conf.joinAddr != "" {
		conf.JoinInProgress = true
	}
	n.config = conf
	savePersistentState(n.cluster.root, conf)

	ctx, cancel := context.WithCancel(context.Background())

	go func() {
		n.handleNodeExit(node)
		cancel()
	}()

	go n.handleReadyEvent(ctx, node, n.ready)
	go n.handleControlSocketChange(ctx, node)

	return nil
}

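// handleControlSocketChange refreshes the cached gRPC clients whenever the
// node's control socket connection changes and notifies the cluster of the
// change.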
func (n *nodeRunner) handleControlSocketChange(ctx context.Context, node *swarmnode.Node) {
	for conn := range node.ListenControlSocket(ctx) {
		n.mu.Lock()
		if n.grpcConn != conn {
			if conn == nil {
				n.controlClient = nil
				n.logsClient = nil
			} else {
				n.controlClient = swarmapi.NewControlClient(conn)
				n.logsClient = swarmapi.NewLogsClient(conn)
				// push store changes to daemon
				go n.watchClusterEvents(ctx, conn)
			}
		}
		n.grpcConn = conn
		n.mu.Unlock()
		n.cluster.SendClusterEvent(lncluster.EventSocketChange)
	}
}

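// watchClusterEvents subscribes to create, update, and remove events for
// nodes, services, networks, secrets, and configs through the swarmkit watch
// API and forwards each message to the cluster's watch stream.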
func (n *nodeRunner) watchClusterEvents(ctx context.Context, conn *grpc.ClientConn) {
	client := swarmapi.NewWatchClient(conn)
	watch, err := client.Watch(ctx, &swarmapi.WatchRequest{
		Entries: []*swarmapi.WatchRequest_WatchEntry{
			{
				Kind:   "node",
				Action: swarmapi.WatchActionKindCreate | swarmapi.WatchActionKindUpdate | swarmapi.WatchActionKindRemove,
			},
			{
				Kind:   "service",
				Action: swarmapi.WatchActionKindCreate | swarmapi.WatchActionKindUpdate | swarmapi.WatchActionKindRemove,
			},
			{
				Kind:   "network",
				Action: swarmapi.WatchActionKindCreate | swarmapi.WatchActionKindUpdate | swarmapi.WatchActionKindRemove,
			},
			{
				Kind:   "secret",
				Action: swarmapi.WatchActionKindCreate | swarmapi.WatchActionKindUpdate | swarmapi.WatchActionKindRemove,
			},
			{
				Kind:   "config",
				Action: swarmapi.WatchActionKindCreate | swarmapi.WatchActionKindUpdate | swarmapi.WatchActionKindRemove,
			},
		},
		IncludeOldObject: true,
	})
	if err != nil {
		logrus.WithError(err).Error("failed to watch cluster store")
		return
	}
	for {
		msg, err := watch.Recv()
		if err != nil {
			// store watch is broken
			errStatus, ok := status.FromError(err)
			if !ok || errStatus.Code() != codes.Canceled {
				logrus.WithError(err).Error("failed to receive changes from store watch API")
			}
			return
		}
		select {
		case <-ctx.Done():
			return
		case n.cluster.watchStream <- msg:
		}
	}
}

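// handleReadyEvent waits until the node becomes ready or the context is
// cancelled. On readiness it clears any recorded error, persists completion
// of an in-progress join, and closes the ready channel. In either case a
// node-ready cluster event is emitted.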
func (n *nodeRunner) handleReadyEvent(ctx context.Context, node *swarmnode.Node, ready chan struct{}) {
	select {
	case <-node.Ready():
		n.mu.Lock()
		n.err = nil
		if n.config.JoinInProgress {
			n.config.JoinInProgress = false
			savePersistentState(n.cluster.root, n.config)
		}
		n.mu.Unlock()
		close(ready)
	case <-ctx.Done():
	}
	n.cluster.SendClusterEvent(lncluster.EventNodeReady)
}

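// handleNodeExit blocks until the swarmkit node exits, records the exit
// error, closes the done channel, and schedules a restart with backoff
// unless this was the node's first run and it never became ready.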
func (n *nodeRunner) handleNodeExit(node *swarmnode.Node) {
	err := detectLockedError(node.Err(context.Background()))
	if err != nil {
		logrus.Errorf("cluster exited with error: %v", err)
	}
	n.mu.Lock()
	n.swarmNode = nil
	n.err = err
	close(n.done)
	select {
	case <-n.ready:
		n.enableReconnectWatcher()
	default:
		if n.repeatedRun {
			n.enableReconnectWatcher()
		}
	}
	n.repeatedRun = true
	n.mu.Unlock()
}

// Stop stops the current swarm node if it is running.
func (n *nodeRunner) Stop() error {
	n.mu.Lock()
	if n.cancelReconnect != nil { // between restarts
		n.cancelReconnect()
		n.cancelReconnect = nil
	}
	if n.swarmNode == nil {
		n.mu.Unlock()
		return nil
	}
	n.stopping = true
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()
	n.mu.Unlock()

	if err := n.swarmNode.Stop(ctx); err != nil && !strings.Contains(err.Error(), "context canceled") {
		return err
	}
	n.cluster.SendClusterEvent(lncluster.EventNodeLeave)
	<-n.done
	return nil
}

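// State returns a snapshot of the current node state, deriving the local
// node status (inactive, pending, active, locked, or error) from the
// runner's error, reconnect, and readiness state.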
func (n *nodeRunner) State() nodeState {
	if n == nil {
		return nodeState{status: types.LocalNodeStateInactive}
	}
	n.mu.RLock()
	defer n.mu.RUnlock()

	ns := n.nodeState

	if ns.err != nil || n.cancelReconnect != nil {
		if errors.Cause(ns.err) == errSwarmLocked {
			ns.status = types.LocalNodeStateLocked
		} else {
			ns.status = types.LocalNodeStateError
		}
	} else {
		select {
		case <-n.ready:
			ns.status = types.LocalNodeStateActive
		default:
			ns.status = types.LocalNodeStatePending
		}
	}

	return ns
}

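// enableReconnectWatcher doubles the reconnect delay (capped at
// maxReconnectDelay) and schedules a restart of the node with the last known
// configuration once the delay elapses. Callers must hold n.mu.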
func (n *nodeRunner) enableReconnectWatcher() {
	if n.stopping {
		return
	}
	n.reconnectDelay *= 2
	if n.reconnectDelay > maxReconnectDelay {
		n.reconnectDelay = maxReconnectDelay
	}
	logrus.Warnf("Restarting swarm in %.2f seconds", n.reconnectDelay.Seconds())
	delayCtx, cancel := context.WithTimeout(context.Background(), n.reconnectDelay)
	n.cancelReconnect = cancel

	go func() {
		<-delayCtx.Done()
		if delayCtx.Err() != context.DeadlineExceeded {
			return
		}
		n.mu.Lock()
		defer n.mu.Unlock()
		if n.stopping {
			return
		}

		if err := n.start(n.config); err != nil {
			n.err = err
		}
	}()
}

// nodeState represents information about the current state of the cluster and
// provides access to the grpc clients.
type nodeState struct {
	swarmNode       *swarmnode.Node
	grpcConn        *grpc.ClientConn
	controlClient   swarmapi.ControlClient
	logsClient      swarmapi.LogsClient
	status          types.LocalNodeState
	actualLocalAddr string
	err             error
}

// IsActiveManager returns true if node is a manager ready to accept control
// requests. It is safe to access the client properties if this returns true.
func (ns nodeState) IsActiveManager() bool {
	return ns.controlClient != nil
}

// IsManager returns true if node is a manager.
func (ns nodeState) IsManager() bool {
	return ns.swarmNode != nil && ns.swarmNode.Manager() != nil
}

// NodeID returns node's ID or empty string if node is inactive.
func (ns nodeState) NodeID() string {
	if ns.swarmNode != nil {
		return ns.swarmNode.NodeID()
	}
	return ""
}