noderunner.go

package cluster // import "github.com/docker/docker/daemon/cluster"

import (
	"context"
	"fmt"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/containerd/log"
	types "github.com/docker/docker/api/types/swarm"
	"github.com/docker/docker/daemon/cluster/executor/container"
	lncluster "github.com/docker/docker/libnetwork/cluster"
	swarmapi "github.com/moby/swarmkit/v2/api"
	swarmallocator "github.com/moby/swarmkit/v2/manager/allocator/cnmallocator"
	swarmnode "github.com/moby/swarmkit/v2/node"
	"github.com/pkg/errors"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

// nodeRunner implements a manager for the continuously running swarmkit
// node, restarting it with backoff delays if needed.
type nodeRunner struct {
	nodeState
	mu              sync.RWMutex
	done            chan struct{} // closed when swarmNode exits
	ready           chan struct{} // closed when swarmNode becomes active
	reconnectDelay  time.Duration
	config          nodeStartConfig
	repeatedRun     bool
	cancelReconnect func()
	stopping        bool
	cluster         *Cluster // only for accessing config helpers, never call any methods. TODO: change to config struct
}

// nodeStartConfig holds configuration needed to start a new node. Exported
// fields of this structure are saved to disk in JSON. Unexported fields
// contain data that shouldn't be persisted between daemon reloads.
type nodeStartConfig struct {
	// LocalAddr is this machine's local IP or hostname, if specified.
	LocalAddr string
	// RemoteAddr is the address that was given to "swarm join". It is used
	// to find LocalAddr if necessary.
	RemoteAddr string
	// ListenAddr is the address we bind to, including a port.
	ListenAddr string
	// AdvertiseAddr is the address other nodes should connect to,
	// including a port.
	AdvertiseAddr string
	// DataPathAddr is the address that has to be used for the data path.
	DataPathAddr string
	// DefaultAddressPool contains a list of subnets.
	DefaultAddressPool []string
	// SubnetSize contains the subnet size of DefaultAddressPool.
	SubnetSize uint32
	// DataPathPort contains the data path port (VXLAN UDP port) number that
	// is used for data traffic.
	DataPathPort uint32
	// JoinInProgress is set to true if a join operation has started, but
	// not completed yet.
	JoinInProgress bool

	joinAddr        string
	forceNewCluster bool
	joinToken       string
	lockKey         []byte
	autolock        bool
	availability    types.NodeAvailability
}

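// Ready returns a channel that is closed once the node has either become
// active or exited. If the node exited before becoming active, the node's
// error is sent on the channel before it is closed.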
func (n *nodeRunner) Ready() chan error {
	c := make(chan error, 1)
	n.mu.RLock()
	ready, done := n.ready, n.done
	n.mu.RUnlock()
	go func() {
		select {
		case <-ready:
		case <-done:
		}
		select {
		case <-ready:
		default:
			n.mu.RLock()
			c <- n.err
			n.mu.RUnlock()
		}
		close(c)
	}()
	return c
}

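// Start resets the reconnect backoff to its initial delay and starts a
// swarmkit node with the given configuration.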
func (n *nodeRunner) Start(conf nodeStartConfig) error {
	n.mu.Lock()
	defer n.mu.Unlock()

	n.reconnectDelay = initialReconnectDelay

	return n.start(conf)
}

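// start creates and starts a swarmkit node from conf, persists the start
// configuration, and launches the goroutines that track the node's
// lifecycle. Callers must hold n.mu.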
func (n *nodeRunner) start(conf nodeStartConfig) error {
	var control string
	if isWindows {
		control = `\\.\pipe\` + controlSocket
	} else {
		control = filepath.Join(n.cluster.runtimeRoot, controlSocket)
	}

	joinAddr := conf.joinAddr
	if joinAddr == "" && conf.JoinInProgress {
		// We must have been restarted while trying to join a cluster.
		// Continue trying to join instead of forming our own cluster.
		joinAddr = conf.RemoteAddr
	}

	// Hostname is not set here. Instead, it is obtained from
	// the node description that is reported periodically.
	swarmnodeConfig := swarmnode.Config{
		ForceNewCluster:    conf.forceNewCluster,
		ListenControlAPI:   control,
		ListenRemoteAPI:    conf.ListenAddr,
		AdvertiseRemoteAPI: conf.AdvertiseAddr,
		NetworkConfig: &swarmallocator.NetworkConfig{
			DefaultAddrPool: conf.DefaultAddressPool,
			SubnetSize:      conf.SubnetSize,
			VXLANUDPPort:    conf.DataPathPort,
		},
		JoinAddr:  joinAddr,
		StateDir:  n.cluster.root,
		JoinToken: conf.joinToken,
		Executor: container.NewExecutor(
			n.cluster.config.Backend,
			n.cluster.config.PluginBackend,
			n.cluster.config.ImageBackend,
			n.cluster.config.VolumeBackend,
		),
		HeartbeatTick: n.cluster.config.RaftHeartbeatTick,
		// Recommended value in etcd/raft is 10 x (HeartbeatTick).
		// Lower values were seen to have caused instability because of
		// frequent leader elections when running on flaky networks.
		ElectionTick:     n.cluster.config.RaftElectionTick,
		UnlockKey:        conf.lockKey,
		AutoLockManagers: conf.autolock,
		PluginGetter:     n.cluster.config.Backend.PluginGetter(),
	}
	if conf.availability != "" {
		avail, ok := swarmapi.NodeSpec_Availability_value[strings.ToUpper(string(conf.availability))]
		if !ok {
			return fmt.Errorf("invalid Availability: %q", conf.availability)
		}
		swarmnodeConfig.Availability = swarmapi.NodeSpec_Availability(avail)
	}
	node, err := swarmnode.New(&swarmnodeConfig)
	if err != nil {
		return err
	}
	if err := node.Start(context.Background()); err != nil {
		return err
	}

	n.done = make(chan struct{})
	n.ready = make(chan struct{})
	n.swarmNode = node
	if conf.joinAddr != "" {
		conf.JoinInProgress = true
	}
	n.config = conf
	savePersistentState(n.cluster.root, conf)

	ctx, cancel := context.WithCancel(context.Background())

	go func() {
		n.handleNodeExit(node)
		cancel()
	}()

	go n.handleReadyEvent(ctx, node, n.ready)
	go n.handleControlSocketChange(ctx, node)

	return nil
}

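// handleControlSocketChange recreates the control and logs gRPC clients
// whenever the node's control socket connection changes, starts the cluster
// events watcher for new connections, and notifies the cluster of the
// socket change.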
func (n *nodeRunner) handleControlSocketChange(ctx context.Context, node *swarmnode.Node) {
	for conn := range node.ListenControlSocket(ctx) {
		n.mu.Lock()
		if n.grpcConn != conn {
			if conn == nil {
				n.controlClient = nil
				n.logsClient = nil
			} else {
				n.controlClient = swarmapi.NewControlClient(conn)
				n.logsClient = swarmapi.NewLogsClient(conn)
				// push store changes to daemon
				go n.watchClusterEvents(ctx, conn)
			}
		}
		n.grpcConn = conn
		n.mu.Unlock()
		n.cluster.SendClusterEvent(lncluster.EventSocketChange)
	}
}

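// watchClusterEvents subscribes to swarmkit store changes for nodes,
// services, networks, secrets, and configs, and forwards each change to the
// cluster's watch stream until the watch breaks or ctx is done.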
func (n *nodeRunner) watchClusterEvents(ctx context.Context, conn *grpc.ClientConn) {
	client := swarmapi.NewWatchClient(conn)
	watch, err := client.Watch(ctx, &swarmapi.WatchRequest{
		Entries: []*swarmapi.WatchRequest_WatchEntry{
			{
				Kind:   "node",
				Action: swarmapi.WatchActionKindCreate | swarmapi.WatchActionKindUpdate | swarmapi.WatchActionKindRemove,
			},
			{
				Kind:   "service",
				Action: swarmapi.WatchActionKindCreate | swarmapi.WatchActionKindUpdate | swarmapi.WatchActionKindRemove,
			},
			{
				Kind:   "network",
				Action: swarmapi.WatchActionKindCreate | swarmapi.WatchActionKindUpdate | swarmapi.WatchActionKindRemove,
			},
			{
				Kind:   "secret",
				Action: swarmapi.WatchActionKindCreate | swarmapi.WatchActionKindUpdate | swarmapi.WatchActionKindRemove,
			},
			{
				Kind:   "config",
				Action: swarmapi.WatchActionKindCreate | swarmapi.WatchActionKindUpdate | swarmapi.WatchActionKindRemove,
			},
		},
		IncludeOldObject: true,
	})
	if err != nil {
		log.G(ctx).WithError(err).Error("failed to watch cluster store")
		return
	}
	for {
		msg, err := watch.Recv()
		if err != nil {
			// store watch is broken
			errStatus, ok := status.FromError(err)
			if !ok || errStatus.Code() != codes.Canceled {
				log.G(ctx).WithError(err).Error("failed to receive changes from store watch API")
			}
			return
		}
		select {
		case <-ctx.Done():
			return
		case n.cluster.watchStream <- msg:
		}
	}
}

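// handleReadyEvent waits until the node becomes ready or ctx is cancelled.
// When the node becomes ready it clears any recorded error, persists the
// completed join, and closes the ready channel; in either case it then
// emits a node-ready cluster event.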
func (n *nodeRunner) handleReadyEvent(ctx context.Context, node *swarmnode.Node, ready chan struct{}) {
	select {
	case <-node.Ready():
		n.mu.Lock()
		n.err = nil
		if n.config.JoinInProgress {
			n.config.JoinInProgress = false
			savePersistentState(n.cluster.root, n.config)
		}
		n.mu.Unlock()
		close(ready)
	case <-ctx.Done():
	}
	n.cluster.SendClusterEvent(lncluster.EventNodeReady)
}

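// handleNodeExit blocks until the swarmkit node exits, records the exit
// error, closes the done channel, and schedules a restart with backoff if
// the node had become ready or had already failed on a previous run.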
func (n *nodeRunner) handleNodeExit(node *swarmnode.Node) {
	err := detectLockedError(node.Err(context.Background()))
	if err != nil {
		log.G(context.TODO()).Errorf("cluster exited with error: %v", err)
	}
	n.mu.Lock()
	n.swarmNode = nil
	n.err = err
	close(n.done)
	select {
	case <-n.ready:
		n.enableReconnectWatcher()
	default:
		if n.repeatedRun {
			n.enableReconnectWatcher()
		}
	}
	n.repeatedRun = true
	n.mu.Unlock()
}

// Stop stops the current swarm node if it is running.
func (n *nodeRunner) Stop() error {
	n.mu.Lock()
	if n.cancelReconnect != nil { // between restarts
		n.cancelReconnect()
		n.cancelReconnect = nil
	}
	if n.swarmNode == nil {
		// Even though the swarm node is nil, we may still need to send a
		// node leave event to perform any required cleanup.
		if n.cluster != nil {
			n.cluster.SendClusterEvent(lncluster.EventNodeLeave)
		}
		n.mu.Unlock()
		return nil
	}
	n.stopping = true
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()
	n.mu.Unlock()
	if err := n.swarmNode.Stop(ctx); err != nil && !strings.Contains(err.Error(), "context canceled") {
		return err
	}
	n.cluster.SendClusterEvent(lncluster.EventNodeLeave)
	<-n.done
	return nil
}

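// State returns a snapshot of the current node state. It is safe to call on
// a nil receiver, in which case it reports an inactive node.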
func (n *nodeRunner) State() nodeState {
	if n == nil {
		return nodeState{status: types.LocalNodeStateInactive}
	}
	n.mu.RLock()
	defer n.mu.RUnlock()

	ns := n.nodeState

	if ns.err != nil || n.cancelReconnect != nil {
		if errors.Is(ns.err, errSwarmLocked) {
			ns.status = types.LocalNodeStateLocked
		} else {
			ns.status = types.LocalNodeStateError
		}
	} else {
		select {
		case <-n.ready:
			ns.status = types.LocalNodeStateActive
		default:
			ns.status = types.LocalNodeStatePending
		}
	}

	return ns
}

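// enableReconnectWatcher doubles the reconnect delay (capped at
// maxReconnectDelay) and schedules a restart of the node once the delay
// elapses, unless the runner is stopping or the delay is cancelled.
// Callers must hold n.mu.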
func (n *nodeRunner) enableReconnectWatcher() {
	if n.stopping {
		return
	}
	n.reconnectDelay *= 2
	if n.reconnectDelay > maxReconnectDelay {
		n.reconnectDelay = maxReconnectDelay
	}
	log.G(context.TODO()).Warnf("Restarting swarm in %.2f seconds", n.reconnectDelay.Seconds())
	delayCtx, cancel := context.WithTimeout(context.Background(), n.reconnectDelay)
	n.cancelReconnect = cancel

	go func() {
		<-delayCtx.Done()
		if delayCtx.Err() != context.DeadlineExceeded {
			return
		}
		n.mu.Lock()
		defer n.mu.Unlock()
		if n.stopping {
			return
		}

		if err := n.start(n.config); err != nil {
			n.err = err
		}
	}()
}

// nodeState represents information about the current state of the cluster
// and provides access to the grpc clients.
type nodeState struct {
	swarmNode       *swarmnode.Node
	grpcConn        *grpc.ClientConn
	controlClient   swarmapi.ControlClient
	logsClient      swarmapi.LogsClient
	status          types.LocalNodeState
	actualLocalAddr string
	err             error
}

// IsActiveManager returns true if the node is a manager ready to accept
// control requests. It is safe to access the client properties if this
// returns true.
func (ns nodeState) IsActiveManager() bool {
	return ns.controlClient != nil
}

// IsManager returns true if the node is a manager.
func (ns nodeState) IsManager() bool {
	return ns.swarmNode != nil && ns.swarmNode.Manager() != nil
}

// NodeID returns the node's ID, or an empty string if the node is inactive.
func (ns nodeState) NodeID() string {
	if ns.swarmNode != nil {
		return ns.swarmNode.NodeID()
	}
	return ""
}