node.go

package node

import (
	"crypto/tls"
	"encoding/json"
	"io/ioutil"
	"net"
	"os"
	"path/filepath"
	"reflect"
	"sort"
	"sync"
	"time"

	"github.com/Sirupsen/logrus"
	"github.com/boltdb/bolt"
	"github.com/docker/docker/pkg/plugingetter"
	"github.com/docker/swarmkit/agent"
	"github.com/docker/swarmkit/agent/exec"
	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/ca"
	"github.com/docker/swarmkit/connectionbroker"
	"github.com/docker/swarmkit/ioutils"
	"github.com/docker/swarmkit/log"
	"github.com/docker/swarmkit/manager"
	"github.com/docker/swarmkit/manager/encryption"
	"github.com/docker/swarmkit/remotes"
	"github.com/docker/swarmkit/xnet"
	grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
	"github.com/pkg/errors"
	"golang.org/x/net/context"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials"
)

const (
	stateFilename     = "state.json"
	roleChangeTimeout = 16 * time.Second
)

var (
	errNodeStarted    = errors.New("node: already started")
	errNodeNotStarted = errors.New("node: not started")
	certDirectory     = "certificates"

	// ErrInvalidUnlockKey is returned when we can't decrypt the TLS certificate.
	ErrInvalidUnlockKey = errors.New("node is locked, and needs a valid unlock key")
)

// Config provides values for a Node.
type Config struct {
	// Hostname is the name of the host for the agent instance.
	Hostname string

	// JoinAddr specifies the address of a node to use for the initial
	// connection to another manager in the cluster. It is a single, optional
	// address; the actual remotes come from the stored state.
	JoinAddr string

	// StateDir specifies the directory the node uses to keep the state of the
	// remote managers and certificates.
	StateDir string

	// JoinToken is the token to be used on the first certificate request.
	JoinToken string

	// ExternalCAs is a list of CAs to which a manager node
	// will make certificate signing requests for node certificates.
	ExternalCAs []*api.ExternalCA

	// ForceNewCluster creates a new cluster from the current raft state.
	ForceNewCluster bool

	// ListenControlAPI specifies the address the control API should listen on.
	ListenControlAPI string

	// ListenRemoteAPI specifies the address for the remote API that agents
	// and raft members connect to.
	ListenRemoteAPI string

	// AdvertiseRemoteAPI specifies the address that should be advertised
	// for connections to the remote API (including the raft service).
	AdvertiseRemoteAPI string

	// Executor specifies the executor to use for the agent.
	Executor exec.Executor

	// ElectionTick defines the number of ticks without a leader needed to
	// trigger a new election.
	ElectionTick uint32

	// HeartbeatTick defines the number of ticks between each
	// heartbeat sent to other members for health-check purposes.
	HeartbeatTick uint32

	// AutoLockManagers determines whether or not an unlock key will be generated
	// when bootstrapping a new cluster for the first time.
	AutoLockManagers bool

	// UnlockKey is the key to unlock a node, used for decryption at rest. This
	// only applies to nodes that have already joined a cluster.
	UnlockKey []byte

	// Availability allows a user to control the current scheduling status of a node.
	Availability api.NodeSpec_Availability

	// PluginGetter provides access to Docker's plugin inventory.
	PluginGetter plugingetter.PluginGetter
}

// Node implements the primary node functionality for a member of a swarm
// cluster. Node handles workloads and may also run as a manager.
type Node struct {
	sync.RWMutex
	config           *Config
	remotes          *persistentRemotes
	connBroker       *connectionbroker.Broker
	role             string
	roleCond         *sync.Cond
	conn             *grpc.ClientConn
	connCond         *sync.Cond
	nodeID           string
	started          chan struct{}
	startOnce        sync.Once
	stopped          chan struct{}
	stopOnce         sync.Once
	ready            chan struct{} // closed when agent has completed registration and manager (if enabled) is ready to receive control requests
	closed           chan struct{}
	err              error
	agent            *agent.Agent
	manager          *manager.Manager
	notifyNodeChange chan *api.Node // used to send role updates from the dispatcher API on promotion/demotion
	unlockKey        []byte
}

// RemoteAPIAddr returns the address on which the remote manager API listens.
// It returns an error if the node is not a manager.
func (n *Node) RemoteAPIAddr() (string, error) {
	n.RLock()
	defer n.RUnlock()
	if n.manager == nil {
		return "", errors.New("manager is not running")
	}
	addr := n.manager.Addr()
	if addr == "" {
		return "", errors.New("manager addr is not set")
	}
	return addr, nil
}

// New returns a new Node instance.
func New(c *Config) (*Node, error) {
	if err := os.MkdirAll(c.StateDir, 0700); err != nil {
		return nil, err
	}
	stateFile := filepath.Join(c.StateDir, stateFilename)
	dt, err := ioutil.ReadFile(stateFile)
	var p []api.Peer
	if err != nil && !os.IsNotExist(err) {
		return nil, err
	}
	if err == nil {
		if err := json.Unmarshal(dt, &p); err != nil {
			return nil, err
		}
	}
	n := &Node{
		remotes:          newPersistentRemotes(stateFile, p...),
		role:             ca.WorkerRole,
		config:           c,
		started:          make(chan struct{}),
		stopped:          make(chan struct{}),
		closed:           make(chan struct{}),
		ready:            make(chan struct{}),
		notifyNodeChange: make(chan *api.Node, 1),
		unlockKey:        c.UnlockKey,
	}
	if n.config.JoinAddr != "" || n.config.ForceNewCluster {
		n.remotes = newPersistentRemotes(filepath.Join(n.config.StateDir, stateFilename))
		if n.config.JoinAddr != "" {
			n.remotes.Observe(api.Peer{Addr: n.config.JoinAddr}, remotes.DefaultObservationWeight)
		}
	}
	n.connBroker = connectionbroker.New(n.remotes)
	n.roleCond = sync.NewCond(n.RLocker())
	n.connCond = sync.NewCond(n.RLocker())
	return n, nil
}

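// A minimal lifecycle sketch for callers of this package, assuming a context
// and an exec.Executor implementation are available (myExecutor and stateDir
// below are hypothetical placeholders, not part of this package):
//
//	n, err := node.New(&node.Config{
//		StateDir: stateDir,   // directory for certificates and state
//		Executor: myExecutor, // hypothetical exec.Executor implementation
//	})
//	if err != nil {
//		return err
//	}
//	if err := n.Start(ctx); err != nil {
//		return err
//	}
//	select {
//	case <-n.Ready(): // agent registered; manager (if any) serving
//	case <-ctx.Done():
//	}
//	defer n.Stop(ctx)
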
// BindRemote starts a listener that exposes the remote API.
func (n *Node) BindRemote(ctx context.Context, listenAddr string, advertiseAddr string) error {
	n.RLock()
	defer n.RUnlock()
	if n.manager == nil {
		return errors.New("manager is not running")
	}
	return n.manager.BindRemote(ctx, manager.RemoteAddrs{
		ListenAddr:    listenAddr,
		AdvertiseAddr: advertiseAddr,
	})
}

// Start starts a node instance.
func (n *Node) Start(ctx context.Context) error {
	err := errNodeStarted
	n.startOnce.Do(func() {
		close(n.started)
		go n.run(ctx)
		err = nil // clear the error set above; this runs only once
	})
	return err
}

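// run is the node's main loop: it loads or obtains the node's TLS
// credentials, opens the local task database, runs the agent, and supervises
// the manager while the node holds the manager role, renewing certificates
// whenever the dispatcher reports a role change or a forced rotation.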
func (n *Node) run(ctx context.Context) (err error) {
	defer func() {
		n.err = err
		close(n.closed)
	}()
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	ctx = log.WithModule(ctx, "node")

	go func(ctx context.Context) {
		select {
		case <-ctx.Done():
		case <-n.stopped:
			cancel()
		}
	}(ctx)

	securityConfig, err := n.loadSecurityConfig(ctx)
	if err != nil {
		return err
	}

	ctx = log.WithLogger(ctx, log.G(ctx).WithField("node.id", n.NodeID()))

	taskDBPath := filepath.Join(n.config.StateDir, "worker/tasks.db")
	if err := os.MkdirAll(filepath.Dir(taskDBPath), 0777); err != nil {
		return err
	}

	db, err := bolt.Open(taskDBPath, 0666, nil)
	if err != nil {
		return err
	}
	defer db.Close()

	agentDone := make(chan struct{})
	forceCertRenewal := make(chan struct{})
	renewCert := func() {
		for {
			select {
			case forceCertRenewal <- struct{}{}:
				return
			case <-agentDone:
				return
			case <-n.notifyNodeChange:
				// consume from the channel to avoid blocking the writer
			}
		}
	}

	go func() {
		for {
			select {
			case <-agentDone:
				return
			case node := <-n.notifyNodeChange:
				// If the server is sending us a ForceRenewal State, renew
				if node.Certificate.Status.State == api.IssuanceStateRotate {
					renewCert()
					continue
				}
				n.Lock()
				// If we got a role change, renew
				role := ca.WorkerRole
				if node.Role == api.NodeRoleManager {
					role = ca.ManagerRole
				}
				if n.role == role {
					n.Unlock()
					continue
				}
				n.Unlock()
				renewCert()
			}
		}
	}()

	var wg sync.WaitGroup
	wg.Add(3)

	updates := ca.RenewTLSConfig(ctx, securityConfig, n.connBroker, forceCertRenewal)
	go func() {
		for certUpdate := range updates {
			if certUpdate.Err != nil {
				logrus.Warnf("error renewing TLS certificate: %v", certUpdate.Err)
				continue
			}
			n.Lock()
			n.role = certUpdate.Role
			n.roleCond.Broadcast()
			n.Unlock()
		}
		wg.Done()
	}()

	role := n.role

	managerReady := make(chan struct{})
	agentReady := make(chan struct{})
	var managerErr error
	var agentErr error
	go func() {
		managerErr = n.superviseManager(ctx, securityConfig, managerReady, forceCertRenewal) // store err and loop
		wg.Done()
		cancel()
	}()
	go func() {
		agentErr = n.runAgent(ctx, db, securityConfig.ClientTLSCreds, agentReady)
		wg.Done()
		cancel()
		close(agentDone)
	}()

	go func() {
		<-agentReady
		if role == ca.ManagerRole {
			workerRole := make(chan struct{})
			waitRoleCtx, waitRoleCancel := context.WithCancel(ctx)
			go func() {
				if n.waitRole(waitRoleCtx, ca.WorkerRole) == nil {
					close(workerRole)
				}
			}()
			select {
			case <-managerReady:
			case <-workerRole:
			}
			waitRoleCancel()
		}
		close(n.ready)
	}()

	wg.Wait()
	if managerErr != nil && managerErr != context.Canceled {
		return managerErr
	}
	if agentErr != nil && agentErr != context.Canceled {
		return agentErr
	}
	return err
}

// Stop stops node execution.
func (n *Node) Stop(ctx context.Context) error {
	select {
	case <-n.started:
	default:
		return errNodeNotStarted
	}
	// ask agent to clean up assignments
	n.Lock()
	if n.agent != nil {
		if err := n.agent.Leave(ctx); err != nil {
			log.G(ctx).WithError(err).Error("agent failed to clean up assignments")
		}
	}
	n.Unlock()

	n.stopOnce.Do(func() {
		close(n.stopped)
	})

	select {
	case <-n.closed:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

// Err returns the error that caused the node to shut down, or nil. Err
// blocks until the node has fully shut down.
func (n *Node) Err(ctx context.Context) error {
	select {
	case <-n.closed:
		return n.err
	case <-ctx.Done():
		return ctx.Err()
	}
}

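// runAgent waits until either a remote manager is known or a local control
// socket connection becomes available, then creates and starts the agent,
// closing ready once the agent has completed registration.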
func (n *Node) runAgent(ctx context.Context, db *bolt.DB, creds credentials.TransportCredentials, ready chan<- struct{}) error {
	waitCtx, waitCancel := context.WithCancel(ctx)
	remotesCh := n.remotes.WaitSelect(ctx)
	controlCh := n.ListenControlSocket(waitCtx)

waitPeer:
	for {
		select {
		case <-ctx.Done():
			break waitPeer
		case <-remotesCh:
			break waitPeer
		case conn := <-controlCh:
			if conn != nil {
				break waitPeer
			}
		}
	}

	waitCancel()

	select {
	case <-ctx.Done():
		return ctx.Err()
	default:
	}

	a, err := agent.New(&agent.Config{
		Hostname:         n.config.Hostname,
		ConnBroker:       n.connBroker,
		Executor:         n.config.Executor,
		DB:               db,
		NotifyNodeChange: n.notifyNodeChange,
		Credentials:      creds,
	})
	if err != nil {
		return err
	}
	if err := a.Start(ctx); err != nil {
		return err
	}

	n.Lock()
	n.agent = a
	n.Unlock()

	defer func() {
		n.Lock()
		n.agent = nil
		n.Unlock()
	}()

	go func() {
		<-a.Ready()
		close(ready)
	}()

	// todo: manually call stop on context cancellation?
	return a.Err(context.Background())
}

// Ready returns a channel that is closed after the node's initialization has
// completed for the first time.
func (n *Node) Ready() <-chan struct{} {
	return n.ready
}

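// setControlSocket replaces the current local control connection, closing
// any previous one, and notifies waiters via connCond.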
func (n *Node) setControlSocket(conn *grpc.ClientConn) {
	n.Lock()
	if n.conn != nil {
		n.conn.Close()
	}
	n.conn = conn
	n.connBroker.SetLocalConn(conn)
	n.connCond.Broadcast()
	n.Unlock()
}

// ListenControlSocket listens for changes to the connection used for
// managing the cluster control API. The returned channel first yields the
// current connection (which may be nil), then each subsequent connection
// until the context is canceled.
func (n *Node) ListenControlSocket(ctx context.Context) <-chan *grpc.ClientConn {
	c := make(chan *grpc.ClientConn, 1)
	n.RLock()
	conn := n.conn
	c <- conn
	done := make(chan struct{})
	go func() {
		select {
		case <-ctx.Done():
			n.connCond.Broadcast()
		case <-done:
		}
	}()
	go func() {
		defer close(c)
		defer close(done)
		defer n.RUnlock()
		for {
			select {
			case <-ctx.Done():
				return
			default:
			}
			if conn == n.conn {
				n.connCond.Wait()
				continue
			}
			conn = n.conn
			select {
			case c <- conn:
			case <-ctx.Done():
				return
			}
		}
	}()
	return c
}

// NodeID returns the current node's ID. May be empty if not yet set.
func (n *Node) NodeID() string {
	n.RLock()
	defer n.RUnlock()
	return n.nodeID
}

// Manager returns the manager instance started by the node. May be nil.
func (n *Node) Manager() *manager.Manager {
	n.RLock()
	defer n.RUnlock()
	return n.manager
}

// Agent returns the agent instance started by the node. May be nil.
func (n *Node) Agent() *agent.Agent {
	n.RLock()
	defer n.RUnlock()
	return n.agent
}

// IsStateDirty returns true if any objects have been added to raft which make
// the state "dirty". Currently, the existence of any object other than the
// default cluster or the local node implies a dirty state.
func (n *Node) IsStateDirty() (bool, error) {
	n.RLock()
	defer n.RUnlock()
	if n.manager == nil {
		return false, errors.New("node is not a manager")
	}
	return n.manager.IsStateDirty()
}

// Remotes returns a list of peers known to the node.
func (n *Node) Remotes() []api.Peer {
	weights := n.remotes.Weights()
	remotes := make([]api.Peer, 0, len(weights))
	for p := range weights {
		remotes = append(remotes, p)
	}
	return remotes
}

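// loadSecurityConfig loads the node's TLS credentials from disk. If none
// exist, it either bootstraps a new root CA (when not joining a cluster) or
// downloads the root CA and requests a certificate from a remote manager.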
func (n *Node) loadSecurityConfig(ctx context.Context) (*ca.SecurityConfig, error) {
	paths := ca.NewConfigPaths(filepath.Join(n.config.StateDir, certDirectory))
	var securityConfig *ca.SecurityConfig

	krw := ca.NewKeyReadWriter(paths.Node, n.unlockKey, &manager.RaftDEKData{})
	if err := krw.Migrate(); err != nil {
		return nil, err
	}

	// Check if we already have valid certificates on disk.
	rootCA, err := ca.GetLocalRootCA(paths.RootCA)
	if err != nil && err != ca.ErrNoLocalRootCA {
		return nil, err
	}
	if err == nil {
		// if forcing a new cluster, we allow the certificates to be expired - a new set will be generated
		securityConfig, err = ca.LoadSecurityConfig(ctx, rootCA, krw, n.config.ForceNewCluster)
		if err != nil {
			_, isInvalidKEK := errors.Cause(err).(ca.ErrInvalidKEK)
			if isInvalidKEK {
				return nil, ErrInvalidUnlockKey
			} else if !os.IsNotExist(err) {
				return nil, errors.Wrapf(err, "error while loading TLS certificate in %s", paths.Node.Cert)
			}
		}
	}

	if securityConfig == nil {
		if n.config.JoinAddr == "" {
			// if we're not joining a cluster, bootstrap a new one - and we have to set the unlock key
			n.unlockKey = nil
			if n.config.AutoLockManagers {
				n.unlockKey = encryption.GenerateSecretKey()
			}
			krw = ca.NewKeyReadWriter(paths.Node, n.unlockKey, &manager.RaftDEKData{})
			rootCA, err = ca.CreateRootCA(ca.DefaultRootCN)
			if err != nil {
				return nil, err
			}
			if err := ca.SaveRootCA(rootCA, paths.RootCA); err != nil {
				return nil, err
			}
			log.G(ctx).Debug("generated CA key and certificate")
		} else if err == ca.ErrNoLocalRootCA { // from previous error loading the root CA from disk
			rootCA, err = ca.DownloadRootCA(ctx, paths.RootCA, n.config.JoinToken, n.connBroker)
			if err != nil {
				return nil, err
			}
			log.G(ctx).Debug("downloaded CA certificate")
		}

		// Obtain new certs and set up TLS certificate renewal for this node:
		// - If certificates weren't present on disk, we call CreateSecurityConfig, which blocks
		//   until a valid certificate has been issued.
		// - We wait for CreateSecurityConfig to finish since we need a certificate to operate.

		// Attempt to load certificate from disk
		securityConfig, err = ca.LoadSecurityConfig(ctx, rootCA, krw, n.config.ForceNewCluster)
		if err == nil {
			log.G(ctx).WithFields(logrus.Fields{
				"node.id": securityConfig.ClientTLSCreds.NodeID(),
			}).Debugf("loaded TLS certificate")
		} else {
			if _, ok := errors.Cause(err).(ca.ErrInvalidKEK); ok {
				return nil, ErrInvalidUnlockKey
			}
			log.G(ctx).WithError(err).Debugf("no node credentials found in: %s", krw.Target())

			securityConfig, err = rootCA.CreateSecurityConfig(ctx, krw, ca.CertificateRequestConfig{
				Token:        n.config.JoinToken,
				Availability: n.config.Availability,
				ConnBroker:   n.connBroker,
			})
			if err != nil {
				return nil, err
			}
		}
	}

	n.Lock()
	n.role = securityConfig.ClientTLSCreds.Role()
	n.nodeID = securityConfig.ClientTLSCreds.NodeID()
	n.roleCond.Broadcast()
	n.Unlock()

	return securityConfig, nil
}

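// initManagerConnection dials the local control API socket, polls the health
// endpoint until the ControlAPI service reports SERVING, then publishes the
// connection via setControlSocket and closes ready if it is non-nil.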
func (n *Node) initManagerConnection(ctx context.Context, ready chan<- struct{}) error {
	opts := []grpc.DialOption{
		grpc.WithUnaryInterceptor(grpc_prometheus.UnaryClientInterceptor),
		grpc.WithStreamInterceptor(grpc_prometheus.StreamClientInterceptor),
	}
	insecureCreds := credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})
	opts = append(opts, grpc.WithTransportCredentials(insecureCreds))
	addr := n.config.ListenControlAPI
	opts = append(opts, grpc.WithDialer(
		func(addr string, timeout time.Duration) (net.Conn, error) {
			return xnet.DialTimeoutLocal(addr, timeout)
		}))
	conn, err := grpc.Dial(addr, opts...)
	if err != nil {
		return err
	}
	client := api.NewHealthClient(conn)
	for {
		resp, err := client.Check(ctx, &api.HealthCheckRequest{Service: "ControlAPI"})
		if err != nil {
			return err
		}
		if resp.Status == api.HealthCheckResponse_SERVING {
			break
		}
		time.Sleep(500 * time.Millisecond)
	}
	n.setControlSocket(conn)
	if ready != nil {
		close(ready)
	}
	return nil
}

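// waitRole blocks until the node's role matches the requested role or the
// context is canceled.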
func (n *Node) waitRole(ctx context.Context, role string) error {
	n.roleCond.L.Lock()
	if role == n.role {
		n.roleCond.L.Unlock()
		return nil
	}
	finishCh := make(chan struct{})
	defer close(finishCh)
	go func() {
		select {
		case <-finishCh:
		case <-ctx.Done():
			// call broadcast to shut down this function
			n.roleCond.Broadcast()
		}
	}()
	defer n.roleCond.L.Unlock()
	for role != n.role {
		n.roleCond.Wait()
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}
	}
	return nil
}

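// runManager creates and runs a manager instance, publishing the local
// control connection once the manager is serving. It returns when the
// manager stops, the node is demoted to a worker, the manager is removed
// from the raft cluster, or the context is canceled; the boolean result
// reports whether the manager's local state was cleared on shutdown.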
func (n *Node) runManager(ctx context.Context, securityConfig *ca.SecurityConfig, ready chan struct{}, workerRole <-chan struct{}) (bool, error) {
	var remoteAPI *manager.RemoteAddrs
	if n.config.ListenRemoteAPI != "" {
		remoteAPI = &manager.RemoteAddrs{
			ListenAddr:    n.config.ListenRemoteAPI,
			AdvertiseAddr: n.config.AdvertiseRemoteAPI,
		}
	}

	remoteAddr, _ := n.remotes.Select(n.NodeID())
	m, err := manager.New(&manager.Config{
		ForceNewCluster:  n.config.ForceNewCluster,
		RemoteAPI:        remoteAPI,
		ControlAPI:       n.config.ListenControlAPI,
		SecurityConfig:   securityConfig,
		ExternalCAs:      n.config.ExternalCAs,
		JoinRaft:         remoteAddr.Addr,
		StateDir:         n.config.StateDir,
		HeartbeatTick:    n.config.HeartbeatTick,
		ElectionTick:     n.config.ElectionTick,
		AutoLockManagers: n.config.AutoLockManagers,
		UnlockKey:        n.unlockKey,
		Availability:     n.config.Availability,
		PluginGetter:     n.config.PluginGetter,
	})
	if err != nil {
		return false, err
	}
	done := make(chan struct{})
	var runErr error
	go func(logger *logrus.Entry) {
		if err := m.Run(log.WithLogger(context.Background(), logger)); err != nil {
			runErr = err
		}
		close(done)
	}(log.G(ctx))

	var clearData bool
	defer func() {
		n.Lock()
		n.manager = nil
		n.Unlock()
		m.Stop(ctx, clearData)
		<-done
		n.setControlSocket(nil)
	}()

	n.Lock()
	n.manager = m
	n.Unlock()

	connCtx, connCancel := context.WithCancel(ctx)
	defer connCancel()
	go n.initManagerConnection(connCtx, ready)

	// wait for manager stop or for role change
	select {
	case <-done:
		return false, runErr
	case <-workerRole:
		log.G(ctx).Info("role changed to worker, stopping manager")
		clearData = true
	case <-m.RemovedFromRaft():
		log.G(ctx).Info("manager removed from raft cluster, stopping manager")
		clearData = true
	case <-ctx.Done():
		return false, ctx.Err()
	}
	return clearData, nil
}

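// superviseManager runs the manager whenever the node holds the manager
// role. After a manager stops while the node still appears to be a manager,
// it waits for a role change (forcing a certificate renewal if necessary)
// before restarting, to avoid wrongly rejoining raft under a new ID.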
func (n *Node) superviseManager(ctx context.Context, securityConfig *ca.SecurityConfig, ready chan struct{}, forceCertRenewal chan struct{}) error {
	for {
		if err := n.waitRole(ctx, ca.ManagerRole); err != nil {
			return err
		}

		workerRole := make(chan struct{})
		waitRoleCtx, waitRoleCancel := context.WithCancel(ctx)
		go func() {
			if n.waitRole(waitRoleCtx, ca.WorkerRole) == nil {
				close(workerRole)
			}
		}()

		wasRemoved, err := n.runManager(ctx, securityConfig, ready, workerRole)
		if err != nil {
			waitRoleCancel()
			return errors.Wrap(err, "manager stopped")
		}

		// If the manager stopped running and our role is still
		// "manager", it's possible that the manager was demoted and
		// the agent hasn't realized this yet. We should wait for the
		// role to change instead of restarting the manager immediately.
		err = func() error {
			timer := time.NewTimer(roleChangeTimeout)
			defer timer.Stop()
			defer waitRoleCancel()

			select {
			case <-timer.C:
			case <-workerRole:
				return nil
			case <-ctx.Done():
				return ctx.Err()
			}

			if !wasRemoved {
				log.G(ctx).Warn("failed to get worker role after manager stop, restarting manager")
				return nil
			}

			// We need to be extra careful about restarting the
			// manager. It may cause the node to wrongly join under
			// a new Raft ID. Since we didn't see a role change
			// yet, force a certificate renewal. If the certificate
			// comes back with a worker role, we know we shouldn't
			// restart the manager. However, if we don't see
			// workerRole get closed, it means we didn't switch to
			// a worker certificate, either because we couldn't
			// contact a working CA, or because we've been
			// re-promoted. In this case, we must assume we were
			// re-promoted, and restart the manager.
			log.G(ctx).Warn("failed to get worker role after manager stop, forcing certificate renewal")
			timer.Reset(roleChangeTimeout)
			select {
			case forceCertRenewal <- struct{}{}:
			case <-timer.C:
				log.G(ctx).Warn("failed to trigger certificate renewal after manager stop, restarting manager")
				return nil
			case <-ctx.Done():
				return ctx.Err()
			}

			// Now that the renewal request has been sent to the
			// renewal goroutine, wait for a change in role.
			select {
			case <-timer.C:
				log.G(ctx).Warn("failed to get worker role after manager stop, restarting manager")
			case <-workerRole:
			case <-ctx.Done():
				return ctx.Err()
			}
			return nil
		}()
		if err != nil {
			return err
		}

		ready = nil
	}
}

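// persistentRemotes wraps remotes.Remotes and persists the set of known
// peers to a JSON state file whenever it changes.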
type persistentRemotes struct {
	sync.RWMutex
	c *sync.Cond
	remotes.Remotes
	storePath      string
	lastSavedState []api.Peer
}

func newPersistentRemotes(f string, peers ...api.Peer) *persistentRemotes {
	pr := &persistentRemotes{
		storePath: f,
		Remotes:   remotes.NewRemotes(peers...),
	}
	pr.c = sync.NewCond(pr.RLocker())
	return pr
}

func (s *persistentRemotes) Observe(peer api.Peer, weight int) {
	s.Lock()
	defer s.Unlock()
	s.Remotes.Observe(peer, weight)
	s.c.Broadcast()
	if err := s.save(); err != nil {
		logrus.Errorf("error writing cluster state file: %v", err)
	}
}

func (s *persistentRemotes) Remove(peers ...api.Peer) {
	s.Lock()
	defer s.Unlock()
	s.Remotes.Remove(peers...)
	if err := s.save(); err != nil {
		logrus.Errorf("error writing cluster state file: %v", err)
	}
}

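// save writes the current set of peers to the state file, skipping the
// write if the sorted peer list is unchanged since the last save.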
func (s *persistentRemotes) save() error {
	weights := s.Weights()
	remotes := make([]api.Peer, 0, len(weights))
	for r := range weights {
		remotes = append(remotes, r)
	}
	sort.Sort(sortablePeers(remotes))
	if reflect.DeepEqual(remotes, s.lastSavedState) {
		return nil
	}
	dt, err := json.Marshal(remotes)
	if err != nil {
		return err
	}
	s.lastSavedState = remotes
	return ioutils.AtomicWriteFile(s.storePath, dt, 0600)
}

// WaitSelect waits until at least one remote becomes available and then selects one.
func (s *persistentRemotes) WaitSelect(ctx context.Context) <-chan api.Peer {
	c := make(chan api.Peer, 1)
	s.RLock()
	done := make(chan struct{})
	go func() {
		select {
		case <-ctx.Done():
			s.c.Broadcast()
		case <-done:
		}
	}()
	go func() {
		defer s.RUnlock()
		defer close(c)
		defer close(done)
		for {
			if ctx.Err() != nil {
				return
			}
			p, err := s.Select()
			if err == nil {
				c <- p
				return
			}
			s.c.Wait()
		}
	}()
	return c
}

// sortablePeers is a sort wrapper for []api.Peer
type sortablePeers []api.Peer

func (sp sortablePeers) Less(i, j int) bool { return sp[i].NodeID < sp[j].NodeID }

func (sp sortablePeers) Len() int { return len(sp) }

func (sp sortablePeers) Swap(i, j int) { sp[i], sp[j] = sp[j], sp[i] }