node.go 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008
  1. package node
  2. import (
  3. "bytes"
  4. "crypto/tls"
  5. "encoding/json"
  6. "io/ioutil"
  7. "net"
  8. "os"
  9. "path/filepath"
  10. "reflect"
  11. "sort"
  12. "sync"
  13. "time"
  14. "github.com/Sirupsen/logrus"
  15. "github.com/boltdb/bolt"
  16. "github.com/docker/docker/pkg/plugingetter"
  17. "github.com/docker/swarmkit/agent"
  18. "github.com/docker/swarmkit/agent/exec"
  19. "github.com/docker/swarmkit/api"
  20. "github.com/docker/swarmkit/ca"
  21. "github.com/docker/swarmkit/connectionbroker"
  22. "github.com/docker/swarmkit/ioutils"
  23. "github.com/docker/swarmkit/log"
  24. "github.com/docker/swarmkit/manager"
  25. "github.com/docker/swarmkit/manager/encryption"
  26. "github.com/docker/swarmkit/remotes"
  27. "github.com/docker/swarmkit/watch"
  28. "github.com/docker/swarmkit/xnet"
  29. grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
  30. "github.com/pkg/errors"
  31. "golang.org/x/net/context"
  32. "google.golang.org/grpc"
  33. "google.golang.org/grpc/credentials"
  34. )
const (
	// stateFilename is the name of the file, relative to Config.StateDir,
	// from which New loads the JSON-encoded list of known peers.
	stateFilename = "state.json"
	// roleChangeTimeout is how long superviseManager waits for a role change
	// (or for a forced certificate renewal to be accepted) after the manager
	// has stopped, before restarting the manager.
	roleChangeTimeout = 16 * time.Second
)
var (
	// errNodeStarted is returned by Start when the node was already started.
	errNodeStarted = errors.New("node: already started")
	// errNodeNotStarted is returned by Stop when the node was never started.
	errNodeNotStarted = errors.New("node: not started")
	// certDirectory is the sub-directory of Config.StateDir that is passed to
	// ca.NewConfigPaths to locate the node's certificates.
	certDirectory = "certificates"
	// ErrInvalidUnlockKey is returned when we can't decrypt the TLS certificate
	ErrInvalidUnlockKey = errors.New("node is locked, and needs a valid unlock key")
)
// Config provides values for a Node.
type Config struct {
	// Hostname is the name of the host for the agent instance.
	Hostname string
	// JoinAddr specifies the node that should be used for the initial
	// connection to another manager in the cluster. It is optional and holds
	// at most one address; the actual remotes come from the stored state.
	// When set, New discards the stored peer list and starts from this
	// address only.
	JoinAddr string
	// StateDir specifies the directory the node uses to keep the state of the
	// remote managers and certificates.
	StateDir string
	// JoinToken is the token to be used on the first certificate request.
	JoinToken string
	// ExternalCAs is a list of CAs to which a manager node
	// will make certificate signing requests for node certificates.
	ExternalCAs []*api.ExternalCA
	// ForceNewCluster creates a new cluster from current raft state.
	ForceNewCluster bool
	// ListenControlAPI specifies the address the control API should listen on.
	ListenControlAPI string
	// ListenRemoteAPI specifies the address for the remote API that agents
	// and raft members connect to.
	ListenRemoteAPI string
	// AdvertiseRemoteAPI specifies the address that should be advertised
	// for connections to the remote API (including the raft service).
	AdvertiseRemoteAPI string
	// Executor specifies the executor to use for the agent.
	Executor exec.Executor
	// ElectionTick defines the number of ticks needed without a
	// leader to trigger a new election.
	ElectionTick uint32
	// HeartbeatTick defines the number of ticks between each
	// heartbeat sent to other members for health-check purposes.
	HeartbeatTick uint32
	// AutoLockManagers determines whether or not an unlock key will be generated
	// when bootstrapping a new cluster for the first time.
	AutoLockManagers bool
	// UnlockKey is the key to unlock a node - used for decrypting at rest. This
	// only applies to nodes that have already joined a cluster.
	UnlockKey []byte
	// Availability allows a user to control the current scheduling status of a node.
	Availability api.NodeSpec_Availability
	// PluginGetter provides access to docker's plugin inventory.
	PluginGetter plugingetter.PluginGetter
}
// Node implements the primary node functionality for a member of a swarm
// cluster. Node handles workloads and may also run as a manager.
type Node struct {
	// The embedded RWMutex guards the mutable fields below (role, conn,
	// nodeID, agent, manager); its RLocker backs roleCond and connCond.
	sync.RWMutex
	config *Config
	// remotes is the persisted set of known peers, seeded from the state file.
	remotes *persistentRemotes
	connBroker *connectionbroker.Broker
	// role is the node's current role; it is updated from loaded or renewed
	// TLS credentials.
	role string
	// roleCond is broadcast whenever role is updated.
	roleCond *sync.Cond
	// conn is the local control API connection installed by setControlSocket.
	conn *grpc.ClientConn
	// connCond is broadcast whenever conn is replaced.
	connCond *sync.Cond
	nodeID string
	started chan struct{} // closed by the first call to Start
	startOnce sync.Once
	stopped chan struct{} // closed by the first call to Stop
	stopOnce sync.Once
	ready chan struct{} // closed when agent has completed registration and manager(if enabled) is ready to receive control requests
	closed chan struct{} // closed when run returns
	err error // result of run; read by Err after closed is closed
	agent *agent.Agent
	manager *manager.Manager
	notifyNodeChange chan *agent.NodeChanges // used by the agent to relay node updates from the dispatcher Session stream to (*Node).run
	// unlockKey starts as Config.UnlockKey and may be replaced when a new
	// cluster is bootstrapped in loadSecurityConfig.
	unlockKey []byte
}
  115. // RemoteAPIAddr returns address on which remote manager api listens.
  116. // Returns nil if node is not manager.
  117. func (n *Node) RemoteAPIAddr() (string, error) {
  118. n.RLock()
  119. defer n.RUnlock()
  120. if n.manager == nil {
  121. return "", errors.New("manager is not running")
  122. }
  123. addr := n.manager.Addr()
  124. if addr == "" {
  125. return "", errors.New("manager addr is not set")
  126. }
  127. return addr, nil
  128. }
  129. // New returns new Node instance.
  130. func New(c *Config) (*Node, error) {
  131. if err := os.MkdirAll(c.StateDir, 0700); err != nil {
  132. return nil, err
  133. }
  134. stateFile := filepath.Join(c.StateDir, stateFilename)
  135. dt, err := ioutil.ReadFile(stateFile)
  136. var p []api.Peer
  137. if err != nil && !os.IsNotExist(err) {
  138. return nil, err
  139. }
  140. if err == nil {
  141. if err := json.Unmarshal(dt, &p); err != nil {
  142. return nil, err
  143. }
  144. }
  145. n := &Node{
  146. remotes: newPersistentRemotes(stateFile, p...),
  147. role: ca.WorkerRole,
  148. config: c,
  149. started: make(chan struct{}),
  150. stopped: make(chan struct{}),
  151. closed: make(chan struct{}),
  152. ready: make(chan struct{}),
  153. notifyNodeChange: make(chan *agent.NodeChanges, 1),
  154. unlockKey: c.UnlockKey,
  155. }
  156. if n.config.JoinAddr != "" || n.config.ForceNewCluster {
  157. n.remotes = newPersistentRemotes(filepath.Join(n.config.StateDir, stateFilename))
  158. if n.config.JoinAddr != "" {
  159. n.remotes.Observe(api.Peer{Addr: n.config.JoinAddr}, remotes.DefaultObservationWeight)
  160. }
  161. }
  162. n.connBroker = connectionbroker.New(n.remotes)
  163. n.roleCond = sync.NewCond(n.RLocker())
  164. n.connCond = sync.NewCond(n.RLocker())
  165. return n, nil
  166. }
  167. // BindRemote starts a listener that exposes the remote API.
  168. func (n *Node) BindRemote(ctx context.Context, listenAddr string, advertiseAddr string) error {
  169. n.RLock()
  170. defer n.RUnlock()
  171. if n.manager == nil {
  172. return errors.New("manager is not running")
  173. }
  174. return n.manager.BindRemote(ctx, manager.RemoteAddrs{
  175. ListenAddr: listenAddr,
  176. AdvertiseAddr: advertiseAddr,
  177. })
  178. }
  179. // Start starts a node instance.
  180. func (n *Node) Start(ctx context.Context) error {
  181. err := errNodeStarted
  182. n.startOnce.Do(func() {
  183. close(n.started)
  184. go n.run(ctx)
  185. err = nil // clear error above, only once.
  186. })
  187. return err
  188. }
  189. func (n *Node) run(ctx context.Context) (err error) {
  190. defer func() {
  191. n.err = err
  192. close(n.closed)
  193. }()
  194. ctx, cancel := context.WithCancel(ctx)
  195. defer cancel()
  196. ctx = log.WithModule(ctx, "node")
  197. go func(ctx context.Context) {
  198. select {
  199. case <-ctx.Done():
  200. case <-n.stopped:
  201. cancel()
  202. }
  203. }(ctx)
  204. paths := ca.NewConfigPaths(filepath.Join(n.config.StateDir, certDirectory))
  205. securityConfig, err := n.loadSecurityConfig(ctx, paths)
  206. if err != nil {
  207. return err
  208. }
  209. ctx = log.WithLogger(ctx, log.G(ctx).WithField("node.id", n.NodeID()))
  210. taskDBPath := filepath.Join(n.config.StateDir, "worker/tasks.db")
  211. if err := os.MkdirAll(filepath.Dir(taskDBPath), 0777); err != nil {
  212. return err
  213. }
  214. db, err := bolt.Open(taskDBPath, 0666, nil)
  215. if err != nil {
  216. return err
  217. }
  218. defer db.Close()
  219. agentDone := make(chan struct{})
  220. forceCertRenewal := make(chan struct{})
  221. renewCert := func() {
  222. for {
  223. select {
  224. case forceCertRenewal <- struct{}{}:
  225. return
  226. case <-agentDone:
  227. return
  228. case <-n.notifyNodeChange:
  229. // consume from the channel to avoid blocking the writer
  230. }
  231. }
  232. }
  233. go func() {
  234. for {
  235. select {
  236. case <-agentDone:
  237. return
  238. case nodeChanges := <-n.notifyNodeChange:
  239. n.Lock()
  240. currentRole := n.role
  241. n.Unlock()
  242. if nodeChanges.Node != nil {
  243. role := ca.WorkerRole
  244. if nodeChanges.Node.Role == api.NodeRoleManager {
  245. role = ca.ManagerRole
  246. }
  247. // If the server is sending us a ForceRenewal State, or if the new node role doesn't match our current role, renew
  248. if currentRole != role || nodeChanges.Node.Certificate.Status.State == api.IssuanceStateRotate {
  249. renewCert()
  250. }
  251. }
  252. if nodeChanges.RootCert != nil {
  253. // We only want to update the root CA if this is a worker node. Manager nodes directly watch the raft
  254. // store and update the root CA, with the necessary signer, from the raft store (since the managers
  255. // need the CA key as well to potentially issue new TLS certificates).
  256. if currentRole == ca.ManagerRole || bytes.Equal(nodeChanges.RootCert, securityConfig.RootCA().Certs) {
  257. continue
  258. }
  259. newRootCA, err := ca.NewRootCA(nodeChanges.RootCert, nil, nil, ca.DefaultNodeCertExpiration, nil)
  260. if err != nil {
  261. log.G(ctx).WithError(err).Error("invalid new root certificate from the dispatcher")
  262. continue
  263. }
  264. if err := ca.SaveRootCA(newRootCA, paths.RootCA); err != nil {
  265. log.G(ctx).WithError(err).Error("could not save new root certificate from the dispatcher")
  266. continue
  267. }
  268. if err := securityConfig.UpdateRootCA(&newRootCA, newRootCA.Pool); err != nil {
  269. log.G(ctx).WithError(err).Error("could not use new root CA from dispatcher")
  270. continue
  271. }
  272. }
  273. }
  274. }
  275. }()
  276. var wg sync.WaitGroup
  277. wg.Add(3)
  278. updates := ca.RenewTLSConfig(ctx, securityConfig, n.connBroker, forceCertRenewal)
  279. go func() {
  280. for certUpdate := range updates {
  281. if certUpdate.Err != nil {
  282. logrus.Warnf("error renewing TLS certificate: %v", certUpdate.Err)
  283. continue
  284. }
  285. n.Lock()
  286. n.role = certUpdate.Role
  287. n.roleCond.Broadcast()
  288. n.Unlock()
  289. }
  290. wg.Done()
  291. }()
  292. role := n.role
  293. managerReady := make(chan struct{})
  294. agentReady := make(chan struct{})
  295. var managerErr error
  296. var agentErr error
  297. go func() {
  298. managerErr = n.superviseManager(ctx, securityConfig, paths.RootCA, managerReady, forceCertRenewal) // store err and loop
  299. wg.Done()
  300. cancel()
  301. }()
  302. go func() {
  303. agentErr = n.runAgent(ctx, db, securityConfig, agentReady)
  304. wg.Done()
  305. cancel()
  306. close(agentDone)
  307. }()
  308. go func() {
  309. <-agentReady
  310. if role == ca.ManagerRole {
  311. workerRole := make(chan struct{})
  312. waitRoleCtx, waitRoleCancel := context.WithCancel(ctx)
  313. go func() {
  314. if n.waitRole(waitRoleCtx, ca.WorkerRole) == nil {
  315. close(workerRole)
  316. }
  317. }()
  318. select {
  319. case <-managerReady:
  320. case <-workerRole:
  321. }
  322. waitRoleCancel()
  323. }
  324. close(n.ready)
  325. }()
  326. wg.Wait()
  327. if managerErr != nil && managerErr != context.Canceled {
  328. return managerErr
  329. }
  330. if agentErr != nil && agentErr != context.Canceled {
  331. return agentErr
  332. }
  333. return err
  334. }
  335. // Stop stops node execution
  336. func (n *Node) Stop(ctx context.Context) error {
  337. select {
  338. case <-n.started:
  339. default:
  340. return errNodeNotStarted
  341. }
  342. // ask agent to clean up assignments
  343. n.Lock()
  344. if n.agent != nil {
  345. if err := n.agent.Leave(ctx); err != nil {
  346. log.G(ctx).WithError(err).Error("agent failed to clean up assignments")
  347. }
  348. }
  349. n.Unlock()
  350. n.stopOnce.Do(func() {
  351. close(n.stopped)
  352. })
  353. select {
  354. case <-n.closed:
  355. return nil
  356. case <-ctx.Done():
  357. return ctx.Err()
  358. }
  359. }
  360. // Err returns the error that caused the node to shutdown or nil. Err blocks
  361. // until the node has fully shut down.
  362. func (n *Node) Err(ctx context.Context) error {
  363. select {
  364. case <-n.closed:
  365. return n.err
  366. case <-ctx.Done():
  367. return ctx.Err()
  368. }
  369. }
  370. func (n *Node) runAgent(ctx context.Context, db *bolt.DB, securityConfig *ca.SecurityConfig, ready chan<- struct{}) error {
  371. waitCtx, waitCancel := context.WithCancel(ctx)
  372. remotesCh := n.remotes.WaitSelect(ctx)
  373. controlCh := n.ListenControlSocket(waitCtx)
  374. waitPeer:
  375. for {
  376. select {
  377. case <-ctx.Done():
  378. break waitPeer
  379. case <-remotesCh:
  380. break waitPeer
  381. case conn := <-controlCh:
  382. if conn != nil {
  383. break waitPeer
  384. }
  385. }
  386. }
  387. waitCancel()
  388. select {
  389. case <-ctx.Done():
  390. return ctx.Err()
  391. default:
  392. }
  393. secChangeQueue := watch.NewQueue()
  394. defer secChangeQueue.Close()
  395. secChangesCh, secChangesCancel := secChangeQueue.Watch()
  396. defer secChangesCancel()
  397. securityConfig.SetWatch(secChangeQueue)
  398. rootCA := securityConfig.RootCA()
  399. issuer := securityConfig.IssuerInfo()
  400. a, err := agent.New(&agent.Config{
  401. Hostname: n.config.Hostname,
  402. ConnBroker: n.connBroker,
  403. Executor: n.config.Executor,
  404. DB: db,
  405. NotifyNodeChange: n.notifyNodeChange,
  406. NotifyTLSChange: secChangesCh,
  407. Credentials: securityConfig.ClientTLSCreds,
  408. NodeTLSInfo: &api.NodeTLSInfo{
  409. TrustRoot: rootCA.Certs,
  410. CertIssuerPublicKey: issuer.PublicKey,
  411. CertIssuerSubject: issuer.Subject,
  412. },
  413. })
  414. if err != nil {
  415. return err
  416. }
  417. if err := a.Start(ctx); err != nil {
  418. return err
  419. }
  420. n.Lock()
  421. n.agent = a
  422. n.Unlock()
  423. defer func() {
  424. n.Lock()
  425. n.agent = nil
  426. n.Unlock()
  427. }()
  428. go func() {
  429. <-a.Ready()
  430. close(ready)
  431. }()
  432. // todo: manually call stop on context cancellation?
  433. return a.Err(context.Background())
  434. }
  435. // Ready returns a channel that is closed after node's initialization has
  436. // completes for the first time.
  437. func (n *Node) Ready() <-chan struct{} {
  438. return n.ready
  439. }
  440. func (n *Node) setControlSocket(conn *grpc.ClientConn) {
  441. n.Lock()
  442. if n.conn != nil {
  443. n.conn.Close()
  444. }
  445. n.conn = conn
  446. n.connBroker.SetLocalConn(conn)
  447. n.connCond.Broadcast()
  448. n.Unlock()
  449. }
// ListenControlSocket listens for changes of the connection used for managing
// the cluster control API.
//
// The returned channel first delivers the current connection (which may be
// nil) and afterwards every connection that differs from the last one
// delivered. The channel is closed once ctx is done.
func (n *Node) ListenControlSocket(ctx context.Context) <-chan *grpc.ClientConn {
	// Buffer of one so the initial value can be queued before anyone reads.
	c := make(chan *grpc.ClientConn, 1)
	// This read lock is handed over to the watcher goroutine below, which
	// releases it when it exits.
	n.RLock()
	conn := n.conn
	c <- conn
	done := make(chan struct{})
	// Wake the watcher out of connCond.Wait when the context is cancelled.
	go func() {
		select {
		case <-ctx.Done():
			n.connCond.Broadcast()
		case <-done:
		}
	}()
	go func() {
		defer close(c)
		defer close(done)
		defer n.RUnlock()
		for {
			select {
			case <-ctx.Done():
				return
			default:
			}
			// Nothing new since the last delivery: block until the next
			// broadcast on connCond.
			if conn == n.conn {
				n.connCond.Wait()
				continue
			}
			conn = n.conn
			select {
			case c <- conn:
			case <-ctx.Done():
				return
			}
		}
	}()
	return c
}
  489. // NodeID returns current node's ID. May be empty if not set.
  490. func (n *Node) NodeID() string {
  491. n.RLock()
  492. defer n.RUnlock()
  493. return n.nodeID
  494. }
  495. // Manager returns manager instance started by node. May be nil.
  496. func (n *Node) Manager() *manager.Manager {
  497. n.RLock()
  498. defer n.RUnlock()
  499. return n.manager
  500. }
  501. // Agent returns agent instance started by node. May be nil.
  502. func (n *Node) Agent() *agent.Agent {
  503. n.RLock()
  504. defer n.RUnlock()
  505. return n.agent
  506. }
  507. // IsStateDirty returns true if any objects have been added to raft which make
  508. // the state "dirty". Currently, the existence of any object other than the
  509. // default cluster or the local node implies a dirty state.
  510. func (n *Node) IsStateDirty() (bool, error) {
  511. n.RLock()
  512. defer n.RUnlock()
  513. if n.manager == nil {
  514. return false, errors.New("node is not a manager")
  515. }
  516. return n.manager.IsStateDirty()
  517. }
  518. // Remotes returns a list of known peers known to node.
  519. func (n *Node) Remotes() []api.Peer {
  520. weights := n.remotes.Weights()
  521. remotes := make([]api.Peer, 0, len(weights))
  522. for p := range weights {
  523. remotes = append(remotes, p)
  524. }
  525. return remotes
  526. }
// loadSecurityConfig produces the security configuration (root CA plus node
// TLS credentials) the node runs with, and records the resulting role and
// node ID on n.
//
// It first tries to load an existing root CA and node certificate from
// paths. If that does not yield a configuration it either bootstraps a new
// cluster (no JoinAddr configured: a fresh root CA is created and saved) or
// downloads the root CA from the cluster being joined, and then loads or
// requests a node certificate. ErrInvalidUnlockKey is returned when the
// stored key material cannot be decrypted with the node's unlock key.
func (n *Node) loadSecurityConfig(ctx context.Context, paths *ca.SecurityConfigPaths) (*ca.SecurityConfig, error) {
	var securityConfig *ca.SecurityConfig

	krw := ca.NewKeyReadWriter(paths.Node, n.unlockKey, &manager.RaftDEKData{})
	if err := krw.Migrate(); err != nil {
		return nil, err
	}

	// Check if we already have a valid certificates on disk.
	rootCA, err := ca.GetLocalRootCA(paths.RootCA)
	if err != nil && err != ca.ErrNoLocalRootCA {
		return nil, err
	}
	if err == nil {
		// if forcing a new cluster, we allow the certificates to be expired - a new set will be generated
		securityConfig, err = ca.LoadSecurityConfig(ctx, rootCA, krw, n.config.ForceNewCluster)
		if err != nil {
			_, isInvalidKEK := errors.Cause(err).(ca.ErrInvalidKEK)
			if isInvalidKEK {
				return nil, ErrInvalidUnlockKey
			} else if !os.IsNotExist(err) {
				return nil, errors.Wrapf(err, "error while loading TLS certificate in %s", paths.Node.Cert)
			}
			// A "does not exist" error falls through: the configuration is
			// created below.
		}
	}

	if securityConfig == nil {
		if n.config.JoinAddr == "" {
			// if we're not joining a cluster, bootstrap a new one - and we have to set the unlock key
			n.unlockKey = nil
			if n.config.AutoLockManagers {
				n.unlockKey = encryption.GenerateSecretKey()
			}
			krw = ca.NewKeyReadWriter(paths.Node, n.unlockKey, &manager.RaftDEKData{})
			rootCA, err = ca.CreateRootCA(ca.DefaultRootCN)
			if err != nil {
				return nil, err
			}
			if err := ca.SaveRootCA(rootCA, paths.RootCA); err != nil {
				return nil, err
			}
			log.G(ctx).Debug("generated CA key and certificate")
		} else if err == ca.ErrNoLocalRootCA { // from previous error loading the root CA from disk
			rootCA, err = ca.DownloadRootCA(ctx, paths.RootCA, n.config.JoinToken, n.connBroker)
			if err != nil {
				return nil, err
			}
			log.G(ctx).Debug("downloaded CA certificate")
		}

		// Obtain node credentials for the root CA chosen above: first try
		// to load a certificate from disk; if that fails for any reason
		// other than an invalid unlock key, request a new one with
		// CreateSecurityConfig and wait for the result, since the node
		// cannot operate without a certificate.
		securityConfig, err = ca.LoadSecurityConfig(ctx, rootCA, krw, n.config.ForceNewCluster)
		if err == nil {
			log.G(ctx).WithFields(logrus.Fields{
				"node.id": securityConfig.ClientTLSCreds.NodeID(),
			}).Debugf("loaded TLS certificate")
		} else {
			if _, ok := errors.Cause(err).(ca.ErrInvalidKEK); ok {
				return nil, ErrInvalidUnlockKey
			}
			log.G(ctx).WithError(err).Debugf("no node credentials found in: %s", krw.Target())

			securityConfig, err = rootCA.CreateSecurityConfig(ctx, krw, ca.CertificateRequestConfig{
				Token:        n.config.JoinToken,
				Availability: n.config.Availability,
				ConnBroker:   n.connBroker,
			})

			if err != nil {
				return nil, err
			}
		}
	}

	// Publish the identity carried by the credentials and wake up anyone
	// waiting for a role change.
	n.Lock()
	n.role = securityConfig.ClientTLSCreds.Role()
	n.nodeID = securityConfig.ClientTLSCreds.NodeID()
	n.roleCond.Broadcast()
	n.Unlock()

	return securityConfig, nil
}
  605. func (n *Node) initManagerConnection(ctx context.Context, ready chan<- struct{}) error {
  606. opts := []grpc.DialOption{
  607. grpc.WithUnaryInterceptor(grpc_prometheus.UnaryClientInterceptor),
  608. grpc.WithStreamInterceptor(grpc_prometheus.StreamClientInterceptor),
  609. }
  610. insecureCreds := credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})
  611. opts = append(opts, grpc.WithTransportCredentials(insecureCreds))
  612. addr := n.config.ListenControlAPI
  613. opts = append(opts, grpc.WithDialer(
  614. func(addr string, timeout time.Duration) (net.Conn, error) {
  615. return xnet.DialTimeoutLocal(addr, timeout)
  616. }))
  617. conn, err := grpc.Dial(addr, opts...)
  618. if err != nil {
  619. return err
  620. }
  621. client := api.NewHealthClient(conn)
  622. for {
  623. resp, err := client.Check(ctx, &api.HealthCheckRequest{Service: "ControlAPI"})
  624. if err != nil {
  625. return err
  626. }
  627. if resp.Status == api.HealthCheckResponse_SERVING {
  628. break
  629. }
  630. time.Sleep(500 * time.Millisecond)
  631. }
  632. n.setControlSocket(conn)
  633. if ready != nil {
  634. close(ready)
  635. }
  636. return nil
  637. }
  638. func (n *Node) waitRole(ctx context.Context, role string) error {
  639. n.roleCond.L.Lock()
  640. if role == n.role {
  641. n.roleCond.L.Unlock()
  642. return nil
  643. }
  644. finishCh := make(chan struct{})
  645. defer close(finishCh)
  646. go func() {
  647. select {
  648. case <-finishCh:
  649. case <-ctx.Done():
  650. // call broadcast to shutdown this function
  651. n.roleCond.Broadcast()
  652. }
  653. }()
  654. defer n.roleCond.L.Unlock()
  655. for role != n.role {
  656. n.roleCond.Wait()
  657. select {
  658. case <-ctx.Done():
  659. return ctx.Err()
  660. default:
  661. }
  662. }
  663. return nil
  664. }
  665. func (n *Node) runManager(ctx context.Context, securityConfig *ca.SecurityConfig, rootPaths ca.CertPaths, ready chan struct{}, workerRole <-chan struct{}) (bool, error) {
  666. var remoteAPI *manager.RemoteAddrs
  667. if n.config.ListenRemoteAPI != "" {
  668. remoteAPI = &manager.RemoteAddrs{
  669. ListenAddr: n.config.ListenRemoteAPI,
  670. AdvertiseAddr: n.config.AdvertiseRemoteAPI,
  671. }
  672. }
  673. remoteAddr, _ := n.remotes.Select(n.NodeID())
  674. m, err := manager.New(&manager.Config{
  675. ForceNewCluster: n.config.ForceNewCluster,
  676. RemoteAPI: remoteAPI,
  677. ControlAPI: n.config.ListenControlAPI,
  678. SecurityConfig: securityConfig,
  679. ExternalCAs: n.config.ExternalCAs,
  680. JoinRaft: remoteAddr.Addr,
  681. StateDir: n.config.StateDir,
  682. HeartbeatTick: n.config.HeartbeatTick,
  683. ElectionTick: n.config.ElectionTick,
  684. AutoLockManagers: n.config.AutoLockManagers,
  685. UnlockKey: n.unlockKey,
  686. Availability: n.config.Availability,
  687. PluginGetter: n.config.PluginGetter,
  688. RootCAPaths: rootPaths,
  689. })
  690. if err != nil {
  691. return false, err
  692. }
  693. done := make(chan struct{})
  694. var runErr error
  695. go func(logger *logrus.Entry) {
  696. if err := m.Run(log.WithLogger(context.Background(), logger)); err != nil {
  697. runErr = err
  698. }
  699. close(done)
  700. }(log.G(ctx))
  701. var clearData bool
  702. defer func() {
  703. n.Lock()
  704. n.manager = nil
  705. n.Unlock()
  706. m.Stop(ctx, clearData)
  707. <-done
  708. n.setControlSocket(nil)
  709. }()
  710. n.Lock()
  711. n.manager = m
  712. n.Unlock()
  713. connCtx, connCancel := context.WithCancel(ctx)
  714. defer connCancel()
  715. go n.initManagerConnection(connCtx, ready)
  716. // wait for manager stop or for role change
  717. select {
  718. case <-done:
  719. return false, runErr
  720. case <-workerRole:
  721. log.G(ctx).Info("role changed to worker, stopping manager")
  722. clearData = true
  723. case <-m.RemovedFromRaft():
  724. log.G(ctx).Info("manager removed from raft cluster, stopping manager")
  725. clearData = true
  726. case <-ctx.Done():
  727. return false, ctx.Err()
  728. }
  729. return clearData, nil
  730. }
// superviseManager runs a manager whenever the node holds the manager role,
// and loops until ctx is done or the manager fails.
//
// Each iteration waits for the manager role, runs the manager via
// runManager, and afterwards gives the node up to roleChangeTimeout to
// switch to the worker role before the manager is started again. If the
// manager was demoted or removed from raft and no role change is seen, a
// certificate renewal is forced first. The ready channel is only passed to
// the first manager run; later runs receive nil.
func (n *Node) superviseManager(ctx context.Context, securityConfig *ca.SecurityConfig, rootPaths ca.CertPaths, ready chan struct{}, forceCertRenewal chan struct{}) error {
	for {
		if err := n.waitRole(ctx, ca.ManagerRole); err != nil {
			return err
		}

		// workerRole is closed as soon as the node's role becomes worker.
		workerRole := make(chan struct{})
		waitRoleCtx, waitRoleCancel := context.WithCancel(ctx)
		go func() {
			if n.waitRole(waitRoleCtx, ca.WorkerRole) == nil {
				close(workerRole)
			}
		}()

		wasRemoved, err := n.runManager(ctx, securityConfig, rootPaths, ready, workerRole)
		if err != nil {
			waitRoleCancel()
			return errors.Wrap(err, "manager stopped")
		}

		// If the manager stopped running and our role is still
		// "manager", it's possible that the manager was demoted and
		// the agent hasn't realized this yet. We should wait for the
		// role to change instead of restarting the manager immediately.
		err = func() error {
			timer := time.NewTimer(roleChangeTimeout)
			defer timer.Stop()
			defer waitRoleCancel()

			select {
			case <-timer.C:
			case <-workerRole:
				return nil
			case <-ctx.Done():
				return ctx.Err()
			}

			// The timer expired without a role change.
			if !wasRemoved {
				log.G(ctx).Warn("failed to get worker role after manager stop, restarting manager")
				return nil
			}
			// We need to be extra careful about restarting the
			// manager. It may cause the node to wrongly join under
			// a new Raft ID. Since we didn't see a role change
			// yet, force a certificate renewal. If the certificate
			// comes back with a worker role, we know we shouldn't
			// restart the manager. However, if we don't see
			// workerRole get closed, it means we didn't switch to
			// a worker certificate, either because we couldn't
			// contact a working CA, or because we've been
			// re-promoted. In this case, we must assume we were
			// re-promoted, and restart the manager.
			log.G(ctx).Warn("failed to get worker role after manager stop, forcing certificate renewal")
			// One timer period covers both handing over the renewal
			// request and waiting for the resulting role change.
			timer.Reset(roleChangeTimeout)

			select {
			case forceCertRenewal <- struct{}{}:
			case <-timer.C:
				log.G(ctx).Warn("failed to trigger certificate renewal after manager stop, restarting manager")
				return nil
			case <-ctx.Done():
				return ctx.Err()
			}

			// Now that the renewal request has been sent to the
			// renewal goroutine, wait for a change in role.
			select {
			case <-timer.C:
				log.G(ctx).Warn("failed to get worker role after manager stop, restarting manager")
			case <-workerRole:
			case <-ctx.Done():
				return ctx.Err()
			}

			return nil
		}()
		if err != nil {
			return err
		}

		// Readiness is only signalled for the first manager run.
		ready = nil
	}
}
// persistentRemotes wraps a remotes.Remotes set and writes the set of known
// peers to a file whenever it changes through Observe or Remove.
type persistentRemotes struct {
	sync.RWMutex
	// c is broadcast when a peer is observed or a WaitSelect context is
	// cancelled; it waits on the read side of the embedded RWMutex.
	c *sync.Cond
	remotes.Remotes
	// storePath is the file the peer list is written to.
	storePath string
	// lastSavedState is the sorted peer list remembered by save; it is used
	// to skip writes when nothing has changed.
	lastSavedState []api.Peer
}
  812. func newPersistentRemotes(f string, peers ...api.Peer) *persistentRemotes {
  813. pr := &persistentRemotes{
  814. storePath: f,
  815. Remotes: remotes.NewRemotes(peers...),
  816. }
  817. pr.c = sync.NewCond(pr.RLocker())
  818. return pr
  819. }
  820. func (s *persistentRemotes) Observe(peer api.Peer, weight int) {
  821. s.Lock()
  822. defer s.Unlock()
  823. s.Remotes.Observe(peer, weight)
  824. s.c.Broadcast()
  825. if err := s.save(); err != nil {
  826. logrus.Errorf("error writing cluster state file: %v", err)
  827. return
  828. }
  829. return
  830. }
  831. func (s *persistentRemotes) Remove(peers ...api.Peer) {
  832. s.Lock()
  833. defer s.Unlock()
  834. s.Remotes.Remove(peers...)
  835. if err := s.save(); err != nil {
  836. logrus.Errorf("error writing cluster state file: %v", err)
  837. return
  838. }
  839. return
  840. }
  841. func (s *persistentRemotes) save() error {
  842. weights := s.Weights()
  843. remotes := make([]api.Peer, 0, len(weights))
  844. for r := range weights {
  845. remotes = append(remotes, r)
  846. }
  847. sort.Sort(sortablePeers(remotes))
  848. if reflect.DeepEqual(remotes, s.lastSavedState) {
  849. return nil
  850. }
  851. dt, err := json.Marshal(remotes)
  852. if err != nil {
  853. return err
  854. }
  855. s.lastSavedState = remotes
  856. return ioutils.AtomicWriteFile(s.storePath, dt, 0600)
  857. }
  858. // WaitSelect waits until at least one remote becomes available and then selects one.
  859. func (s *persistentRemotes) WaitSelect(ctx context.Context) <-chan api.Peer {
  860. c := make(chan api.Peer, 1)
  861. s.RLock()
  862. done := make(chan struct{})
  863. go func() {
  864. select {
  865. case <-ctx.Done():
  866. s.c.Broadcast()
  867. case <-done:
  868. }
  869. }()
  870. go func() {
  871. defer s.RUnlock()
  872. defer close(c)
  873. defer close(done)
  874. for {
  875. if ctx.Err() != nil {
  876. return
  877. }
  878. p, err := s.Select()
  879. if err == nil {
  880. c <- p
  881. return
  882. }
  883. s.c.Wait()
  884. }
  885. }()
  886. return c
  887. }
  888. // sortablePeers is a sort wrapper for []api.Peer
  889. type sortablePeers []api.Peer
  890. func (sp sortablePeers) Less(i, j int) bool { return sp[i].NodeID < sp[j].NodeID }
  891. func (sp sortablePeers) Len() int { return len(sp) }
  892. func (sp sortablePeers) Swap(i, j int) { sp[i], sp[j] = sp[j], sp[i] }