node.go 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050
  1. package node
  2. import (
  3. "bytes"
  4. "crypto/tls"
  5. "encoding/json"
  6. "io/ioutil"
  7. "net"
  8. "os"
  9. "path/filepath"
  10. "reflect"
  11. "sort"
  12. "sync"
  13. "time"
  14. "github.com/Sirupsen/logrus"
  15. "github.com/boltdb/bolt"
  16. "github.com/docker/docker/pkg/plugingetter"
  17. "github.com/docker/swarmkit/agent"
  18. "github.com/docker/swarmkit/agent/exec"
  19. "github.com/docker/swarmkit/api"
  20. "github.com/docker/swarmkit/ca"
  21. "github.com/docker/swarmkit/connectionbroker"
  22. "github.com/docker/swarmkit/ioutils"
  23. "github.com/docker/swarmkit/log"
  24. "github.com/docker/swarmkit/manager"
  25. "github.com/docker/swarmkit/manager/encryption"
  26. "github.com/docker/swarmkit/remotes"
  27. "github.com/docker/swarmkit/watch"
  28. "github.com/docker/swarmkit/xnet"
  29. grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
  30. "github.com/pkg/errors"
  31. "golang.org/x/net/context"
  32. "google.golang.org/grpc"
  33. "google.golang.org/grpc/credentials"
  34. )
  // stateFilename is the name of the file, inside the node's state
  // directory, that holds the JSON-encoded list of known peers.
  const stateFilename = "state.json"

  // roleChangeTimeout is how long superviseManager waits for a role change
  // after the manager has stopped before taking further action.
  const roleChangeTimeout = 16 * time.Second
  // Sentinel errors reported by Start and Stop.
  var (
  	// errNodeStarted is returned by Start when the node was already started.
  	errNodeStarted = errors.New("node: already started")
  	// errNodeNotStarted is returned by Stop when Start has not been called.
  	errNodeNotStarted = errors.New("node: not started")
  )

  // certDirectory is the name of the sub-directory of the state directory in
  // which the node keeps its certificates.
  var certDirectory = "certificates"

  // ErrInvalidUnlockKey is returned when we can't decrypt the TLS certificate
  var ErrInvalidUnlockKey = errors.New("node is locked, and needs a valid unlock key")
  46. // Config provides values for a Node.
  47. type Config struct {
  48. // Hostname is the name of host for agent instance.
  49. Hostname string
  50. // JoinAddr specifies node that should be used for the initial connection to
  51. // other manager in cluster. This should be only one address and optional,
  52. // the actual remotes come from the stored state.
  53. JoinAddr string
  54. // StateDir specifies the directory the node uses to keep the state of the
  55. // remote managers and certificates.
  56. StateDir string
  57. // JoinToken is the token to be used on the first certificate request.
  58. JoinToken string
  59. // ExternalCAs is a list of CAs to which a manager node
  60. // will make certificate signing requests for node certificates.
  61. ExternalCAs []*api.ExternalCA
  62. // ForceNewCluster creates a new cluster from current raft state.
  63. ForceNewCluster bool
  64. // ListenControlAPI specifies address the control API should listen on.
  65. ListenControlAPI string
  66. // ListenRemoteAPI specifies the address for the remote API that agents
  67. // and raft members connect to.
  68. ListenRemoteAPI string
  69. // AdvertiseRemoteAPI specifies the address that should be advertised
  70. // for connections to the remote API (including the raft service).
  71. AdvertiseRemoteAPI string
  72. // Executor specifies the executor to use for the agent.
  73. Executor exec.Executor
  74. // ElectionTick defines the amount of ticks needed without
  75. // leader to trigger a new election
  76. ElectionTick uint32
  77. // HeartbeatTick defines the amount of ticks between each
  78. // heartbeat sent to other members for health-check purposes
  79. HeartbeatTick uint32
  80. // AutoLockManagers determines whether or not an unlock key will be generated
  81. // when bootstrapping a new cluster for the first time
  82. AutoLockManagers bool
  83. // UnlockKey is the key to unlock a node - used for decrypting at rest. This
  84. // only applies to nodes that have already joined a cluster.
  85. UnlockKey []byte
  86. // Availability allows a user to control the current scheduling status of a node
  87. Availability api.NodeSpec_Availability
  88. // PluginGetter provides access to docker's plugin inventory.
  89. PluginGetter plugingetter.PluginGetter
  90. }
  91. // Node implements the primary node functionality for a member of a swarm
  92. // cluster. Node handles workloads and may also run as a manager.
  93. type Node struct {
  94. sync.RWMutex
  95. config *Config
  96. remotes *persistentRemotes
  97. connBroker *connectionbroker.Broker
  98. role string
  99. roleCond *sync.Cond
  100. conn *grpc.ClientConn
  101. connCond *sync.Cond
  102. nodeID string
  103. started chan struct{}
  104. startOnce sync.Once
  105. stopped chan struct{}
  106. stopOnce sync.Once
  107. ready chan struct{} // closed when agent has completed registration and manager(if enabled) is ready to receive control requests
  108. closed chan struct{}
  109. err error
  110. agent *agent.Agent
  111. manager *manager.Manager
  112. notifyNodeChange chan *agent.NodeChanges // used by the agent to relay node updates from the dispatcher Session stream to (*Node).run
  113. unlockKey []byte
  114. // lastNodeRole is the last-seen value of Node.Role, used to make role
  115. // changes "edge triggered" and avoid renewal loops.
  116. lastNodeRole lastSeenRole
  117. // lastNodeDesiredRole is the last-seen value of Node.Spec.DesiredRole,
  118. // used to make role changes "edge triggered" and avoid renewal loops.
  119. // This exists in addition to lastNodeRole to support older CAs that
  120. // only fill in the DesiredRole field.
  121. lastNodeDesiredRole lastSeenRole
  122. }
  123. type lastSeenRole struct {
  124. role *api.NodeRole
  125. }
  126. // observe notes the latest value of this node role, and returns true if it
  127. // is the first seen value, or is different from the most recently seen value.
  128. func (l *lastSeenRole) observe(newRole api.NodeRole) bool {
  129. changed := l.role == nil || *l.role != newRole
  130. if l.role == nil {
  131. l.role = new(api.NodeRole)
  132. }
  133. *l.role = newRole
  134. return changed
  135. }
  136. // RemoteAPIAddr returns address on which remote manager api listens.
  137. // Returns nil if node is not manager.
  138. func (n *Node) RemoteAPIAddr() (string, error) {
  139. n.RLock()
  140. defer n.RUnlock()
  141. if n.manager == nil {
  142. return "", errors.New("manager is not running")
  143. }
  144. addr := n.manager.Addr()
  145. if addr == "" {
  146. return "", errors.New("manager addr is not set")
  147. }
  148. return addr, nil
  149. }
  150. // New returns new Node instance.
  151. func New(c *Config) (*Node, error) {
  152. if err := os.MkdirAll(c.StateDir, 0700); err != nil {
  153. return nil, err
  154. }
  155. stateFile := filepath.Join(c.StateDir, stateFilename)
  156. dt, err := ioutil.ReadFile(stateFile)
  157. var p []api.Peer
  158. if err != nil && !os.IsNotExist(err) {
  159. return nil, err
  160. }
  161. if err == nil {
  162. if err := json.Unmarshal(dt, &p); err != nil {
  163. return nil, err
  164. }
  165. }
  166. n := &Node{
  167. remotes: newPersistentRemotes(stateFile, p...),
  168. role: ca.WorkerRole,
  169. config: c,
  170. started: make(chan struct{}),
  171. stopped: make(chan struct{}),
  172. closed: make(chan struct{}),
  173. ready: make(chan struct{}),
  174. notifyNodeChange: make(chan *agent.NodeChanges, 1),
  175. unlockKey: c.UnlockKey,
  176. }
  177. if n.config.JoinAddr != "" || n.config.ForceNewCluster {
  178. n.remotes = newPersistentRemotes(filepath.Join(n.config.StateDir, stateFilename))
  179. if n.config.JoinAddr != "" {
  180. n.remotes.Observe(api.Peer{Addr: n.config.JoinAddr}, remotes.DefaultObservationWeight)
  181. }
  182. }
  183. n.connBroker = connectionbroker.New(n.remotes)
  184. n.roleCond = sync.NewCond(n.RLocker())
  185. n.connCond = sync.NewCond(n.RLocker())
  186. return n, nil
  187. }
  188. // BindRemote starts a listener that exposes the remote API.
  189. func (n *Node) BindRemote(ctx context.Context, listenAddr string, advertiseAddr string) error {
  190. n.RLock()
  191. defer n.RUnlock()
  192. if n.manager == nil {
  193. return errors.New("manager is not running")
  194. }
  195. return n.manager.BindRemote(ctx, manager.RemoteAddrs{
  196. ListenAddr: listenAddr,
  197. AdvertiseAddr: advertiseAddr,
  198. })
  199. }
  200. // Start starts a node instance.
  201. func (n *Node) Start(ctx context.Context) error {
  202. err := errNodeStarted
  203. n.startOnce.Do(func() {
  204. close(n.started)
  205. go n.run(ctx)
  206. err = nil // clear error above, only once.
  207. })
  208. return err
  209. }
  210. func (n *Node) run(ctx context.Context) (err error) {
  211. defer func() {
  212. n.err = err
  213. close(n.closed)
  214. }()
  215. ctx, cancel := context.WithCancel(ctx)
  216. defer cancel()
  217. ctx = log.WithModule(ctx, "node")
  218. go func(ctx context.Context) {
  219. select {
  220. case <-ctx.Done():
  221. case <-n.stopped:
  222. cancel()
  223. }
  224. }(ctx)
  225. paths := ca.NewConfigPaths(filepath.Join(n.config.StateDir, certDirectory))
  226. securityConfig, err := n.loadSecurityConfig(ctx, paths)
  227. if err != nil {
  228. return err
  229. }
  230. ctx = log.WithLogger(ctx, log.G(ctx).WithField("node.id", n.NodeID()))
  231. taskDBPath := filepath.Join(n.config.StateDir, "worker/tasks.db")
  232. if err := os.MkdirAll(filepath.Dir(taskDBPath), 0777); err != nil {
  233. return err
  234. }
  235. db, err := bolt.Open(taskDBPath, 0666, nil)
  236. if err != nil {
  237. return err
  238. }
  239. defer db.Close()
  240. agentDone := make(chan struct{})
  241. forceCertRenewal := make(chan struct{})
  242. renewCert := func() {
  243. for {
  244. select {
  245. case forceCertRenewal <- struct{}{}:
  246. return
  247. case <-agentDone:
  248. return
  249. case <-n.notifyNodeChange:
  250. // consume from the channel to avoid blocking the writer
  251. }
  252. }
  253. }
  254. go func() {
  255. for {
  256. select {
  257. case <-agentDone:
  258. return
  259. case nodeChanges := <-n.notifyNodeChange:
  260. n.Lock()
  261. currentRole := api.NodeRoleWorker
  262. if n.role == ca.ManagerRole {
  263. currentRole = api.NodeRoleManager
  264. }
  265. n.Unlock()
  266. if nodeChanges.Node != nil {
  267. // This is a bit complex to be backward compatible with older CAs that
  268. // don't support the Node.Role field. They only use what's presently
  269. // called DesiredRole.
  270. // 1) If we haven't seen the node object before, and the desired role
  271. // is different from our current role, renew the cert. This covers
  272. // the case of starting up after a role change.
  273. // 2) If we have seen the node before, the desired role is
  274. // different from our current role, and either the actual role or
  275. // desired role has changed relative to the last values we saw in
  276. // those fields, renew the cert. This covers the case of the role
  277. // changing while this node is running, but prevents getting into a
  278. // rotation loop if Node.Role isn't what we expect (because it's
  279. // unset). We may renew the certificate an extra time (first when
  280. // DesiredRole changes, and then again when Role changes).
  281. // 3) If the server is sending us IssuanceStateRotate, renew the cert as
  282. // requested by the CA.
  283. roleChanged := n.lastNodeRole.observe(nodeChanges.Node.Role)
  284. desiredRoleChanged := n.lastNodeDesiredRole.observe(nodeChanges.Node.Spec.DesiredRole)
  285. if (currentRole != nodeChanges.Node.Spec.DesiredRole &&
  286. ((roleChanged && currentRole != nodeChanges.Node.Role) ||
  287. desiredRoleChanged)) ||
  288. nodeChanges.Node.Certificate.Status.State == api.IssuanceStateRotate {
  289. renewCert()
  290. }
  291. }
  292. if nodeChanges.RootCert != nil {
  293. // We only want to update the root CA if this is a worker node. Manager nodes directly watch the raft
  294. // store and update the root CA, with the necessary signer, from the raft store (since the managers
  295. // need the CA key as well to potentially issue new TLS certificates).
  296. if currentRole == api.NodeRoleManager || bytes.Equal(nodeChanges.RootCert, securityConfig.RootCA().Certs) {
  297. continue
  298. }
  299. newRootCA, err := ca.NewRootCA(nodeChanges.RootCert, nil, nil, ca.DefaultNodeCertExpiration, nil)
  300. if err != nil {
  301. log.G(ctx).WithError(err).Error("invalid new root certificate from the dispatcher")
  302. continue
  303. }
  304. if err := ca.SaveRootCA(newRootCA, paths.RootCA); err != nil {
  305. log.G(ctx).WithError(err).Error("could not save new root certificate from the dispatcher")
  306. continue
  307. }
  308. if err := securityConfig.UpdateRootCA(&newRootCA, newRootCA.Pool); err != nil {
  309. log.G(ctx).WithError(err).Error("could not use new root CA from dispatcher")
  310. continue
  311. }
  312. }
  313. }
  314. }
  315. }()
  316. var wg sync.WaitGroup
  317. wg.Add(3)
  318. updates := ca.RenewTLSConfig(ctx, securityConfig, n.connBroker, forceCertRenewal)
  319. go func() {
  320. for certUpdate := range updates {
  321. if certUpdate.Err != nil {
  322. logrus.Warnf("error renewing TLS certificate: %v", certUpdate.Err)
  323. continue
  324. }
  325. n.Lock()
  326. n.role = certUpdate.Role
  327. n.roleCond.Broadcast()
  328. n.Unlock()
  329. }
  330. wg.Done()
  331. }()
  332. role := n.role
  333. managerReady := make(chan struct{})
  334. agentReady := make(chan struct{})
  335. var managerErr error
  336. var agentErr error
  337. go func() {
  338. managerErr = n.superviseManager(ctx, securityConfig, paths.RootCA, managerReady, forceCertRenewal) // store err and loop
  339. wg.Done()
  340. cancel()
  341. }()
  342. go func() {
  343. agentErr = n.runAgent(ctx, db, securityConfig, agentReady)
  344. wg.Done()
  345. cancel()
  346. close(agentDone)
  347. }()
  348. go func() {
  349. <-agentReady
  350. if role == ca.ManagerRole {
  351. workerRole := make(chan struct{})
  352. waitRoleCtx, waitRoleCancel := context.WithCancel(ctx)
  353. go func() {
  354. if n.waitRole(waitRoleCtx, ca.WorkerRole) == nil {
  355. close(workerRole)
  356. }
  357. }()
  358. select {
  359. case <-managerReady:
  360. case <-workerRole:
  361. }
  362. waitRoleCancel()
  363. }
  364. close(n.ready)
  365. }()
  366. wg.Wait()
  367. if managerErr != nil && managerErr != context.Canceled {
  368. return managerErr
  369. }
  370. if agentErr != nil && agentErr != context.Canceled {
  371. return agentErr
  372. }
  373. return err
  374. }
  375. // Stop stops node execution
  376. func (n *Node) Stop(ctx context.Context) error {
  377. select {
  378. case <-n.started:
  379. default:
  380. return errNodeNotStarted
  381. }
  382. // ask agent to clean up assignments
  383. n.Lock()
  384. if n.agent != nil {
  385. if err := n.agent.Leave(ctx); err != nil {
  386. log.G(ctx).WithError(err).Error("agent failed to clean up assignments")
  387. }
  388. }
  389. n.Unlock()
  390. n.stopOnce.Do(func() {
  391. close(n.stopped)
  392. })
  393. select {
  394. case <-n.closed:
  395. return nil
  396. case <-ctx.Done():
  397. return ctx.Err()
  398. }
  399. }
  400. // Err returns the error that caused the node to shutdown or nil. Err blocks
  401. // until the node has fully shut down.
  402. func (n *Node) Err(ctx context.Context) error {
  403. select {
  404. case <-n.closed:
  405. return n.err
  406. case <-ctx.Done():
  407. return ctx.Err()
  408. }
  409. }
  410. func (n *Node) runAgent(ctx context.Context, db *bolt.DB, securityConfig *ca.SecurityConfig, ready chan<- struct{}) error {
  411. waitCtx, waitCancel := context.WithCancel(ctx)
  412. remotesCh := n.remotes.WaitSelect(ctx)
  413. controlCh := n.ListenControlSocket(waitCtx)
  414. waitPeer:
  415. for {
  416. select {
  417. case <-ctx.Done():
  418. break waitPeer
  419. case <-remotesCh:
  420. break waitPeer
  421. case conn := <-controlCh:
  422. if conn != nil {
  423. break waitPeer
  424. }
  425. }
  426. }
  427. waitCancel()
  428. select {
  429. case <-ctx.Done():
  430. return ctx.Err()
  431. default:
  432. }
  433. secChangeQueue := watch.NewQueue()
  434. defer secChangeQueue.Close()
  435. secChangesCh, secChangesCancel := secChangeQueue.Watch()
  436. defer secChangesCancel()
  437. securityConfig.SetWatch(secChangeQueue)
  438. rootCA := securityConfig.RootCA()
  439. issuer := securityConfig.IssuerInfo()
  440. a, err := agent.New(&agent.Config{
  441. Hostname: n.config.Hostname,
  442. ConnBroker: n.connBroker,
  443. Executor: n.config.Executor,
  444. DB: db,
  445. NotifyNodeChange: n.notifyNodeChange,
  446. NotifyTLSChange: secChangesCh,
  447. Credentials: securityConfig.ClientTLSCreds,
  448. NodeTLSInfo: &api.NodeTLSInfo{
  449. TrustRoot: rootCA.Certs,
  450. CertIssuerPublicKey: issuer.PublicKey,
  451. CertIssuerSubject: issuer.Subject,
  452. },
  453. })
  454. if err != nil {
  455. return err
  456. }
  457. if err := a.Start(ctx); err != nil {
  458. return err
  459. }
  460. n.Lock()
  461. n.agent = a
  462. n.Unlock()
  463. defer func() {
  464. n.Lock()
  465. n.agent = nil
  466. n.Unlock()
  467. }()
  468. go func() {
  469. <-a.Ready()
  470. close(ready)
  471. }()
  472. // todo: manually call stop on context cancellation?
  473. return a.Err(context.Background())
  474. }
  475. // Ready returns a channel that is closed after node's initialization has
  476. // completes for the first time.
  477. func (n *Node) Ready() <-chan struct{} {
  478. return n.ready
  479. }
  480. func (n *Node) setControlSocket(conn *grpc.ClientConn) {
  481. n.Lock()
  482. if n.conn != nil {
  483. n.conn.Close()
  484. }
  485. n.conn = conn
  486. n.connBroker.SetLocalConn(conn)
  487. n.connCond.Broadcast()
  488. n.Unlock()
  489. }
  490. // ListenControlSocket listens changes of a connection for managing the
  491. // cluster control api
  492. func (n *Node) ListenControlSocket(ctx context.Context) <-chan *grpc.ClientConn {
  493. c := make(chan *grpc.ClientConn, 1)
  494. n.RLock()
  495. conn := n.conn
  496. c <- conn
  497. done := make(chan struct{})
  498. go func() {
  499. select {
  500. case <-ctx.Done():
  501. n.connCond.Broadcast()
  502. case <-done:
  503. }
  504. }()
  505. go func() {
  506. defer close(c)
  507. defer close(done)
  508. defer n.RUnlock()
  509. for {
  510. select {
  511. case <-ctx.Done():
  512. return
  513. default:
  514. }
  515. if conn == n.conn {
  516. n.connCond.Wait()
  517. continue
  518. }
  519. conn = n.conn
  520. select {
  521. case c <- conn:
  522. case <-ctx.Done():
  523. return
  524. }
  525. }
  526. }()
  527. return c
  528. }
  529. // NodeID returns current node's ID. May be empty if not set.
  530. func (n *Node) NodeID() string {
  531. n.RLock()
  532. defer n.RUnlock()
  533. return n.nodeID
  534. }
  535. // Manager returns manager instance started by node. May be nil.
  536. func (n *Node) Manager() *manager.Manager {
  537. n.RLock()
  538. defer n.RUnlock()
  539. return n.manager
  540. }
  541. // Agent returns agent instance started by node. May be nil.
  542. func (n *Node) Agent() *agent.Agent {
  543. n.RLock()
  544. defer n.RUnlock()
  545. return n.agent
  546. }
  547. // IsStateDirty returns true if any objects have been added to raft which make
  548. // the state "dirty". Currently, the existence of any object other than the
  549. // default cluster or the local node implies a dirty state.
  550. func (n *Node) IsStateDirty() (bool, error) {
  551. n.RLock()
  552. defer n.RUnlock()
  553. if n.manager == nil {
  554. return false, errors.New("node is not a manager")
  555. }
  556. return n.manager.IsStateDirty()
  557. }
  558. // Remotes returns a list of known peers known to node.
  559. func (n *Node) Remotes() []api.Peer {
  560. weights := n.remotes.Weights()
  561. remotes := make([]api.Peer, 0, len(weights))
  562. for p := range weights {
  563. remotes = append(remotes, p)
  564. }
  565. return remotes
  566. }
  567. func (n *Node) loadSecurityConfig(ctx context.Context, paths *ca.SecurityConfigPaths) (*ca.SecurityConfig, error) {
  568. var securityConfig *ca.SecurityConfig
  569. krw := ca.NewKeyReadWriter(paths.Node, n.unlockKey, &manager.RaftDEKData{})
  570. if err := krw.Migrate(); err != nil {
  571. return nil, err
  572. }
  573. // Check if we already have a valid certificates on disk.
  574. rootCA, err := ca.GetLocalRootCA(paths.RootCA)
  575. if err != nil && err != ca.ErrNoLocalRootCA {
  576. return nil, err
  577. }
  578. if err == nil {
  579. // if forcing a new cluster, we allow the certificates to be expired - a new set will be generated
  580. securityConfig, err = ca.LoadSecurityConfig(ctx, rootCA, krw, n.config.ForceNewCluster)
  581. if err != nil {
  582. _, isInvalidKEK := errors.Cause(err).(ca.ErrInvalidKEK)
  583. if isInvalidKEK {
  584. return nil, ErrInvalidUnlockKey
  585. } else if !os.IsNotExist(err) {
  586. return nil, errors.Wrapf(err, "error while loading TLS certificate in %s", paths.Node.Cert)
  587. }
  588. }
  589. }
  590. if securityConfig == nil {
  591. if n.config.JoinAddr == "" {
  592. // if we're not joining a cluster, bootstrap a new one - and we have to set the unlock key
  593. n.unlockKey = nil
  594. if n.config.AutoLockManagers {
  595. n.unlockKey = encryption.GenerateSecretKey()
  596. }
  597. krw = ca.NewKeyReadWriter(paths.Node, n.unlockKey, &manager.RaftDEKData{})
  598. rootCA, err = ca.CreateRootCA(ca.DefaultRootCN)
  599. if err != nil {
  600. return nil, err
  601. }
  602. if err := ca.SaveRootCA(rootCA, paths.RootCA); err != nil {
  603. return nil, err
  604. }
  605. log.G(ctx).Debug("generated CA key and certificate")
  606. } else if err == ca.ErrNoLocalRootCA { // from previous error loading the root CA from disk
  607. rootCA, err = ca.DownloadRootCA(ctx, paths.RootCA, n.config.JoinToken, n.connBroker)
  608. if err != nil {
  609. return nil, err
  610. }
  611. log.G(ctx).Debug("downloaded CA certificate")
  612. }
  613. // Obtain new certs and setup TLS certificates renewal for this node:
  614. // - If certificates weren't present on disk, we call CreateSecurityConfig, which blocks
  615. // until a valid certificate has been issued.
  616. // - We wait for CreateSecurityConfig to finish since we need a certificate to operate.
  617. // Attempt to load certificate from disk
  618. securityConfig, err = ca.LoadSecurityConfig(ctx, rootCA, krw, n.config.ForceNewCluster)
  619. if err == nil {
  620. log.G(ctx).WithFields(logrus.Fields{
  621. "node.id": securityConfig.ClientTLSCreds.NodeID(),
  622. }).Debugf("loaded TLS certificate")
  623. } else {
  624. if _, ok := errors.Cause(err).(ca.ErrInvalidKEK); ok {
  625. return nil, ErrInvalidUnlockKey
  626. }
  627. log.G(ctx).WithError(err).Debugf("no node credentials found in: %s", krw.Target())
  628. securityConfig, err = rootCA.CreateSecurityConfig(ctx, krw, ca.CertificateRequestConfig{
  629. Token: n.config.JoinToken,
  630. Availability: n.config.Availability,
  631. ConnBroker: n.connBroker,
  632. })
  633. if err != nil {
  634. return nil, err
  635. }
  636. }
  637. }
  638. n.Lock()
  639. n.role = securityConfig.ClientTLSCreds.Role()
  640. n.nodeID = securityConfig.ClientTLSCreds.NodeID()
  641. n.roleCond.Broadcast()
  642. n.Unlock()
  643. return securityConfig, nil
  644. }
  645. func (n *Node) initManagerConnection(ctx context.Context, ready chan<- struct{}) error {
  646. opts := []grpc.DialOption{
  647. grpc.WithUnaryInterceptor(grpc_prometheus.UnaryClientInterceptor),
  648. grpc.WithStreamInterceptor(grpc_prometheus.StreamClientInterceptor),
  649. }
  650. insecureCreds := credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})
  651. opts = append(opts, grpc.WithTransportCredentials(insecureCreds))
  652. addr := n.config.ListenControlAPI
  653. opts = append(opts, grpc.WithDialer(
  654. func(addr string, timeout time.Duration) (net.Conn, error) {
  655. return xnet.DialTimeoutLocal(addr, timeout)
  656. }))
  657. conn, err := grpc.Dial(addr, opts...)
  658. if err != nil {
  659. return err
  660. }
  661. client := api.NewHealthClient(conn)
  662. for {
  663. resp, err := client.Check(ctx, &api.HealthCheckRequest{Service: "ControlAPI"})
  664. if err != nil {
  665. return err
  666. }
  667. if resp.Status == api.HealthCheckResponse_SERVING {
  668. break
  669. }
  670. time.Sleep(500 * time.Millisecond)
  671. }
  672. n.setControlSocket(conn)
  673. if ready != nil {
  674. close(ready)
  675. }
  676. return nil
  677. }
  678. func (n *Node) waitRole(ctx context.Context, role string) error {
  679. n.roleCond.L.Lock()
  680. if role == n.role {
  681. n.roleCond.L.Unlock()
  682. return nil
  683. }
  684. finishCh := make(chan struct{})
  685. defer close(finishCh)
  686. go func() {
  687. select {
  688. case <-finishCh:
  689. case <-ctx.Done():
  690. // call broadcast to shutdown this function
  691. n.roleCond.Broadcast()
  692. }
  693. }()
  694. defer n.roleCond.L.Unlock()
  695. for role != n.role {
  696. n.roleCond.Wait()
  697. select {
  698. case <-ctx.Done():
  699. return ctx.Err()
  700. default:
  701. }
  702. }
  703. return nil
  704. }
  705. func (n *Node) runManager(ctx context.Context, securityConfig *ca.SecurityConfig, rootPaths ca.CertPaths, ready chan struct{}, workerRole <-chan struct{}) (bool, error) {
  706. var remoteAPI *manager.RemoteAddrs
  707. if n.config.ListenRemoteAPI != "" {
  708. remoteAPI = &manager.RemoteAddrs{
  709. ListenAddr: n.config.ListenRemoteAPI,
  710. AdvertiseAddr: n.config.AdvertiseRemoteAPI,
  711. }
  712. }
  713. remoteAddr, _ := n.remotes.Select(n.NodeID())
  714. m, err := manager.New(&manager.Config{
  715. ForceNewCluster: n.config.ForceNewCluster,
  716. RemoteAPI: remoteAPI,
  717. ControlAPI: n.config.ListenControlAPI,
  718. SecurityConfig: securityConfig,
  719. ExternalCAs: n.config.ExternalCAs,
  720. JoinRaft: remoteAddr.Addr,
  721. StateDir: n.config.StateDir,
  722. HeartbeatTick: n.config.HeartbeatTick,
  723. ElectionTick: n.config.ElectionTick,
  724. AutoLockManagers: n.config.AutoLockManagers,
  725. UnlockKey: n.unlockKey,
  726. Availability: n.config.Availability,
  727. PluginGetter: n.config.PluginGetter,
  728. RootCAPaths: rootPaths,
  729. })
  730. if err != nil {
  731. return false, err
  732. }
  733. done := make(chan struct{})
  734. var runErr error
  735. go func(logger *logrus.Entry) {
  736. if err := m.Run(log.WithLogger(context.Background(), logger)); err != nil {
  737. runErr = err
  738. }
  739. close(done)
  740. }(log.G(ctx))
  741. var clearData bool
  742. defer func() {
  743. n.Lock()
  744. n.manager = nil
  745. n.Unlock()
  746. m.Stop(ctx, clearData)
  747. <-done
  748. n.setControlSocket(nil)
  749. }()
  750. n.Lock()
  751. n.manager = m
  752. n.Unlock()
  753. connCtx, connCancel := context.WithCancel(ctx)
  754. defer connCancel()
  755. go n.initManagerConnection(connCtx, ready)
  756. // wait for manager stop or for role change
  757. select {
  758. case <-done:
  759. return false, runErr
  760. case <-workerRole:
  761. log.G(ctx).Info("role changed to worker, stopping manager")
  762. clearData = true
  763. case <-m.RemovedFromRaft():
  764. log.G(ctx).Info("manager removed from raft cluster, stopping manager")
  765. clearData = true
  766. case <-ctx.Done():
  767. return false, ctx.Err()
  768. }
  769. return clearData, nil
  770. }
  771. func (n *Node) superviseManager(ctx context.Context, securityConfig *ca.SecurityConfig, rootPaths ca.CertPaths, ready chan struct{}, forceCertRenewal chan struct{}) error {
  772. for {
  773. if err := n.waitRole(ctx, ca.ManagerRole); err != nil {
  774. return err
  775. }
  776. workerRole := make(chan struct{})
  777. waitRoleCtx, waitRoleCancel := context.WithCancel(ctx)
  778. go func() {
  779. if n.waitRole(waitRoleCtx, ca.WorkerRole) == nil {
  780. close(workerRole)
  781. }
  782. }()
  783. wasRemoved, err := n.runManager(ctx, securityConfig, rootPaths, ready, workerRole)
  784. if err != nil {
  785. waitRoleCancel()
  786. return errors.Wrap(err, "manager stopped")
  787. }
  788. // If the manager stopped running and our role is still
  789. // "manager", it's possible that the manager was demoted and
  790. // the agent hasn't realized this yet. We should wait for the
  791. // role to change instead of restarting the manager immediately.
  792. err = func() error {
  793. timer := time.NewTimer(roleChangeTimeout)
  794. defer timer.Stop()
  795. defer waitRoleCancel()
  796. select {
  797. case <-timer.C:
  798. case <-workerRole:
  799. return nil
  800. case <-ctx.Done():
  801. return ctx.Err()
  802. }
  803. if !wasRemoved {
  804. log.G(ctx).Warn("failed to get worker role after manager stop, restarting manager")
  805. return nil
  806. }
  807. // We need to be extra careful about restarting the
  808. // manager. It may cause the node to wrongly join under
  809. // a new Raft ID. Since we didn't see a role change
  810. // yet, force a certificate renewal. If the certificate
  811. // comes back with a worker role, we know we shouldn't
  812. // restart the manager. However, if we don't see
  813. // workerRole get closed, it means we didn't switch to
  814. // a worker certificate, either because we couldn't
  815. // contact a working CA, or because we've been
  816. // re-promoted. In this case, we must assume we were
  817. // re-promoted, and restart the manager.
  818. log.G(ctx).Warn("failed to get worker role after manager stop, forcing certificate renewal")
  819. timer.Reset(roleChangeTimeout)
  820. select {
  821. case forceCertRenewal <- struct{}{}:
  822. case <-timer.C:
  823. log.G(ctx).Warn("failed to trigger certificate renewal after manager stop, restarting manager")
  824. return nil
  825. case <-ctx.Done():
  826. return ctx.Err()
  827. }
  828. // Now that the renewal request has been sent to the
  829. // renewal goroutine, wait for a change in role.
  830. select {
  831. case <-timer.C:
  832. log.G(ctx).Warn("failed to get worker role after manager stop, restarting manager")
  833. case <-workerRole:
  834. case <-ctx.Done():
  835. return ctx.Err()
  836. }
  837. return nil
  838. }()
  839. if err != nil {
  840. return err
  841. }
  842. ready = nil
  843. }
  844. }
  845. type persistentRemotes struct {
  846. sync.RWMutex
  847. c *sync.Cond
  848. remotes.Remotes
  849. storePath string
  850. lastSavedState []api.Peer
  851. }
  852. func newPersistentRemotes(f string, peers ...api.Peer) *persistentRemotes {
  853. pr := &persistentRemotes{
  854. storePath: f,
  855. Remotes: remotes.NewRemotes(peers...),
  856. }
  857. pr.c = sync.NewCond(pr.RLocker())
  858. return pr
  859. }
  860. func (s *persistentRemotes) Observe(peer api.Peer, weight int) {
  861. s.Lock()
  862. defer s.Unlock()
  863. s.Remotes.Observe(peer, weight)
  864. s.c.Broadcast()
  865. if err := s.save(); err != nil {
  866. logrus.Errorf("error writing cluster state file: %v", err)
  867. return
  868. }
  869. return
  870. }
  871. func (s *persistentRemotes) Remove(peers ...api.Peer) {
  872. s.Lock()
  873. defer s.Unlock()
  874. s.Remotes.Remove(peers...)
  875. if err := s.save(); err != nil {
  876. logrus.Errorf("error writing cluster state file: %v", err)
  877. return
  878. }
  879. return
  880. }
  881. func (s *persistentRemotes) save() error {
  882. weights := s.Weights()
  883. remotes := make([]api.Peer, 0, len(weights))
  884. for r := range weights {
  885. remotes = append(remotes, r)
  886. }
  887. sort.Sort(sortablePeers(remotes))
  888. if reflect.DeepEqual(remotes, s.lastSavedState) {
  889. return nil
  890. }
  891. dt, err := json.Marshal(remotes)
  892. if err != nil {
  893. return err
  894. }
  895. s.lastSavedState = remotes
  896. return ioutils.AtomicWriteFile(s.storePath, dt, 0600)
  897. }
  898. // WaitSelect waits until at least one remote becomes available and then selects one.
  899. func (s *persistentRemotes) WaitSelect(ctx context.Context) <-chan api.Peer {
  900. c := make(chan api.Peer, 1)
  901. s.RLock()
  902. done := make(chan struct{})
  903. go func() {
  904. select {
  905. case <-ctx.Done():
  906. s.c.Broadcast()
  907. case <-done:
  908. }
  909. }()
  910. go func() {
  911. defer s.RUnlock()
  912. defer close(c)
  913. defer close(done)
  914. for {
  915. if ctx.Err() != nil {
  916. return
  917. }
  918. p, err := s.Select()
  919. if err == nil {
  920. c <- p
  921. return
  922. }
  923. s.c.Wait()
  924. }
  925. }()
  926. return c
  927. }
  928. // sortablePeers is a sort wrapper for []api.Peer
  929. type sortablePeers []api.Peer
  930. func (sp sortablePeers) Less(i, j int) bool { return sp[i].NodeID < sp[j].NodeID }
  931. func (sp sortablePeers) Len() int { return len(sp) }
  932. func (sp sortablePeers) Swap(i, j int) { sp[i], sp[j] = sp[j], sp[i] }