node.go 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103
  1. package node
  2. import (
  3. "bytes"
  4. "crypto/tls"
  5. "encoding/json"
  6. "io/ioutil"
  7. "net"
  8. "os"
  9. "path/filepath"
  10. "reflect"
  11. "sort"
  12. "strings"
  13. "sync"
  14. "time"
  15. "github.com/boltdb/bolt"
  16. "github.com/docker/docker/pkg/plugingetter"
  17. metrics "github.com/docker/go-metrics"
  18. "github.com/docker/swarmkit/agent"
  19. "github.com/docker/swarmkit/agent/exec"
  20. "github.com/docker/swarmkit/api"
  21. "github.com/docker/swarmkit/ca"
  22. "github.com/docker/swarmkit/connectionbroker"
  23. "github.com/docker/swarmkit/ioutils"
  24. "github.com/docker/swarmkit/log"
  25. "github.com/docker/swarmkit/manager"
  26. "github.com/docker/swarmkit/manager/encryption"
  27. "github.com/docker/swarmkit/remotes"
  28. "github.com/docker/swarmkit/xnet"
  29. grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
  30. "github.com/pkg/errors"
  31. "github.com/sirupsen/logrus"
  32. "golang.org/x/net/context"
  33. "google.golang.org/grpc"
  34. "google.golang.org/grpc/codes"
  35. "google.golang.org/grpc/credentials"
  36. )
const (
	// stateFilename is the file inside StateDir holding the JSON-encoded
	// list of known remote peers.
	stateFilename = "state.json"
	// roleChangeTimeout is how long superviseManager waits to observe a role
	// change after the manager stops before forcing a certificate renewal or
	// restarting the manager.
	roleChangeTimeout = 16 * time.Second
)
var (
	// nodeInfo is a gauge labeled with swarm and node IDs, set to 1 while the
	// node runs (see (*Node).run).
	nodeInfo metrics.LabeledGauge
	// nodeManager reports whether this node currently acts as a manager (1)
	// or as a worker (0).
	nodeManager metrics.Gauge

	errNodeStarted    = errors.New("node: already started")
	errNodeNotStarted = errors.New("node: not started")
	certDirectory     = "certificates"

	// ErrInvalidUnlockKey is returned when we can't decrypt the TLS certificate
	// because the supplied unlock key is wrong or missing.
	ErrInvalidUnlockKey = errors.New("node is locked, and needs a valid unlock key")
)
  50. func init() {
  51. ns := metrics.NewNamespace("swarm", "node", nil)
  52. nodeInfo = ns.NewLabeledGauge("info", "Information related to the swarm", "",
  53. "swarm_id",
  54. "node_id",
  55. )
  56. nodeManager = ns.NewGauge("manager", "Whether this node is a manager or not", "")
  57. metrics.Register(ns)
  58. }
// Config provides values for a Node.
type Config struct {
	// Hostname is the name of host for agent instance.
	Hostname string

	// JoinAddr specifies node that should be used for the initial connection to
	// other manager in cluster. This should be only one address and optional,
	// the actual remotes come from the stored state.
	JoinAddr string

	// StateDir specifies the directory the node uses to keep the state of the
	// remote managers and certificates.
	StateDir string

	// JoinToken is the token to be used on the first certificate request.
	JoinToken string

	// ExternalCAs is a list of CAs to which a manager node
	// will make certificate signing requests for node certificates.
	ExternalCAs []*api.ExternalCA

	// ForceNewCluster creates a new cluster from current raft state.
	ForceNewCluster bool

	// ListenControlAPI specifies address the control API should listen on.
	ListenControlAPI string

	// ListenRemoteAPI specifies the address for the remote API that agents
	// and raft members connect to.
	ListenRemoteAPI string

	// AdvertiseRemoteAPI specifies the address that should be advertised
	// for connections to the remote API (including the raft service).
	AdvertiseRemoteAPI string

	// Executor specifies the executor to use for the agent.
	Executor exec.Executor

	// ElectionTick defines the amount of ticks needed without
	// leader to trigger a new election.
	ElectionTick uint32

	// HeartbeatTick defines the amount of ticks between each
	// heartbeat sent to other members for health-check purposes.
	HeartbeatTick uint32

	// AutoLockManagers determines whether or not an unlock key will be generated
	// when bootstrapping a new cluster for the first time.
	AutoLockManagers bool

	// UnlockKey is the key to unlock a node - used for decrypting at rest. This
	// only applies to nodes that have already joined a cluster.
	UnlockKey []byte

	// Availability allows a user to control the current scheduling status of a node.
	Availability api.NodeSpec_Availability

	// PluginGetter provides access to docker's plugin inventory.
	PluginGetter plugingetter.PluginGetter
}
// Node implements the primary node functionality for a member of a swarm
// cluster. Node handles workloads and may also run as a manager.
type Node struct {
	sync.RWMutex
	config     *Config
	remotes    *persistentRemotes // known remote managers, persisted to disk
	connBroker *connectionbroker.Broker
	role       string     // current role from the TLS credentials (ca.WorkerRole or ca.ManagerRole)
	roleCond   *sync.Cond // broadcast whenever role changes; backed by RLocker
	conn       *grpc.ClientConn // connection to the local manager's control API, if running
	connCond   *sync.Cond       // broadcast whenever conn changes; backed by RLocker
	nodeID     string
	started    chan struct{} // closed by Start
	startOnce  sync.Once
	stopped    chan struct{} // closed by Stop to signal the run loop
	stopOnce   sync.Once
	ready      chan struct{} // closed when agent has completed registration and manager(if enabled) is ready to receive control requests
	closed     chan struct{} // closed when run returns; err is valid after this
	err        error         // terminal error from run; read via Err
	agent      *agent.Agent
	manager    *manager.Manager
	notifyNodeChange chan *agent.NodeChanges // used by the agent to relay node updates from the dispatcher Session stream to (*Node).run
	unlockKey        []byte
}
// lastSeenRole tracks the most recently observed desired role for this node,
// used to detect role-change edges (see observe).
type lastSeenRole struct {
	role api.NodeRole
}
  131. // observe notes the latest value of this node role, and returns true if it
  132. // is the first seen value, or is different from the most recently seen value.
  133. func (l *lastSeenRole) observe(newRole api.NodeRole) bool {
  134. changed := l.role != newRole
  135. l.role = newRole
  136. return changed
  137. }
// RemoteAPIAddr returns the address on which the remote manager API listens.
// It returns an error (not nil) if this node is not currently running a
// manager, or if the manager has no address set.
func (n *Node) RemoteAPIAddr() (string, error) {
	n.RLock()
	defer n.RUnlock()
	if n.manager == nil {
		return "", errors.New("manager is not running")
	}
	addr := n.manager.Addr()
	if addr == "" {
		return "", errors.New("manager addr is not set")
	}
	return addr, nil
}
  152. // New returns new Node instance.
  153. func New(c *Config) (*Node, error) {
  154. if err := os.MkdirAll(c.StateDir, 0700); err != nil {
  155. return nil, err
  156. }
  157. stateFile := filepath.Join(c.StateDir, stateFilename)
  158. dt, err := ioutil.ReadFile(stateFile)
  159. var p []api.Peer
  160. if err != nil && !os.IsNotExist(err) {
  161. return nil, err
  162. }
  163. if err == nil {
  164. if err := json.Unmarshal(dt, &p); err != nil {
  165. return nil, err
  166. }
  167. }
  168. n := &Node{
  169. remotes: newPersistentRemotes(stateFile, p...),
  170. role: ca.WorkerRole,
  171. config: c,
  172. started: make(chan struct{}),
  173. stopped: make(chan struct{}),
  174. closed: make(chan struct{}),
  175. ready: make(chan struct{}),
  176. notifyNodeChange: make(chan *agent.NodeChanges, 1),
  177. unlockKey: c.UnlockKey,
  178. }
  179. if n.config.JoinAddr != "" || n.config.ForceNewCluster {
  180. n.remotes = newPersistentRemotes(filepath.Join(n.config.StateDir, stateFilename))
  181. if n.config.JoinAddr != "" {
  182. n.remotes.Observe(api.Peer{Addr: n.config.JoinAddr}, remotes.DefaultObservationWeight)
  183. }
  184. }
  185. n.connBroker = connectionbroker.New(n.remotes)
  186. n.roleCond = sync.NewCond(n.RLocker())
  187. n.connCond = sync.NewCond(n.RLocker())
  188. return n, nil
  189. }
  190. // BindRemote starts a listener that exposes the remote API.
  191. func (n *Node) BindRemote(ctx context.Context, listenAddr string, advertiseAddr string) error {
  192. n.RLock()
  193. defer n.RUnlock()
  194. if n.manager == nil {
  195. return errors.New("manager is not running")
  196. }
  197. return n.manager.BindRemote(ctx, manager.RemoteAddrs{
  198. ListenAddr: listenAddr,
  199. AdvertiseAddr: advertiseAddr,
  200. })
  201. }
// Start starts a node instance. The run loop is launched in a background
// goroutine on the first call; subsequent calls return errNodeStarted.
func (n *Node) Start(ctx context.Context) error {
	// Default to the "already started" error; only the first invocation of
	// the Once body clears it.
	err := errNodeStarted

	n.startOnce.Do(func() {
		close(n.started)
		go n.run(ctx)
		err = nil // clear error above, only once.
	})
	return err
}
  212. func (n *Node) currentRole() api.NodeRole {
  213. n.Lock()
  214. currentRole := api.NodeRoleWorker
  215. if n.role == ca.ManagerRole {
  216. currentRole = api.NodeRoleManager
  217. }
  218. n.Unlock()
  219. return currentRole
  220. }
// run is the node's main loop. It loads or creates the TLS credentials, opens
// the worker task database, starts the certificate renewer, the agent, and
// the manager supervisor, and relays dispatcher-driven node changes (desired
// role, certificate rotation, root CA updates) to the renewer. It returns
// when ctx is canceled or Stop is called; the result is stored in n.err and
// n.closed is closed (see the deferred function).
func (n *Node) run(ctx context.Context) (err error) {
	defer func() {
		n.err = err
		close(n.closed)
	}()
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	ctx = log.WithModule(ctx, "node")

	// Translate a Stop() call (close of n.stopped) into a context cancel.
	go func(ctx context.Context) {
		select {
		case <-ctx.Done():
		case <-n.stopped:
			cancel()
		}
	}(ctx)

	paths := ca.NewConfigPaths(filepath.Join(n.config.StateDir, certDirectory))
	securityConfig, secConfigCancel, err := n.loadSecurityConfig(ctx, paths)
	if err != nil {
		return err
	}
	defer secConfigCancel()

	renewer := ca.NewTLSRenewer(securityConfig, n.connBroker, paths.RootCA)

	ctx = log.WithLogger(ctx, log.G(ctx).WithField("node.id", n.NodeID()))

	taskDBPath := filepath.Join(n.config.StateDir, "worker", "tasks.db")
	if err := os.MkdirAll(filepath.Dir(taskDBPath), 0777); err != nil {
		return err
	}
	db, err := bolt.Open(taskDBPath, 0666, nil)
	if err != nil {
		return err
	}
	defer db.Close()

	agentDone := make(chan struct{})

	// Watch node updates relayed from the dispatcher session and react to
	// desired-role changes, certificate-rotation requests, and new root CA
	// material, until the agent exits (agentDone closed below).
	go func() {
		// lastNodeDesiredRole is the last-seen value of Node.Spec.DesiredRole,
		// used to make role changes "edge triggered" and avoid renewal loops.
		lastNodeDesiredRole := lastSeenRole{role: n.currentRole()}
		for {
			select {
			case <-agentDone:
				return
			case nodeChanges := <-n.notifyNodeChange:
				if nodeChanges.Node != nil {
					// This is a bit complex to be backward compatible with older CAs that
					// don't support the Node.Role field. They only use what's presently
					// called DesiredRole.
					// 1) If DesiredRole changes, kick off a certificate renewal. The renewal
					//    is delayed slightly to give Role time to change as well if this is
					//    a newer CA. If the certificate we get back doesn't have the expected
					//    role, we continue renewing with exponential backoff.
					// 2) If the server is sending us IssuanceStateRotate, renew the cert as
					//    requested by the CA.
					desiredRoleChanged := lastNodeDesiredRole.observe(nodeChanges.Node.Spec.DesiredRole)
					if desiredRoleChanged {
						switch nodeChanges.Node.Spec.DesiredRole {
						case api.NodeRoleManager:
							renewer.SetExpectedRole(ca.ManagerRole)
						case api.NodeRoleWorker:
							renewer.SetExpectedRole(ca.WorkerRole)
						}
					}
					if desiredRoleChanged || nodeChanges.Node.Certificate.Status.State == api.IssuanceStateRotate {
						renewer.Renew()
					}
				}

				if nodeChanges.RootCert != nil {
					// Ignore no-op updates that match the current root CA.
					if bytes.Equal(nodeChanges.RootCert, securityConfig.RootCA().Certs) {
						continue
					}
					newRootCA, err := ca.NewRootCA(nodeChanges.RootCert, nil, nil, ca.DefaultNodeCertExpiration, nil)
					if err != nil {
						log.G(ctx).WithError(err).Error("invalid new root certificate from the dispatcher")
						continue
					}
					if err := securityConfig.UpdateRootCA(&newRootCA); err != nil {
						log.G(ctx).WithError(err).Error("could not use new root CA from dispatcher")
						continue
					}
					if err := ca.SaveRootCA(newRootCA, paths.RootCA); err != nil {
						log.G(ctx).WithError(err).Error("could not save new root certificate from the dispatcher")
						continue
					}
				}
			}
		}
	}()

	// Three goroutines feed this WaitGroup: the cert-update consumer, the
	// manager supervisor, and the agent runner.
	var wg sync.WaitGroup
	wg.Add(3)

	nodeInfo.WithValues(
		securityConfig.ClientTLSCreds.Organization(),
		securityConfig.ClientTLSCreds.NodeID(),
	).Set(1)

	if n.currentRole() == api.NodeRoleManager {
		nodeManager.Set(1)
	} else {
		nodeManager.Set(0)
	}

	updates := renewer.Start(ctx)
	// Apply renewed certificates: record the new role (waking waitRole
	// waiters) and export it as a metric.
	go func() {
		for certUpdate := range updates {
			if certUpdate.Err != nil {
				logrus.Warnf("error renewing TLS certificate: %v", certUpdate.Err)
				continue
			}
			n.Lock()
			n.role = certUpdate.Role
			n.roleCond.Broadcast()
			n.Unlock()

			// Export the new role.
			if n.currentRole() == api.NodeRoleManager {
				nodeManager.Set(1)
			} else {
				nodeManager.Set(0)
			}
		}
		wg.Done()
	}()

	// Snapshot of the role at startup, used below to decide whether readiness
	// also requires the manager. NOTE(review): read without holding the lock;
	// appears to rely on no concurrent writer at this point — confirm.
	role := n.role

	managerReady := make(chan struct{})
	agentReady := make(chan struct{})
	var managerErr error
	var agentErr error
	go func() {
		managerErr = n.superviseManager(ctx, securityConfig, paths.RootCA, managerReady, renewer) // store err and loop
		wg.Done()
		cancel()
	}()
	go func() {
		agentErr = n.runAgent(ctx, db, securityConfig, agentReady)
		wg.Done()
		cancel()
		close(agentDone)
	}()

	// Close n.ready once the agent is ready and — if we started as a manager —
	// either the manager is ready or we have since become a worker.
	go func() {
		<-agentReady
		if role == ca.ManagerRole {
			workerRole := make(chan struct{})
			waitRoleCtx, waitRoleCancel := context.WithCancel(ctx)
			go func() {
				if n.waitRole(waitRoleCtx, ca.WorkerRole) == nil {
					close(workerRole)
				}
			}()
			select {
			case <-managerReady:
			case <-workerRole:
			}
			waitRoleCancel()
		}
		close(n.ready)
	}()

	wg.Wait()
	// Cancellation is the normal shutdown path; only surface other errors.
	if managerErr != nil && errors.Cause(managerErr) != context.Canceled {
		return managerErr
	}
	if agentErr != nil && errors.Cause(agentErr) != context.Canceled {
		return agentErr
	}
	return err
}
// Stop stops node execution. It asks the agent to release its assignments,
// signals the run loop to shut down, then waits for shutdown to finish or
// for ctx to expire. Returns errNodeNotStarted if Start was never called.
func (n *Node) Stop(ctx context.Context) error {
	select {
	case <-n.started:
	default:
		return errNodeNotStarted
	}
	// ask agent to clean up assignments
	n.Lock()
	if n.agent != nil {
		if err := n.agent.Leave(ctx); err != nil {
			log.G(ctx).WithError(err).Error("agent failed to clean up assignments")
		}
	}
	n.Unlock()

	// Safe to call multiple times; only the first call closes n.stopped.
	n.stopOnce.Do(func() {
		close(n.stopped)
	})

	select {
	case <-n.closed:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}
  406. // Err returns the error that caused the node to shutdown or nil. Err blocks
  407. // until the node has fully shut down.
  408. func (n *Node) Err(ctx context.Context) error {
  409. select {
  410. case <-n.closed:
  411. return n.err
  412. case <-ctx.Done():
  413. return ctx.Err()
  414. }
  415. }
// runAgent waits until the node has someone to talk to (a known remote
// manager or a local control socket connection), then builds and runs the
// agent, blocking until the agent exits. ready is closed once the agent has
// completed registration.
func (n *Node) runAgent(ctx context.Context, db *bolt.DB, securityConfig *ca.SecurityConfig, ready chan<- struct{}) error {
	waitCtx, waitCancel := context.WithCancel(ctx)
	remotesCh := n.remotes.WaitSelect(ctx)
	controlCh := n.ListenControlSocket(waitCtx)

	// Loop until a remote peer is selectable, a non-nil local control
	// connection appears, or the context is done. controlCh may deliver nil
	// (its initial value), which does not count as a connection.
waitPeer:
	for {
		select {
		case <-ctx.Done():
			break waitPeer
		case <-remotesCh:
			break waitPeer
		case conn := <-controlCh:
			if conn != nil {
				break waitPeer
			}
		}
	}

	waitCancel()

	// If the wait ended because of cancellation, bail out now.
	select {
	case <-ctx.Done():
		return ctx.Err()
	default:
	}

	secChangesCh, secChangesCancel := securityConfig.Watch()
	defer secChangesCancel()

	rootCA := securityConfig.RootCA()
	issuer := securityConfig.IssuerInfo()

	agentConfig := &agent.Config{
		Hostname:         n.config.Hostname,
		ConnBroker:       n.connBroker,
		Executor:         n.config.Executor,
		DB:               db,
		NotifyNodeChange: n.notifyNodeChange,
		NotifyTLSChange:  secChangesCh,
		Credentials:      securityConfig.ClientTLSCreds,
		NodeTLSInfo: &api.NodeTLSInfo{
			TrustRoot:           rootCA.Certs,
			CertIssuerPublicKey: issuer.PublicKey,
			CertIssuerSubject:   issuer.Subject,
		},
	}
	// if a join address has been specified, then if the agent fails to connect due to a TLS error, fail fast - don't
	// keep re-trying to join
	if n.config.JoinAddr != "" {
		agentConfig.SessionTracker = &firstSessionErrorTracker{}
	}

	a, err := agent.New(agentConfig)
	if err != nil {
		return err
	}
	if err := a.Start(ctx); err != nil {
		return err
	}

	n.Lock()
	n.agent = a
	n.Unlock()

	defer func() {
		n.Lock()
		n.agent = nil
		n.Unlock()
	}()

	go func() {
		<-a.Ready()
		close(ready)
	}()

	// todo: manually call stop on context cancellation?
	return a.Err(context.Background())
}
// Ready returns a channel that is closed after the node's initialization
// completes for the first time (agent registered and, when the node is a
// manager, the manager ready to receive control requests).
func (n *Node) Ready() <-chan struct{} {
	return n.ready
}
// setControlSocket replaces the node's local control API connection, closing
// any previous one, publishes it to the connection broker, and wakes any
// waiters registered via ListenControlSocket. conn may be nil (manager gone).
func (n *Node) setControlSocket(conn *grpc.ClientConn) {
	n.Lock()
	if n.conn != nil {
		// Close the old connection before replacing it to avoid a leak.
		n.conn.Close()
	}
	n.conn = conn
	n.connBroker.SetLocalConn(conn)
	n.connCond.Broadcast()
	n.Unlock()
}
// ListenControlSocket listens changes of a connection for managing the
// cluster control api. The returned channel first delivers the current
// connection (possibly nil), then every subsequent connection whenever it
// changes, until ctx is done, at which point the channel is closed.
func (n *Node) ListenControlSocket(ctx context.Context) <-chan *grpc.ClientConn {
	c := make(chan *grpc.ClientConn, 1)
	n.RLock()
	conn := n.conn
	c <- conn // buffered; the initial value never blocks

	done := make(chan struct{})
	// Wake the cond-waiting goroutine below on cancellation so it can observe
	// ctx.Done() and exit (Wait alone would block forever otherwise).
	go func() {
		select {
		case <-ctx.Done():
			n.connCond.Broadcast()
		case <-done:
		}
	}()

	// NOTE: the read lock taken above is deliberately held by this goroutine
	// (released only while inside connCond.Wait) until it exits.
	go func() {
		defer close(c)
		defer close(done)
		defer n.RUnlock()
		for {
			select {
			case <-ctx.Done():
				return
			default:
			}
			if conn == n.conn {
				// No change yet. Wait releases the lock while blocked and
				// reacquires it when woken by setControlSocket or the
				// cancellation broadcast above.
				n.connCond.Wait()
				continue
			}
			conn = n.conn
			select {
			case c <- conn:
			case <-ctx.Done():
				return
			}
		}
	}()
	return c
}
  538. // NodeID returns current node's ID. May be empty if not set.
  539. func (n *Node) NodeID() string {
  540. n.RLock()
  541. defer n.RUnlock()
  542. return n.nodeID
  543. }
  544. // Manager returns manager instance started by node. May be nil.
  545. func (n *Node) Manager() *manager.Manager {
  546. n.RLock()
  547. defer n.RUnlock()
  548. return n.manager
  549. }
  550. // Agent returns agent instance started by node. May be nil.
  551. func (n *Node) Agent() *agent.Agent {
  552. n.RLock()
  553. defer n.RUnlock()
  554. return n.agent
  555. }
  556. // IsStateDirty returns true if any objects have been added to raft which make
  557. // the state "dirty". Currently, the existence of any object other than the
  558. // default cluster or the local node implies a dirty state.
  559. func (n *Node) IsStateDirty() (bool, error) {
  560. n.RLock()
  561. defer n.RUnlock()
  562. if n.manager == nil {
  563. return false, errors.New("node is not a manager")
  564. }
  565. return n.manager.IsStateDirty()
  566. }
  567. // Remotes returns a list of known peers known to node.
  568. func (n *Node) Remotes() []api.Peer {
  569. weights := n.remotes.Weights()
  570. remotes := make([]api.Peer, 0, len(weights))
  571. for p := range weights {
  572. remotes = append(remotes, p)
  573. }
  574. return remotes
  575. }
// loadSecurityConfig loads or bootstraps this node's TLS credentials: it
// tries on-disk certificates first; otherwise it either creates a brand-new
// root CA (when not joining a cluster) or downloads the root CA from the join
// address, then requests a node certificate. On success it records the role
// and node ID from the certificate. The returned func releases resources
// associated with the SecurityConfig.
func (n *Node) loadSecurityConfig(ctx context.Context, paths *ca.SecurityConfigPaths) (*ca.SecurityConfig, func() error, error) {
	var (
		securityConfig *ca.SecurityConfig
		cancel         func() error
	)

	krw := ca.NewKeyReadWriter(paths.Node, n.unlockKey, &manager.RaftDEKData{})
	if err := krw.Migrate(); err != nil {
		return nil, nil, err
	}

	// Check if we already have a valid certificates on disk.
	rootCA, err := ca.GetLocalRootCA(paths.RootCA)
	if err != nil && err != ca.ErrNoLocalRootCA {
		return nil, nil, err
	}
	if err == nil {
		// if forcing a new cluster, we allow the certificates to be expired - a new set will be generated
		securityConfig, cancel, err = ca.LoadSecurityConfig(ctx, rootCA, krw, n.config.ForceNewCluster)
		if err != nil {
			_, isInvalidKEK := errors.Cause(err).(ca.ErrInvalidKEK)
			if isInvalidKEK {
				// Wrong unlock key: surface the dedicated sentinel error.
				return nil, nil, ErrInvalidUnlockKey
			} else if !os.IsNotExist(err) {
				return nil, nil, errors.Wrapf(err, "error while loading TLS certificate in %s", paths.Node.Cert)
			}
			// A missing certificate is not fatal — fall through to bootstrap.
		}
	}

	if securityConfig == nil {
		if n.config.JoinAddr == "" {
			// if we're not joining a cluster, bootstrap a new one - and we have to set the unlock key
			n.unlockKey = nil
			if n.config.AutoLockManagers {
				n.unlockKey = encryption.GenerateSecretKey()
			}
			krw = ca.NewKeyReadWriter(paths.Node, n.unlockKey, &manager.RaftDEKData{})
			rootCA, err = ca.CreateRootCA(ca.DefaultRootCN)
			if err != nil {
				return nil, nil, err
			}
			if err := ca.SaveRootCA(rootCA, paths.RootCA); err != nil {
				return nil, nil, err
			}
			log.G(ctx).Debug("generated CA key and certificate")
		} else if err == ca.ErrNoLocalRootCA { // from previous error loading the root CA from disk
			rootCA, err = ca.DownloadRootCA(ctx, paths.RootCA, n.config.JoinToken, n.connBroker)
			if err != nil {
				return nil, nil, err
			}
			log.G(ctx).Debug("downloaded CA certificate")
		}

		// Obtain new certs and setup TLS certificates renewal for this node:
		// - If certificates weren't present on disk, we call CreateSecurityConfig, which blocks
		//   until a valid certificate has been issued.
		// - We wait for CreateSecurityConfig to finish since we need a certificate to operate.

		// Attempt to load certificate from disk
		securityConfig, cancel, err = ca.LoadSecurityConfig(ctx, rootCA, krw, n.config.ForceNewCluster)
		if err == nil {
			log.G(ctx).WithFields(logrus.Fields{
				"node.id": securityConfig.ClientTLSCreds.NodeID(),
			}).Debugf("loaded TLS certificate")
		} else {
			if _, ok := errors.Cause(err).(ca.ErrInvalidKEK); ok {
				return nil, nil, ErrInvalidUnlockKey
			}
			log.G(ctx).WithError(err).Debugf("no node credentials found in: %s", krw.Target())

			// No usable certificate on disk: request one from the CA. This
			// blocks until a certificate has been issued.
			securityConfig, cancel, err = rootCA.CreateSecurityConfig(ctx, krw, ca.CertificateRequestConfig{
				Token:        n.config.JoinToken,
				Availability: n.config.Availability,
				ConnBroker:   n.connBroker,
			})
			if err != nil {
				return nil, nil, err
			}
		}
	}

	// Publish the role and node ID derived from the issued certificate.
	n.Lock()
	n.role = securityConfig.ClientTLSCreds.Role()
	n.nodeID = securityConfig.ClientTLSCreds.NodeID()
	n.roleCond.Broadcast()
	n.Unlock()

	return securityConfig, cancel, nil
}
  657. func (n *Node) initManagerConnection(ctx context.Context, ready chan<- struct{}) error {
  658. opts := []grpc.DialOption{
  659. grpc.WithUnaryInterceptor(grpc_prometheus.UnaryClientInterceptor),
  660. grpc.WithStreamInterceptor(grpc_prometheus.StreamClientInterceptor),
  661. }
  662. insecureCreds := credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})
  663. opts = append(opts, grpc.WithTransportCredentials(insecureCreds))
  664. addr := n.config.ListenControlAPI
  665. opts = append(opts, grpc.WithDialer(
  666. func(addr string, timeout time.Duration) (net.Conn, error) {
  667. return xnet.DialTimeoutLocal(addr, timeout)
  668. }))
  669. conn, err := grpc.Dial(addr, opts...)
  670. if err != nil {
  671. return err
  672. }
  673. client := api.NewHealthClient(conn)
  674. for {
  675. resp, err := client.Check(ctx, &api.HealthCheckRequest{Service: "ControlAPI"})
  676. if err != nil {
  677. return err
  678. }
  679. if resp.Status == api.HealthCheckResponse_SERVING {
  680. break
  681. }
  682. time.Sleep(500 * time.Millisecond)
  683. }
  684. n.setControlSocket(conn)
  685. if ready != nil {
  686. close(ready)
  687. }
  688. return nil
  689. }
// waitRole blocks until the node's role equals role (ca.WorkerRole or
// ca.ManagerRole) or ctx is canceled. Returns nil once the role matches,
// ctx.Err() on cancellation. The cond's locker is the node's RLocker.
func (n *Node) waitRole(ctx context.Context, role string) error {
	n.roleCond.L.Lock()
	if role == n.role {
		// Fast path: already in the requested role.
		n.roleCond.L.Unlock()
		return nil
	}
	finishCh := make(chan struct{})
	defer close(finishCh)
	// A cond Wait cannot be interrupted directly; on cancellation this helper
	// broadcasts so the loop below wakes up and notices ctx.Done().
	go func() {
		select {
		case <-finishCh:
		case <-ctx.Done():
			// call broadcast to shutdown this function
			n.roleCond.Broadcast()
		}
	}()
	defer n.roleCond.L.Unlock()
	for role != n.role {
		n.roleCond.Wait()
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}
	}

	return nil
}
// runManager creates and runs a manager instance, blocking until the manager
// stops on its own, the role changes to worker, the node is removed from
// raft, or ctx is canceled. The returned bool reports whether the manager's
// state was scheduled to be cleared (true on demotion or raft removal).
// ready is closed (via initManagerConnection) once the control API is serving.
func (n *Node) runManager(ctx context.Context, securityConfig *ca.SecurityConfig, rootPaths ca.CertPaths, ready chan struct{}, workerRole <-chan struct{}) (bool, error) {
	var remoteAPI *manager.RemoteAddrs
	if n.config.ListenRemoteAPI != "" {
		remoteAPI = &manager.RemoteAddrs{
			ListenAddr:    n.config.ListenRemoteAPI,
			AdvertiseAddr: n.config.AdvertiseRemoteAPI,
		}
	}

	// Prefer the configured join address; otherwise fall back to a known
	// remote peer (excluding ourselves), if any.
	joinAddr := n.config.JoinAddr
	if joinAddr == "" {
		remoteAddr, err := n.remotes.Select(n.NodeID())
		if err == nil {
			joinAddr = remoteAddr.Addr
		}
	}

	m, err := manager.New(&manager.Config{
		ForceNewCluster:  n.config.ForceNewCluster,
		RemoteAPI:        remoteAPI,
		ControlAPI:       n.config.ListenControlAPI,
		SecurityConfig:   securityConfig,
		ExternalCAs:      n.config.ExternalCAs,
		JoinRaft:         joinAddr,
		ForceJoin:        n.config.JoinAddr != "",
		StateDir:         n.config.StateDir,
		HeartbeatTick:    n.config.HeartbeatTick,
		ElectionTick:     n.config.ElectionTick,
		AutoLockManagers: n.config.AutoLockManagers,
		UnlockKey:        n.unlockKey,
		Availability:     n.config.Availability,
		PluginGetter:     n.config.PluginGetter,
		RootCAPaths:      rootPaths,
	})
	if err != nil {
		return false, err
	}
	done := make(chan struct{})
	var runErr error
	// Run the manager on a background context so that shutdown is driven by
	// m.Stop (in the deferred cleanup below), not by ctx cancellation.
	go func(logger *logrus.Entry) {
		if err := m.Run(log.WithLogger(context.Background(), logger)); err != nil {
			runErr = err
		}
		close(done)
	}(log.G(ctx))

	var clearData bool
	defer func() {
		n.Lock()
		n.manager = nil
		n.Unlock()
		// Stop the manager (clearing raft data if demoted/removed), wait for
		// Run to return, then drop the local control socket.
		m.Stop(ctx, clearData)
		<-done
		n.setControlSocket(nil)
	}()

	n.Lock()
	n.manager = m
	n.Unlock()

	connCtx, connCancel := context.WithCancel(ctx)
	defer connCancel()

	go n.initManagerConnection(connCtx, ready)

	// wait for manager stop or for role change
	select {
	case <-done:
		return false, runErr
	case <-workerRole:
		log.G(ctx).Info("role changed to worker, stopping manager")
		clearData = true
	case <-m.RemovedFromRaft():
		log.G(ctx).Info("manager removed from raft cluster, stopping manager")
		clearData = true
	case <-ctx.Done():
		return false, ctx.Err()
	}
	return clearData, nil
}
// superviseManager runs the manager whenever this node holds the manager
// role, restarting it as needed, until ctx is canceled. After each manager
// stop it waits (bounded by roleChangeTimeout) for a demotion to worker
// before restarting, forcing a certificate renewal when the manager was
// removed from raft, to avoid rejoining under a new raft ID. ready is only
// meaningful for the first manager run and is nil-ed out afterwards.
func (n *Node) superviseManager(ctx context.Context, securityConfig *ca.SecurityConfig, rootPaths ca.CertPaths, ready chan struct{}, renewer *ca.TLSRenewer) error {
	for {
		if err := n.waitRole(ctx, ca.ManagerRole); err != nil {
			return err
		}

		// workerRole is closed if/when the node becomes a worker again.
		workerRole := make(chan struct{})
		waitRoleCtx, waitRoleCancel := context.WithCancel(ctx)
		go func() {
			if n.waitRole(waitRoleCtx, ca.WorkerRole) == nil {
				close(workerRole)
			}
		}()

		wasRemoved, err := n.runManager(ctx, securityConfig, rootPaths, ready, workerRole)
		if err != nil {
			waitRoleCancel()
			return errors.Wrap(err, "manager stopped")
		}

		// If the manager stopped running and our role is still
		// "manager", it's possible that the manager was demoted and
		// the agent hasn't realized this yet. We should wait for the
		// role to change instead of restarting the manager immediately.
		err = func() error {
			timer := time.NewTimer(roleChangeTimeout)
			defer timer.Stop()
			defer waitRoleCancel()

			select {
			case <-timer.C:
			case <-workerRole:
				return nil
			case <-ctx.Done():
				return ctx.Err()
			}

			if !wasRemoved {
				log.G(ctx).Warn("failed to get worker role after manager stop, restarting manager")
				return nil
			}
			// We need to be extra careful about restarting the
			// manager. It may cause the node to wrongly join under
			// a new Raft ID. Since we didn't see a role change
			// yet, force a certificate renewal. If the certificate
			// comes back with a worker role, we know we shouldn't
			// restart the manager. However, if we don't see
			// workerRole get closed, it means we didn't switch to
			// a worker certificate, either because we couldn't
			// contact a working CA, or because we've been
			// re-promoted. In this case, we must assume we were
			// re-promoted, and restart the manager.
			log.G(ctx).Warn("failed to get worker role after manager stop, forcing certificate renewal")
			timer.Reset(roleChangeTimeout)
			renewer.Renew()

			// Now that the renewal request has been sent to the
			// renewal goroutine, wait for a change in role.
			select {
			case <-timer.C:
				log.G(ctx).Warn("failed to get worker role after manager stop, restarting manager")
			case <-workerRole:
			case <-ctx.Done():
				return ctx.Err()
			}
			return nil
		}()
		if err != nil {
			return err
		}

		// ready was only for the first manager instance.
		ready = nil
	}
}
// persistentRemotes wraps remotes.Remotes and persists the peer list to a
// file on disk whenever it changes.
type persistentRemotes struct {
	sync.RWMutex
	// c wakes WaitSelect waiters when the peer set changes; it waits on
	// the read side of the RWMutex (see newPersistentRemotes).
	c *sync.Cond
	remotes.Remotes
	// storePath is the file the peer list is written to.
	storePath string
	// lastSavedState is the peer list recorded by the most recent save
	// call, used to skip writes when nothing changed.
	lastSavedState []api.Peer
}
  864. func newPersistentRemotes(f string, peers ...api.Peer) *persistentRemotes {
  865. pr := &persistentRemotes{
  866. storePath: f,
  867. Remotes: remotes.NewRemotes(peers...),
  868. }
  869. pr.c = sync.NewCond(pr.RLocker())
  870. return pr
  871. }
  872. func (s *persistentRemotes) Observe(peer api.Peer, weight int) {
  873. s.Lock()
  874. defer s.Unlock()
  875. s.Remotes.Observe(peer, weight)
  876. s.c.Broadcast()
  877. if err := s.save(); err != nil {
  878. logrus.Errorf("error writing cluster state file: %v", err)
  879. return
  880. }
  881. return
  882. }
  883. func (s *persistentRemotes) Remove(peers ...api.Peer) {
  884. s.Lock()
  885. defer s.Unlock()
  886. s.Remotes.Remove(peers...)
  887. if err := s.save(); err != nil {
  888. logrus.Errorf("error writing cluster state file: %v", err)
  889. return
  890. }
  891. return
  892. }
  893. func (s *persistentRemotes) save() error {
  894. weights := s.Weights()
  895. remotes := make([]api.Peer, 0, len(weights))
  896. for r := range weights {
  897. remotes = append(remotes, r)
  898. }
  899. sort.Sort(sortablePeers(remotes))
  900. if reflect.DeepEqual(remotes, s.lastSavedState) {
  901. return nil
  902. }
  903. dt, err := json.Marshal(remotes)
  904. if err != nil {
  905. return err
  906. }
  907. s.lastSavedState = remotes
  908. return ioutils.AtomicWriteFile(s.storePath, dt, 0600)
  909. }
// WaitSelect waits until at least one remote becomes available and then selects one.
// The returned buffered channel receives at most one peer and is then closed;
// it is closed without a value if ctx is canceled before a peer is available.
func (s *persistentRemotes) WaitSelect(ctx context.Context) <-chan api.Peer {
	c := make(chan api.Peer, 1)
	// Hold the read lock across the cond Wait loop below; the cond was
	// created on RLocker(), so Wait releases/reacquires this read lock.
	s.RLock()
	done := make(chan struct{})
	// The cond variable cannot observe ctx on its own, so a helper
	// goroutine broadcasts once on cancellation to unblock Wait.
	go func() {
		select {
		case <-ctx.Done():
			s.c.Broadcast()
		case <-done:
		}
	}()
	go func() {
		defer s.RUnlock()
		defer close(c)
		defer close(done)
		for {
			// Re-check cancellation after every wakeup.
			if ctx.Err() != nil {
				return
			}
			p, err := s.Select()
			if err == nil {
				c <- p
				return
			}
			// No peer available yet; sleep until Observe (or the
			// cancellation helper above) broadcasts.
			s.c.Wait()
		}
	}()
	return c
}
  940. // sortablePeers is a sort wrapper for []api.Peer
  941. type sortablePeers []api.Peer
  942. func (sp sortablePeers) Less(i, j int) bool { return sp[i].NodeID < sp[j].NodeID }
  943. func (sp sortablePeers) Len() int { return len(sp) }
  944. func (sp sortablePeers) Swap(i, j int) { sp[i], sp[j] = sp[j], sp[i] }
  945. // firstSessionErrorTracker is a utility that helps determine whether the agent should exit after
  946. // a TLS failure on establishing the first session. This should only happen if a join address
  947. // is specified. If establishing the first session succeeds, but later on some session fails
  948. // because of a TLS error, we don't want to exit the agent because a previously successful
  949. // session indicates that the TLS error may be a transient issue.
  950. type firstSessionErrorTracker struct {
  951. mu sync.Mutex
  952. pastFirstSession bool
  953. err error
  954. }
  955. func (fs *firstSessionErrorTracker) SessionEstablished() {
  956. fs.mu.Lock()
  957. fs.pastFirstSession = true
  958. fs.mu.Unlock()
  959. }
  960. func (fs *firstSessionErrorTracker) SessionError(err error) {
  961. fs.mu.Lock()
  962. fs.err = err
  963. fs.mu.Unlock()
  964. }
  965. func (fs *firstSessionErrorTracker) SessionClosed() error {
  966. fs.mu.Lock()
  967. defer fs.mu.Unlock()
  968. // unfortunately grpc connection errors are type grpc.rpcError, which are not exposed, and we can't get at the underlying error type
  969. if !fs.pastFirstSession && grpc.Code(fs.err) == codes.Internal &&
  970. strings.HasPrefix(grpc.ErrorDesc(fs.err), "connection error") && strings.Contains(grpc.ErrorDesc(fs.err), "transport: x509:") {
  971. return fs.err
  972. }
  973. return nil
  974. }