12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255 |
- package node
- import (
- "bytes"
- "crypto/tls"
- "encoding/json"
- "io/ioutil"
- "net"
- "os"
- "path/filepath"
- "reflect"
- "sort"
- "strings"
- "sync"
- "time"
- "github.com/boltdb/bolt"
- "github.com/docker/docker/pkg/plugingetter"
- metrics "github.com/docker/go-metrics"
- "github.com/docker/swarmkit/agent"
- "github.com/docker/swarmkit/agent/exec"
- "github.com/docker/swarmkit/api"
- "github.com/docker/swarmkit/ca"
- "github.com/docker/swarmkit/connectionbroker"
- "github.com/docker/swarmkit/ioutils"
- "github.com/docker/swarmkit/log"
- "github.com/docker/swarmkit/manager"
- "github.com/docker/swarmkit/manager/encryption"
- "github.com/docker/swarmkit/remotes"
- "github.com/docker/swarmkit/xnet"
- grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
- "github.com/pkg/errors"
- "github.com/sirupsen/logrus"
- "golang.org/x/net/context"
- "google.golang.org/grpc"
- "google.golang.org/grpc/codes"
- "google.golang.org/grpc/credentials"
- )
const (
	// stateFilename is the name of the file under StateDir where the list of
	// known remote managers is persisted as JSON (see New and persistentRemotes).
	stateFilename = "state.json"
	// roleChangeTimeout bounds how long to wait for a node role change to take
	// effect. (Used outside this chunk — confirm against the rest of the file.)
	roleChangeTimeout = 16 * time.Second
)
var (
	// nodeInfo exports swarm/node identity labels as a metric (see init).
	nodeInfo metrics.LabeledGauge
	// nodeManager is 1 while this node acts as a manager, 0 otherwise.
	nodeManager metrics.Gauge

	// errNodeStarted is returned by Start when the node is already running.
	errNodeStarted = errors.New("node: already started")
	// errNodeNotStarted is returned by Stop when Start was never called.
	errNodeNotStarted = errors.New("node: not started")
	// certDirectory is the subdirectory of StateDir holding TLS material.
	certDirectory = "certificates"

	// ErrInvalidUnlockKey is returned when we can't decrypt the TLS certificate
	ErrInvalidUnlockKey = errors.New("node is locked, and needs a valid unlock key")
)
- func init() {
- ns := metrics.NewNamespace("swarm", "node", nil)
- nodeInfo = ns.NewLabeledGauge("info", "Information related to the swarm", "",
- "swarm_id",
- "node_id",
- )
- nodeManager = ns.NewGauge("manager", "Whether this node is a manager or not", "")
- metrics.Register(ns)
- }
// Config provides values for a Node.
type Config struct {
	// Hostname is the name of host for agent instance.
	Hostname string

	// JoinAddr specifies node that should be used for the initial connection to
	// other manager in cluster. This should be only one address and optional,
	// the actual remotes come from the stored state.
	JoinAddr string

	// StateDir specifies the directory the node uses to keep the state of the
	// remote managers and certificates.
	StateDir string

	// JoinToken is the token to be used on the first certificate request.
	JoinToken string

	// ExternalCAs is a list of CAs to which a manager node
	// will make certificate signing requests for node certificates.
	ExternalCAs []*api.ExternalCA

	// ForceNewCluster creates a new cluster from current raft state.
	ForceNewCluster bool

	// ListenControlAPI specifies address the control API should listen on.
	ListenControlAPI string

	// ListenRemoteAPI specifies the address for the remote API that agents
	// and raft members connect to.
	ListenRemoteAPI string

	// AdvertiseRemoteAPI specifies the address that should be advertised
	// for connections to the remote API (including the raft service).
	AdvertiseRemoteAPI string

	// Executor specifies the executor to use for the agent.
	Executor exec.Executor

	// ElectionTick defines the amount of ticks needed without
	// leader to trigger a new election.
	ElectionTick uint32

	// HeartbeatTick defines the amount of ticks between each
	// heartbeat sent to other members for health-check purposes.
	HeartbeatTick uint32

	// AutoLockManagers determines whether or not an unlock key will be generated
	// when bootstrapping a new cluster for the first time.
	AutoLockManagers bool

	// UnlockKey is the key to unlock a node - used for decrypting at rest. This
	// only applies to nodes that have already joined a cluster.
	UnlockKey []byte

	// Availability allows a user to control the current scheduling status of a node.
	Availability api.NodeSpec_Availability

	// PluginGetter provides access to docker's plugin inventory.
	PluginGetter plugingetter.PluginGetter

	// FIPS is a boolean stating whether the node is FIPS enabled.
	FIPS bool
}
// Node implements the primary node functionality for a member of a swarm
// cluster. Node handles workloads and may also run as a manager.
type Node struct {
	sync.RWMutex // guards all mutable fields below

	config *Config
	// remotes is the persisted set of known manager addresses (saved to the
	// state file in StateDir).
	remotes    *persistentRemotes
	connBroker *connectionbroker.Broker
	// role is the node's current certificate role (ca.WorkerRole or
	// ca.ManagerRole); updated on certificate renewal.
	role string
	// roleCond is signaled whenever role changes; waiters hold the read lock
	// (the cond is created with n.RLocker()).
	roleCond *sync.Cond
	// conn is the local gRPC connection to this node's own control API, set
	// via setControlSocket when running as a manager; may be nil.
	conn *grpc.ClientConn
	// connCond is signaled whenever conn changes (also uses n.RLocker()).
	connCond  *sync.Cond
	nodeID    string
	started   chan struct{} // closed once Start has been called
	startOnce sync.Once
	stopped   chan struct{} // closed once Stop has been called
	stopOnce  sync.Once
	ready     chan struct{} // closed when agent has completed registration and manager(if enabled) is ready to receive control requests
	closed    chan struct{} // closed when run has fully terminated; err is valid after this
	err       error         // terminal error from run, exposed via Err
	agent     *agent.Agent
	manager   *manager.Manager
	// notifyNodeChange is used by the agent to relay node updates from the
	// dispatcher Session stream to (*Node).run.
	notifyNodeChange chan *agent.NodeChanges
	unlockKey        []byte
}
- type lastSeenRole struct {
- role api.NodeRole
- }
- // observe notes the latest value of this node role, and returns true if it
- // is the first seen value, or is different from the most recently seen value.
- func (l *lastSeenRole) observe(newRole api.NodeRole) bool {
- changed := l.role != newRole
- l.role = newRole
- return changed
- }
- // RemoteAPIAddr returns address on which remote manager api listens.
- // Returns nil if node is not manager.
- func (n *Node) RemoteAPIAddr() (string, error) {
- n.RLock()
- defer n.RUnlock()
- if n.manager == nil {
- return "", errors.New("manager is not running")
- }
- addr := n.manager.Addr()
- if addr == "" {
- return "", errors.New("manager addr is not set")
- }
- return addr, nil
- }
- // New returns new Node instance.
- func New(c *Config) (*Node, error) {
- if err := os.MkdirAll(c.StateDir, 0700); err != nil {
- return nil, err
- }
- stateFile := filepath.Join(c.StateDir, stateFilename)
- dt, err := ioutil.ReadFile(stateFile)
- var p []api.Peer
- if err != nil && !os.IsNotExist(err) {
- return nil, err
- }
- if err == nil {
- if err := json.Unmarshal(dt, &p); err != nil {
- return nil, err
- }
- }
- n := &Node{
- remotes: newPersistentRemotes(stateFile, p...),
- role: ca.WorkerRole,
- config: c,
- started: make(chan struct{}),
- stopped: make(chan struct{}),
- closed: make(chan struct{}),
- ready: make(chan struct{}),
- notifyNodeChange: make(chan *agent.NodeChanges, 1),
- unlockKey: c.UnlockKey,
- }
- if n.config.JoinAddr != "" || n.config.ForceNewCluster {
- n.remotes = newPersistentRemotes(filepath.Join(n.config.StateDir, stateFilename))
- if n.config.JoinAddr != "" {
- n.remotes.Observe(api.Peer{Addr: n.config.JoinAddr}, remotes.DefaultObservationWeight)
- }
- }
- n.connBroker = connectionbroker.New(n.remotes)
- n.roleCond = sync.NewCond(n.RLocker())
- n.connCond = sync.NewCond(n.RLocker())
- return n, nil
- }
- // BindRemote starts a listener that exposes the remote API.
- func (n *Node) BindRemote(ctx context.Context, listenAddr string, advertiseAddr string) error {
- n.RLock()
- defer n.RUnlock()
- if n.manager == nil {
- return errors.New("manager is not running")
- }
- return n.manager.BindRemote(ctx, manager.RemoteAddrs{
- ListenAddr: listenAddr,
- AdvertiseAddr: advertiseAddr,
- })
- }
- // Start starts a node instance.
- func (n *Node) Start(ctx context.Context) error {
- err := errNodeStarted
- n.startOnce.Do(func() {
- close(n.started)
- go n.run(ctx)
- err = nil // clear error above, only once.
- })
- return err
- }
- func (n *Node) currentRole() api.NodeRole {
- n.Lock()
- currentRole := api.NodeRoleWorker
- if n.role == ca.ManagerRole {
- currentRole = api.NodeRoleManager
- }
- n.Unlock()
- return currentRole
- }
// run is the node's main loop: it loads the security config, opens the task
// database, starts the agent and (if applicable) the manager, and blocks
// until all components have terminated. The returned error is recorded in
// n.err and n.closed is closed on exit.
func (n *Node) run(ctx context.Context) (err error) {
	defer func() {
		n.err = err
		// close the n.closed channel to indicate that the Node has completely
		// terminated
		close(n.closed)
	}()
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	ctx = log.WithModule(ctx, "node")

	// set up a goroutine to monitor the stop channel, and cancel the run
	// context when the node is stopped
	go func(ctx context.Context) {
		select {
		case <-ctx.Done():
		case <-n.stopped:
			cancel()
		}
	}(ctx)

	// First thing's first: get the SecurityConfig for this node. This includes
	// the certificate information, and the root CA. It also returns a cancel
	// function. This is needed because the SecurityConfig is a live object,
	// and provides a watch queue so that caller can observe changes to the
	// security config. This watch queue has to be closed, which is done by the
	// secConfigCancel function.
	//
	// It's also noteworthy that loading the security config with the node's
	// loadSecurityConfig method has the side effect of setting the node's ID
	// and role fields, meaning it isn't until after that point that node knows
	// its ID
	paths := ca.NewConfigPaths(filepath.Join(n.config.StateDir, certDirectory))
	securityConfig, secConfigCancel, err := n.loadSecurityConfig(ctx, paths)
	if err != nil {
		return err
	}
	defer secConfigCancel()

	// Now that we have the security config, we can get a TLSRenewer, which is
	// a live component handling certificate rotation.
	renewer := ca.NewTLSRenewer(securityConfig, n.connBroker, paths.RootCA)

	// Now that we have the security goop all loaded, we know the Node's ID and
	// can add that to our logging context.
	ctx = log.WithLogger(ctx, log.G(ctx).WithField("node.id", n.NodeID()))

	// Next, set up the task database. The task database is used by the agent
	// to keep a persistent local record of its tasks. Since every manager also
	// has an agent, every node needs a task database, so we do this regardless
	// of role.
	taskDBPath := filepath.Join(n.config.StateDir, "worker", "tasks.db")
	// Doing os.MkdirAll will create the necessary directory path for the task
	// database if it doesn't already exist, and if it does already exist, no
	// error will be returned, so we use this regardless of whether this node
	// is new or not.
	if err := os.MkdirAll(filepath.Dir(taskDBPath), 0777); err != nil {
		return err
	}
	db, err := bolt.Open(taskDBPath, 0666, nil)
	if err != nil {
		return err
	}
	defer db.Close()

	// agentDone is a channel that represents the agent having exited. We start
	// the agent in a goroutine a few blocks down, and before that goroutine
	// exits, it closes this channel to signal to the goroutine just below to
	// terminate.
	agentDone := make(chan struct{})

	// This goroutine is the node changes loop. The n.notifyNodeChange
	// channel is passed to the agent. When an new node object gets sent down
	// to the agent, it gets passed back up to this node object, so that we can
	// check if a role update or a root certificate rotation is required. This
	// handles root rotation, but the renewer handles regular certification
	// rotation.
	go func() {
		// lastNodeDesiredRole is the last-seen value of Node.Spec.DesiredRole,
		// used to make role changes "edge triggered" and avoid renewal loops.
		lastNodeDesiredRole := lastSeenRole{role: n.currentRole()}
		for {
			select {
			case <-agentDone:
				return
			case nodeChanges := <-n.notifyNodeChange:
				if nodeChanges.Node != nil {
					// This is a bit complex to be backward compatible with older CAs that
					// don't support the Node.Role field. They only use what's presently
					// called DesiredRole.
					// 1) If DesiredRole changes, kick off a certificate renewal. The renewal
					// is delayed slightly to give Role time to change as well if this is
					// a newer CA. If the certificate we get back doesn't have the expected
					// role, we continue renewing with exponential backoff.
					// 2) If the server is sending us IssuanceStateRotate, renew the cert as
					// requested by the CA.
					desiredRoleChanged := lastNodeDesiredRole.observe(nodeChanges.Node.Spec.DesiredRole)
					if desiredRoleChanged {
						switch nodeChanges.Node.Spec.DesiredRole {
						case api.NodeRoleManager:
							renewer.SetExpectedRole(ca.ManagerRole)
						case api.NodeRoleWorker:
							renewer.SetExpectedRole(ca.WorkerRole)
						}
					}
					if desiredRoleChanged || nodeChanges.Node.Certificate.Status.State == api.IssuanceStateRotate {
						renewer.Renew()
					}
				}
				if nodeChanges.RootCert != nil {
					// No-op if the incoming root cert is the one we already trust.
					if bytes.Equal(nodeChanges.RootCert, securityConfig.RootCA().Certs) {
						continue
					}
					newRootCA, err := ca.NewRootCA(nodeChanges.RootCert, nil, nil, ca.DefaultNodeCertExpiration, nil)
					if err != nil {
						log.G(ctx).WithError(err).Error("invalid new root certificate from the dispatcher")
						continue
					}
					if err := securityConfig.UpdateRootCA(&newRootCA); err != nil {
						log.G(ctx).WithError(err).Error("could not use new root CA from dispatcher")
						continue
					}
					if err := ca.SaveRootCA(newRootCA, paths.RootCA); err != nil {
						log.G(ctx).WithError(err).Error("could not save new root certificate from the dispatcher")
						continue
					}
				}
			}
		}
	}()

	// Now we're going to launch the main component goroutines, the Agent, the
	// Manager (maybe) and the certificate updates loop. We shouldn't exit
	// the node object until all 3 of these components have terminated, so we
	// create a waitgroup to block termination of the node until then
	var wg sync.WaitGroup
	wg.Add(3)

	// These two blocks update some of the metrics settings.
	nodeInfo.WithValues(
		securityConfig.ClientTLSCreds.Organization(),
		securityConfig.ClientTLSCreds.NodeID(),
	).Set(1)
	if n.currentRole() == api.NodeRoleManager {
		nodeManager.Set(1)
	} else {
		nodeManager.Set(0)
	}

	// We created the renewer way up when we were creating the SecurityConfig
	// at the beginning of run, but now we're ready to start receiving
	// CertificateUpdates, and launch a goroutine to handle this. Updates is a
	// channel we iterate containing the results of certificate renewals.
	updates := renewer.Start(ctx)
	go func() {
		for certUpdate := range updates {
			if certUpdate.Err != nil {
				logrus.Warnf("error renewing TLS certificate: %v", certUpdate.Err)
				continue
			}
			// Set the new role, and notify our waiting role changing logic
			// that the role has changed.
			n.Lock()
			n.role = certUpdate.Role
			n.roleCond.Broadcast()
			n.Unlock()

			// Export the new role for metrics
			if n.currentRole() == api.NodeRoleManager {
				nodeManager.Set(1)
			} else {
				nodeManager.Set(0)
			}
		}
		wg.Done()
	}()

	// and, finally, start the two main components: the manager and the agent
	// NOTE(review): n.role is read here without holding the lock — looks
	// benign because run is the only writer path via the goroutine above, but
	// confirm this is race-free under -race.
	role := n.role

	// Channels to signal when these respective components are up and ready to
	// go.
	managerReady := make(chan struct{})
	agentReady := make(chan struct{})
	// these variables are defined in this scope so that they're closed on by
	// respective goroutines below.
	var managerErr error
	var agentErr error
	go func() {
		// superviseManager is a routine that watches our manager role
		managerErr = n.superviseManager(ctx, securityConfig, paths.RootCA, managerReady, renewer) // store err and loop
		wg.Done()
		cancel()
	}()
	go func() {
		agentErr = n.runAgent(ctx, db, securityConfig, agentReady)
		wg.Done()
		cancel()
		close(agentDone)
	}()

	// This goroutine is what signals that the node has fully started by
	// closing the n.ready channel. First, it waits for the agent to start.
	// Then, if this node is a manager, it will wait on either the manager
	// starting, or the node role changing. This ensures that if the node is
	// demoted before the manager starts, it doesn't get stuck.
	go func() {
		<-agentReady
		if role == ca.ManagerRole {
			workerRole := make(chan struct{})
			waitRoleCtx, waitRoleCancel := context.WithCancel(ctx)
			go func() {
				if n.waitRole(waitRoleCtx, ca.WorkerRole) == nil {
					close(workerRole)
				}
			}()
			select {
			case <-managerReady:
			case <-workerRole:
			}
			waitRoleCancel()
		}
		close(n.ready)
	}()

	// And, finally, we park and wait for the node to close up. If we get any
	// error other than context canceled, we return it.
	wg.Wait()
	if managerErr != nil && errors.Cause(managerErr) != context.Canceled {
		return managerErr
	}
	if agentErr != nil && errors.Cause(agentErr) != context.Canceled {
		return agentErr
	}
	// NOTE(dperny): we return err here, but the last time I can see err being
	// set is when we open the boltdb way up in this method, so I don't know
	// what returning err is supposed to do.
	return err
}
- // Stop stops node execution
- func (n *Node) Stop(ctx context.Context) error {
- select {
- case <-n.started:
- default:
- return errNodeNotStarted
- }
- // ask agent to clean up assignments
- n.Lock()
- if n.agent != nil {
- if err := n.agent.Leave(ctx); err != nil {
- log.G(ctx).WithError(err).Error("agent failed to clean up assignments")
- }
- }
- n.Unlock()
- n.stopOnce.Do(func() {
- close(n.stopped)
- })
- select {
- case <-n.closed:
- return nil
- case <-ctx.Done():
- return ctx.Err()
- }
- }
- // Err returns the error that caused the node to shutdown or nil. Err blocks
- // until the node has fully shut down.
- func (n *Node) Err(ctx context.Context) error {
- select {
- case <-n.closed:
- return n.err
- case <-ctx.Done():
- return ctx.Err()
- }
- }
// runAgent starts the node's agent. When the agent has started, the provided
// ready channel is closed. When the agent exits, this will return the error
// that caused it.
func (n *Node) runAgent(ctx context.Context, db *bolt.DB, securityConfig *ca.SecurityConfig, ready chan<- struct{}) error {
	// First, get a channel for knowing when a remote peer has been selected.
	// The value returned from the remotesCh is ignored, we just need to know
	// when the peer is selected
	remotesCh := n.remotes.WaitSelect(ctx)
	// then, we set up a new context to pass specifically to
	// ListenControlSocket, and start that method to wait on a connection on
	// the cluster control API.
	waitCtx, waitCancel := context.WithCancel(ctx)
	controlCh := n.ListenControlSocket(waitCtx)

	// The goal here to wait either until we have a remote peer selected, or
	// connection to the control socket. These are both ways to connect the
	// agent to a manager, and we need to wait until one or the other is
	// available to start the agent
waitPeer:
	for {
		select {
		case <-ctx.Done():
			break waitPeer
		case <-remotesCh:
			break waitPeer
		case conn := <-controlCh:
			// conn will probably be nil the first time we call this, probably,
			// but only a non-nil conn represent an actual connection.
			if conn != nil {
				break waitPeer
			}
		}
	}

	// We can stop listening for new control socket connections once we're
	// ready
	waitCancel()

	// NOTE(dperny): not sure why we need to recheck the context here. I guess
	// it avoids a race if the context was canceled at the same time that a
	// connection or peer was available. I think it's just an optimization.
	select {
	case <-ctx.Done():
		return ctx.Err()
	default:
	}

	// Now we can go ahead and configure, create, and start the agent.
	secChangesCh, secChangesCancel := securityConfig.Watch()
	defer secChangesCancel()

	rootCA := securityConfig.RootCA()
	issuer := securityConfig.IssuerInfo()

	agentConfig := &agent.Config{
		Hostname:         n.config.Hostname,
		ConnBroker:       n.connBroker,
		Executor:         n.config.Executor,
		DB:               db,
		NotifyNodeChange: n.notifyNodeChange,
		NotifyTLSChange:  secChangesCh,
		Credentials:      securityConfig.ClientTLSCreds,
		NodeTLSInfo: &api.NodeTLSInfo{
			TrustRoot:           rootCA.Certs,
			CertIssuerPublicKey: issuer.PublicKey,
			CertIssuerSubject:   issuer.Subject,
		},
		FIPS: n.config.FIPS,
	}
	// if a join address has been specified, then if the agent fails to connect
	// due to a TLS error, fail fast - don't keep re-trying to join
	if n.config.JoinAddr != "" {
		agentConfig.SessionTracker = &firstSessionErrorTracker{}
	}

	a, err := agent.New(agentConfig)
	if err != nil {
		return err
	}
	if err := a.Start(ctx); err != nil {
		return err
	}

	// Publish the running agent on the node, and clear it again on exit so
	// callers of Agent() never see a stopped agent.
	n.Lock()
	n.agent = a
	n.Unlock()

	defer func() {
		n.Lock()
		n.agent = nil
		n.Unlock()
	}()

	// when the agent indicates that it is ready, we close the ready channel.
	go func() {
		<-a.Ready()
		close(ready)
	}()

	// todo: manually call stop on context cancellation?
	return a.Err(context.Background())
}
// Ready returns a channel that is closed after the node's initialization has
// completed for the first time.
func (n *Node) Ready() <-chan struct{} {
	return n.ready
}
- func (n *Node) setControlSocket(conn *grpc.ClientConn) {
- n.Lock()
- if n.conn != nil {
- n.conn.Close()
- }
- n.conn = conn
- n.connBroker.SetLocalConn(conn)
- n.connCond.Broadcast()
- n.Unlock()
- }
// ListenControlSocket listens changes of a connection for managing the
// cluster control api. It returns a channel that immediately yields the
// current connection (possibly nil) and then yields every subsequent
// connection set via setControlSocket, until ctx is canceled.
func (n *Node) ListenControlSocket(ctx context.Context) <-chan *grpc.ClientConn {
	c := make(chan *grpc.ClientConn, 1)
	// The read lock is acquired here but released by the second goroutine
	// below; connCond was created with n.RLocker(), so connCond.Wait
	// atomically releases and reacquires this read lock.
	n.RLock()
	conn := n.conn
	// The channel has capacity 1, so this initial send cannot block.
	c <- conn
	done := make(chan struct{})
	// Watcher goroutine: on ctx cancellation, broadcast on connCond so the
	// waiting goroutine below wakes up and can observe ctx.Done().
	go func() {
		select {
		case <-ctx.Done():
			n.connCond.Broadcast()
		case <-done:
		}
	}()
	go func() {
		defer close(c)
		defer close(done)
		defer n.RUnlock()
		for {
			select {
			case <-ctx.Done():
				return
			default:
			}
			// Sleep until the connection changes (or we're woken by the
			// cancellation broadcast above).
			if conn == n.conn {
				n.connCond.Wait()
				continue
			}
			conn = n.conn
			select {
			case c <- conn:
			case <-ctx.Done():
				return
			}
		}
	}()
	return c
}
- // NodeID returns current node's ID. May be empty if not set.
- func (n *Node) NodeID() string {
- n.RLock()
- defer n.RUnlock()
- return n.nodeID
- }
- // Manager returns manager instance started by node. May be nil.
- func (n *Node) Manager() *manager.Manager {
- n.RLock()
- defer n.RUnlock()
- return n.manager
- }
- // Agent returns agent instance started by node. May be nil.
- func (n *Node) Agent() *agent.Agent {
- n.RLock()
- defer n.RUnlock()
- return n.agent
- }
- // IsStateDirty returns true if any objects have been added to raft which make
- // the state "dirty". Currently, the existence of any object other than the
- // default cluster or the local node implies a dirty state.
- func (n *Node) IsStateDirty() (bool, error) {
- n.RLock()
- defer n.RUnlock()
- if n.manager == nil {
- return false, errors.New("node is not a manager")
- }
- return n.manager.IsStateDirty()
- }
- // Remotes returns a list of known peers known to node.
- func (n *Node) Remotes() []api.Peer {
- weights := n.remotes.Weights()
- remotes := make([]api.Peer, 0, len(weights))
- for p := range weights {
- remotes = append(remotes, p)
- }
- return remotes
- }
// loadSecurityConfig loads, or if necessary bootstraps, this node's TLS
// credentials. It tries, in order: loading an existing certificate from disk;
// creating a brand-new root CA (when not joining and none exists); or
// downloading the root CA from the join address — and then either loads or
// requests a node certificate. As a side effect it sets n.role and n.nodeID
// and broadcasts on roleCond. The returned cancel function closes the
// SecurityConfig's watch queue.
func (n *Node) loadSecurityConfig(ctx context.Context, paths *ca.SecurityConfigPaths) (*ca.SecurityConfig, func() error, error) {
	var (
		securityConfig *ca.SecurityConfig
		cancel         func() error
	)

	krw := ca.NewKeyReadWriter(paths.Node, n.unlockKey, &manager.RaftDEKData{})
	if err := krw.Migrate(); err != nil {
		return nil, nil, err
	}

	// Check if we already have a valid certificates on disk.
	rootCA, err := ca.GetLocalRootCA(paths.RootCA)
	if err != nil && err != ca.ErrNoLocalRootCA {
		return nil, nil, err
	}
	if err == nil {
		// if forcing a new cluster, we allow the certificates to be expired - a new set will be generated
		securityConfig, cancel, err = ca.LoadSecurityConfig(ctx, rootCA, krw, n.config.ForceNewCluster)
		if err != nil {
			_, isInvalidKEK := errors.Cause(err).(ca.ErrInvalidKEK)
			if isInvalidKEK {
				return nil, nil, ErrInvalidUnlockKey
			} else if !os.IsNotExist(err) {
				return nil, nil, errors.Wrapf(err, "error while loading TLS certificate in %s", paths.Node.Cert)
			}
			// A not-exist error falls through with securityConfig == nil so
			// that the bootstrap/download path below runs.
		}
	}

	if securityConfig == nil {
		if n.config.JoinAddr == "" {
			// if we're not joining a cluster, bootstrap a new one - and we have to set the unlock key
			n.unlockKey = nil
			if n.config.AutoLockManagers {
				n.unlockKey = encryption.GenerateSecretKey()
			}
			krw = ca.NewKeyReadWriter(paths.Node, n.unlockKey, &manager.RaftDEKData{})
			rootCA, err = ca.CreateRootCA(ca.DefaultRootCN)
			if err != nil {
				return nil, nil, err
			}
			if err := ca.SaveRootCA(rootCA, paths.RootCA); err != nil {
				return nil, nil, err
			}
			log.G(ctx).Debug("generated CA key and certificate")
		} else if err == ca.ErrNoLocalRootCA { // from previous error loading the root CA from disk
			rootCA, err = ca.DownloadRootCA(ctx, paths.RootCA, n.config.JoinToken, n.connBroker)
			if err != nil {
				return nil, nil, err
			}
			log.G(ctx).Debug("downloaded CA certificate")
		}

		// Obtain new certs and setup TLS certificates renewal for this node:
		// - If certificates weren't present on disk, we call CreateSecurityConfig, which blocks
		//   until a valid certificate has been issued.
		// - We wait for CreateSecurityConfig to finish since we need a certificate to operate.

		// Attempt to load certificate from disk
		securityConfig, cancel, err = ca.LoadSecurityConfig(ctx, rootCA, krw, n.config.ForceNewCluster)
		if err == nil {
			log.G(ctx).WithFields(logrus.Fields{
				"node.id": securityConfig.ClientTLSCreds.NodeID(),
			}).Debugf("loaded TLS certificate")
		} else {
			if _, ok := errors.Cause(err).(ca.ErrInvalidKEK); ok {
				return nil, nil, ErrInvalidUnlockKey
			}
			log.G(ctx).WithError(err).Debugf("no node credentials found in: %s", krw.Target())

			// No certificate on disk: request one (blocks until issued).
			securityConfig, cancel, err = rootCA.CreateSecurityConfig(ctx, krw, ca.CertificateRequestConfig{
				Token:        n.config.JoinToken,
				Availability: n.config.Availability,
				ConnBroker:   n.connBroker,
			})
			if err != nil {
				return nil, nil, err
			}
		}
	}

	// Side effect: publish the identity derived from the certificate and wake
	// any waitRole callers.
	n.Lock()
	n.role = securityConfig.ClientTLSCreds.Role()
	n.nodeID = securityConfig.ClientTLSCreds.NodeID()
	n.roleCond.Broadcast()
	n.Unlock()

	return securityConfig, cancel, nil
}
- func (n *Node) initManagerConnection(ctx context.Context, ready chan<- struct{}) error {
- opts := []grpc.DialOption{
- grpc.WithUnaryInterceptor(grpc_prometheus.UnaryClientInterceptor),
- grpc.WithStreamInterceptor(grpc_prometheus.StreamClientInterceptor),
- }
- insecureCreds := credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})
- opts = append(opts, grpc.WithTransportCredentials(insecureCreds))
- addr := n.config.ListenControlAPI
- opts = append(opts, grpc.WithDialer(
- func(addr string, timeout time.Duration) (net.Conn, error) {
- return xnet.DialTimeoutLocal(addr, timeout)
- }))
- conn, err := grpc.Dial(addr, opts...)
- if err != nil {
- return err
- }
- client := api.NewHealthClient(conn)
- for {
- resp, err := client.Check(ctx, &api.HealthCheckRequest{Service: "ControlAPI"})
- if err != nil {
- return err
- }
- if resp.Status == api.HealthCheckResponse_SERVING {
- break
- }
- time.Sleep(500 * time.Millisecond)
- }
- n.setControlSocket(conn)
- if ready != nil {
- close(ready)
- }
- return nil
- }
// waitRole takes a context and a role. it the blocks until the context is
// canceled or the node's role updates to the provided role. returns nil when
// the node has acquired the provided role, or ctx.Err() if the context is
// canceled
func (n *Node) waitRole(ctx context.Context, role string) error {
	n.roleCond.L.Lock()
	// Fast path: already in the requested role.
	if role == n.role {
		n.roleCond.L.Unlock()
		return nil
	}
	finishCh := make(chan struct{})
	defer close(finishCh)
	// Helper goroutine: a sync.Cond cannot wait on a context, so on ctx
	// cancellation we broadcast to wake the Wait below; finishCh stops this
	// goroutine once waitRole returns.
	go func() {
		select {
		case <-finishCh:
		case <-ctx.Done():
			// call broadcast to shutdown this function
			n.roleCond.Broadcast()
		}
	}()
	defer n.roleCond.L.Unlock()
	for role != n.role {
		// Wait releases the lock while blocked and reacquires it on wakeup;
		// every wakeup rechecks both the role and the context.
		n.roleCond.Wait()
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}
	}
	return nil
}
// runManager runs the manager on this node. It returns a boolean indicating if
// the stoppage was due to a role change (clearData), and an error indicating
// why the manager stopped.
func (n *Node) runManager(ctx context.Context, securityConfig *ca.SecurityConfig, rootPaths ca.CertPaths, ready chan struct{}, workerRole <-chan struct{}) (bool, error) {
	// First, set up this manager's advertise and listen addresses, if
	// provided. they might not be provided if this node is joining the cluster
	// instead of creating a new one.
	var remoteAPI *manager.RemoteAddrs
	if n.config.ListenRemoteAPI != "" {
		remoteAPI = &manager.RemoteAddrs{
			ListenAddr:    n.config.ListenRemoteAPI,
			AdvertiseAddr: n.config.AdvertiseRemoteAPI,
		}
	}
	joinAddr := n.config.JoinAddr
	if joinAddr == "" {
		// No explicit join address was configured; fall back to a previously
		// observed remote, if any, so a restarted manager can rejoin.
		remoteAddr, err := n.remotes.Select(n.NodeID())
		if err == nil {
			joinAddr = remoteAddr.Addr
		}
	}
	m, err := manager.New(&manager.Config{
		ForceNewCluster:  n.config.ForceNewCluster,
		RemoteAPI:        remoteAPI,
		ControlAPI:       n.config.ListenControlAPI,
		SecurityConfig:   securityConfig,
		ExternalCAs:      n.config.ExternalCAs,
		JoinRaft:         joinAddr,
		ForceJoin:        n.config.JoinAddr != "",
		StateDir:         n.config.StateDir,
		HeartbeatTick:    n.config.HeartbeatTick,
		ElectionTick:     n.config.ElectionTick,
		AutoLockManagers: n.config.AutoLockManagers,
		UnlockKey:        n.unlockKey,
		Availability:     n.config.Availability,
		PluginGetter:     n.config.PluginGetter,
		RootCAPaths:      rootPaths,
	})
	if err != nil {
		return false, err
	}
	// The done channel is used to signal that the manager has exited.
	done := make(chan struct{})
	// runErr is an error value set by the goroutine that runs the manager.
	// It is only read after done is closed, so no extra synchronization is
	// needed.
	var runErr error
	// The context used to start this might have a logger associated with it
	// that we'd like to reuse, but we don't want to use that context, so we
	// pass to the goroutine only the logger, and create a new context with
	// that logger.
	go func(logger *logrus.Entry) {
		if err := m.Run(log.WithLogger(context.Background(), logger)); err != nil {
			runErr = err
		}
		close(done)
	}(log.G(ctx))
	// clearData is set in the select below, and is used to signal why the
	// manager is stopping, and indicate whether or not to delete raft data and
	// keys when stopping the manager.
	var clearData bool
	defer func() {
		// On the way out: forget the manager, stop it (possibly wiping its
		// state), wait for its goroutine to finish, and drop the control
		// socket connection.
		n.Lock()
		n.manager = nil
		n.Unlock()
		m.Stop(ctx, clearData)
		<-done
		n.setControlSocket(nil)
	}()
	n.Lock()
	n.manager = m
	n.Unlock()
	connCtx, connCancel := context.WithCancel(ctx)
	defer connCancel()
	// launch a goroutine that will manage our local connection to the manager
	// from the agent. Remember the managerReady channel created way back in
	// run? This is actually where we close it. Not when the manager starts,
	// but when a connection to the control socket has been established.
	go n.initManagerConnection(connCtx, ready)
	// wait for manager stop or for role change
	// The manager can be stopped one of 4 ways:
	// 1. The manager may have errored out and returned an error, closing the
	//    done channel in the process
	// 2. The node may have been demoted to a worker. In this case, we're gonna
	//    have to stop the manager ourselves, setting clearData to true so the
	//    local raft data, certs, keys, etc, are nuked.
	// 3. The manager may have been booted from raft. This could happen if it's
	//    removed from the raft quorum but the role update hasn't registered
	//    yet. The fact that there is more than 1 code path to cause the
	//    manager to exit is a possible source of bugs.
	// 4. The context may have been canceled from above, in which case we
	//    should stop the manager ourselves, but indicate that this is NOT a
	//    demotion.
	select {
	case <-done:
		return false, runErr
	case <-workerRole:
		log.G(ctx).Info("role changed to worker, stopping manager")
		clearData = true
	case <-m.RemovedFromRaft():
		log.G(ctx).Info("manager removed from raft cluster, stopping manager")
		clearData = true
	case <-ctx.Done():
		return false, ctx.Err()
	}
	return clearData, nil
}
// superviseManager controls whether or not we are running a manager on this
// node.
func (n *Node) superviseManager(ctx context.Context, securityConfig *ca.SecurityConfig, rootPaths ca.CertPaths, ready chan struct{}, renewer *ca.TLSRenewer) error {
	// superviseManager is a loop, because we can come in and out of being a
	// manager, and need to appropriately handle that without disrupting the
	// node functionality.
	for {
		// if we're not a manager, we're just gonna park here and wait until we
		// are. For normal agent nodes, we'll stay here forever, as intended.
		if err := n.waitRole(ctx, ca.ManagerRole); err != nil {
			return err
		}
		// Once we know we are a manager, we get ourselves ready for when we
		// lose that role. we create a channel to signal that we've become a
		// worker, and close it when n.waitRole completes.
		workerRole := make(chan struct{})
		waitRoleCtx, waitRoleCancel := context.WithCancel(ctx)
		go func() {
			if n.waitRole(waitRoleCtx, ca.WorkerRole) == nil {
				close(workerRole)
			}
		}()
		// the ready channel passed to superviseManager is in turn passed down
		// to the runManager function. It's used to signal to the caller that
		// the manager has started.
		wasRemoved, err := n.runManager(ctx, securityConfig, rootPaths, ready, workerRole)
		if err != nil {
			waitRoleCancel()
			return errors.Wrap(err, "manager stopped")
		}
		// If the manager stopped running and our role is still
		// "manager", it's possible that the manager was demoted and
		// the agent hasn't realized this yet. We should wait for the
		// role to change instead of restarting the manager immediately.
		err = func() error {
			timer := time.NewTimer(roleChangeTimeout)
			defer timer.Stop()
			defer waitRoleCancel()
			select {
			case <-timer.C:
				// Timed out waiting for a demotion; fall through to decide
				// whether restarting the manager is safe.
			case <-workerRole:
				// Demotion observed: loop around and wait to become a
				// manager again.
				return nil
			case <-ctx.Done():
				return ctx.Err()
			}
			if !wasRemoved {
				log.G(ctx).Warn("failed to get worker role after manager stop, restarting manager")
				return nil
			}
			// We need to be extra careful about restarting the
			// manager. It may cause the node to wrongly join under
			// a new Raft ID. Since we didn't see a role change
			// yet, force a certificate renewal. If the certificate
			// comes back with a worker role, we know we shouldn't
			// restart the manager. However, if we don't see
			// workerRole get closed, it means we didn't switch to
			// a worker certificate, either because we couldn't
			// contact a working CA, or because we've been
			// re-promoted. In this case, we must assume we were
			// re-promoted, and restart the manager.
			log.G(ctx).Warn("failed to get worker role after manager stop, forcing certificate renewal")
			timer.Reset(roleChangeTimeout)
			renewer.Renew()
			// Now that the renewal request has been sent to the
			// renewal goroutine, wait for a change in role.
			select {
			case <-timer.C:
				log.G(ctx).Warn("failed to get worker role after manager stop, restarting manager")
			case <-workerRole:
			case <-ctx.Done():
				return ctx.Err()
			}
			return nil
		}()
		if err != nil {
			return err
		}
		// set ready to nil after the first time we've gone through this, as we
		// don't need to signal after the first time that the manager is ready.
		ready = nil
	}
}
- // DowngradeKey reverts the node key to older format so that it can
- // run on older version of swarmkit
- func (n *Node) DowngradeKey() error {
- paths := ca.NewConfigPaths(filepath.Join(n.config.StateDir, certDirectory))
- krw := ca.NewKeyReadWriter(paths.Node, n.config.UnlockKey, nil)
- return krw.DowngradeKey()
- }
// persistentRemotes wraps a remotes.Remotes and saves the peer list to disk
// whenever it changes, so the set of known remotes survives restarts.
type persistentRemotes struct {
	sync.RWMutex
	c *sync.Cond // wakes WaitSelect waiters when the peer set changes
	remotes.Remotes
	storePath      string     // path of the on-disk cluster state file
	lastSavedState []api.Peer // last peer set written, used to skip no-op saves
}
- func newPersistentRemotes(f string, peers ...api.Peer) *persistentRemotes {
- pr := &persistentRemotes{
- storePath: f,
- Remotes: remotes.NewRemotes(peers...),
- }
- pr.c = sync.NewCond(pr.RLocker())
- return pr
- }
- func (s *persistentRemotes) Observe(peer api.Peer, weight int) {
- s.Lock()
- defer s.Unlock()
- s.Remotes.Observe(peer, weight)
- s.c.Broadcast()
- if err := s.save(); err != nil {
- logrus.Errorf("error writing cluster state file: %v", err)
- return
- }
- return
- }
- func (s *persistentRemotes) Remove(peers ...api.Peer) {
- s.Lock()
- defer s.Unlock()
- s.Remotes.Remove(peers...)
- if err := s.save(); err != nil {
- logrus.Errorf("error writing cluster state file: %v", err)
- return
- }
- return
- }
- func (s *persistentRemotes) save() error {
- weights := s.Weights()
- remotes := make([]api.Peer, 0, len(weights))
- for r := range weights {
- remotes = append(remotes, r)
- }
- sort.Sort(sortablePeers(remotes))
- if reflect.DeepEqual(remotes, s.lastSavedState) {
- return nil
- }
- dt, err := json.Marshal(remotes)
- if err != nil {
- return err
- }
- s.lastSavedState = remotes
- return ioutils.AtomicWriteFile(s.storePath, dt, 0600)
- }
// WaitSelect waits until at least one remote becomes available and then
// selects one. The selected peer (if any) is delivered on the returned
// channel, which is closed when selection finishes or ctx is canceled.
func (s *persistentRemotes) WaitSelect(ctx context.Context) <-chan api.Peer {
	c := make(chan api.Peer, 1)
	s.RLock()
	done := make(chan struct{})
	// Watcher goroutine: sync.Cond cannot wake on context cancellation, so
	// broadcast when ctx is done. done stops the watcher once the selection
	// goroutine below has finished.
	go func() {
		select {
		case <-ctx.Done():
			s.c.Broadcast()
		case <-done:
		}
	}()
	go func() {
		// Note: the read lock taken above is released here, on this
		// goroutine; s.c waits on the RLocker, which re-acquires it.
		defer s.RUnlock()
		defer close(c)
		defer close(done)
		for {
			// Re-check cancellation after every wakeup; the broadcast from
			// the watcher may be what woke us.
			if ctx.Err() != nil {
				return
			}
			p, err := s.Select()
			if err == nil {
				// A peer is available: deliver it (the channel is buffered,
				// so this never blocks) and finish.
				c <- p
				return
			}
			// No peers yet; wait for Observe to broadcast a change.
			s.c.Wait()
		}
	}()
	return c
}
// sortablePeers is a sort.Interface wrapper for []api.Peer.
type sortablePeers []api.Peer

// Less orders peers by their node IDs.
func (sp sortablePeers) Less(i, j int) bool { return sp[i].NodeID < sp[j].NodeID }

// Len returns the number of peers.
func (sp sortablePeers) Len() int { return len(sp) }

// Swap exchanges the peers at indexes i and j.
func (sp sortablePeers) Swap(i, j int) { sp[i], sp[j] = sp[j], sp[i] }
// firstSessionErrorTracker is a utility that helps determine whether the agent should exit after
// a TLS failure on establishing the first session. This should only happen if a join address
// is specified. If establishing the first session succeeds, but later on some session fails
// because of a TLS error, we don't want to exit the agent because a previously successful
// session indicates that the TLS error may be a transient issue.
type firstSessionErrorTracker struct {
	mu               sync.Mutex
	pastFirstSession bool  // set once any session has been established
	err              error // most recently recorded session error
}
- func (fs *firstSessionErrorTracker) SessionEstablished() {
- fs.mu.Lock()
- fs.pastFirstSession = true
- fs.mu.Unlock()
- }
- func (fs *firstSessionErrorTracker) SessionError(err error) {
- fs.mu.Lock()
- fs.err = err
- fs.mu.Unlock()
- }
- func (fs *firstSessionErrorTracker) SessionClosed() error {
- fs.mu.Lock()
- defer fs.mu.Unlock()
- // unfortunately grpc connection errors are type grpc.rpcError, which are not exposed, and we can't get at the underlying error type
- if !fs.pastFirstSession && grpc.Code(fs.err) == codes.Internal &&
- strings.HasPrefix(grpc.ErrorDesc(fs.err), "connection error") && strings.Contains(grpc.ErrorDesc(fs.err), "transport: x509:") {
- return fs.err
- }
- return nil
- }
|