diff --git a/hack/vendor.sh b/hack/vendor.sh
index df578bec66..9ab45de322 100755
--- a/hack/vendor.sh
+++ b/hack/vendor.sh
@@ -139,7 +139,7 @@ clone git github.com/docker/docker-credential-helpers v0.3.0
 clone git github.com/docker/containerd v0.2.4
 
 # cluster
-clone git github.com/docker/swarmkit de507ff6b0ee99002d56a784e095c753eab1ad61
+clone git github.com/docker/swarmkit a2abe794f7a1cfe0ed376fbc7c107ab3d6cf7705
 clone git github.com/golang/mock bd3c8e81be01eef76d4b503f5e687d2d1354d2d9
 clone git github.com/gogo/protobuf 43a2e0b1c32252bfbbdf81f7faa7a88fb3fa4028
 clone git github.com/cloudflare/cfssl b895b0549c0ff676f92cf09ba971ae02bb41367b
diff --git a/vendor/src/github.com/docker/swarmkit/agent/agent.go b/vendor/src/github.com/docker/swarmkit/agent/agent.go
index a85eafcf3b..14da074721 100644
--- a/vendor/src/github.com/docker/swarmkit/agent/agent.go
+++ b/vendor/src/github.com/docker/swarmkit/agent/agent.go
@@ -204,7 +204,6 @@ func (a *Agent) run(ctx context.Context) {
 			delay := time.Duration(rand.Int63n(int64(backoff)))
 			session = newSession(ctx, a, delay)
 			registered = session.registered
-			sessionq = a.sessionq
 		case <-a.stopped:
 			// TODO(stevvooe): Wait on shutdown and cleanup. May need to pump
 			// this loop a few times.
@@ -320,7 +319,7 @@ func (a *Agent) UpdateTaskStatus(ctx context.Context, taskID string, status *api
 			err = nil // dispatcher no longer cares about this task.
 		} else {
 			log.G(ctx).WithError(err).Error("closing session after fatal error")
-			session.close()
+			session.sendError(err)
 		}
 	} else {
 		log.G(ctx).Debug("task status reported")
diff --git a/vendor/src/github.com/docker/swarmkit/agent/session.go b/vendor/src/github.com/docker/swarmkit/agent/session.go
index 6ad6a4f22e..638b75e170 100644
--- a/vendor/src/github.com/docker/swarmkit/agent/session.go
+++ b/vendor/src/github.com/docker/swarmkit/agent/session.go
@@ -301,6 +301,16 @@ func (s *session) sendTaskStatuses(ctx context.Context, updates ...*api.UpdateTa
 	return updates[n:], nil
 }
 
+// sendError is used to send errors to the errs channel and trigger session recreation
+func (s *session) sendError(err error) {
+	select {
+	case s.errs <- err:
+	case <-s.closed:
+	}
+}
+
+// close closes the session. It should be called only from the <-session.errs
+// branch of the event loop.
 func (s *session) close() error {
 	s.closeOnce.Do(func() {
 		if s.conn != nil {
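The sendError/close split above addresses a racy pattern: callers such as UpdateTaskStatus used to tear the session down directly via session.close(), racing with the agent's event loop. The vendored fix funnels fatal errors through the errs channel so only the loop's owner closes the session, and the select on the closed channel keeps late senders from blocking forever. Below is a minimal, self-contained sketch of that pattern; the worker type, its fields, and the main function are hypothetical illustrations, not swarmkit's actual session struct.

```go
package main

import (
	"errors"
	"fmt"
	"sync"
)

// worker is a hypothetical stand-in for swarmkit's session: one event loop
// reads errs, and the closed channel acts as a shutdown guard.
type worker struct {
	errs      chan error
	closed    chan struct{}
	closeOnce sync.Once
}

// sendError reports a fatal error to the event loop. If the worker is
// already closed, the <-w.closed case fires instead, so senders never block.
func (w *worker) sendError(err error) {
	select {
	case w.errs <- err:
	case <-w.closed:
	}
}

// close is invoked only by the event loop, after it reads from errs.
func (w *worker) close() {
	w.closeOnce.Do(func() { close(w.closed) })
}

func main() {
	w := &worker{errs: make(chan error), closed: make(chan struct{})}
	go w.sendError(errors.New("fatal dispatcher error"))

	// Event loop: the single place where close happens.
	err := <-w.errs
	fmt.Println("recreating session after:", err)
	w.close()

	// A straggler reporting after shutdown no longer deadlocks.
	w.sendError(errors.New("late error"))
}
```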
diff --git a/vendor/src/github.com/docker/swarmkit/ca/certificates.go b/vendor/src/github.com/docker/swarmkit/ca/certificates.go
index 2d0fe24f66..f631c199c9 100644
--- a/vendor/src/github.com/docker/swarmkit/ca/certificates.go
+++ b/vendor/src/github.com/docker/swarmkit/ca/certificates.go
@@ -683,26 +683,27 @@ func GetRemoteSignedCertificate(ctx context.Context, csr []byte, token string, r
 	}
 }
 
-// readCertExpiration returns the number of months left for certificate expiration
-func readCertExpiration(paths CertPaths) (time.Duration, error) {
+// readCertValidity returns the certificate's issue and expiration times
+func readCertValidity(paths CertPaths) (time.Time, time.Time, error) {
+	var zeroTime time.Time
 	// Read the Cert
 	cert, err := ioutil.ReadFile(paths.Cert)
 	if err != nil {
 		log.Debugf("failed to read certificate file: %s", paths.Cert)
-		return time.Hour, err
+		return zeroTime, zeroTime, err
 	}
 
 	// Create an x509 certificate out of the contents on disk
 	certBlock, _ := pem.Decode([]byte(cert))
 	if certBlock == nil {
-		return time.Hour, fmt.Errorf("failed to decode certificate block")
+		return zeroTime, zeroTime, errors.New("failed to decode certificate block")
 	}
 	X509Cert, err := x509.ParseCertificate(certBlock.Bytes)
 	if err != nil {
-		return time.Hour, err
+		return zeroTime, zeroTime, err
 	}
 
-	return X509Cert.NotAfter.Sub(time.Now()), nil
+	return X509Cert.NotBefore, X509Cert.NotAfter, nil
 }
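The core of readCertValidity is standard-library plumbing: pem.Decode plus x509.ParseCertificate, now returning the raw NotBefore/NotAfter pair (with zero times on error) instead of a pre-computed duration and the old time.Hour sentinel. A runnable sketch of the same parsing follows; readValidity is a simplified stand-in, and the self-signed certificate is generated inline so the example does not depend on swarmkit's CertPaths.

```go
package main

import (
	"crypto/ecdsa"
	"crypto/elliptic"
	"crypto/rand"
	"crypto/x509"
	"crypto/x509/pkix"
	"encoding/pem"
	"errors"
	"fmt"
	"math/big"
	"time"
)

// readValidity mirrors the patched readCertValidity: it returns the
// certificate's NotBefore/NotAfter and zero times on every error path.
func readValidity(certPEM []byte) (time.Time, time.Time, error) {
	var zero time.Time
	block, _ := pem.Decode(certPEM)
	if block == nil {
		return zero, zero, errors.New("failed to decode certificate block")
	}
	cert, err := x509.ParseCertificate(block.Bytes)
	if err != nil {
		return zero, zero, err
	}
	return cert.NotBefore, cert.NotAfter, nil
}

func main() {
	// Build a throwaway self-signed cert so the example is self-contained.
	key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
	if err != nil {
		panic(err)
	}
	tmpl := x509.Certificate{
		SerialNumber: big.NewInt(1),
		Subject:      pkix.Name{CommonName: "example-node"},
		NotBefore:    time.Now(),
		NotAfter:     time.Now().Add(90 * 24 * time.Hour),
	}
	der, err := x509.CreateCertificate(rand.Reader, &tmpl, &tmpl, &key.PublicKey, key)
	if err != nil {
		panic(err)
	}
	certPEM := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der})

	from, until, err := readValidity(certPEM)
	if err != nil {
		panic(err)
	}
	fmt.Printf("valid from %s until %s\n", from, until)
}
```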
diff --git a/vendor/src/github.com/docker/swarmkit/ca/config.go b/vendor/src/github.com/docker/swarmkit/ca/config.go
index e831cc0812..c5d77e4324 100644
--- a/vendor/src/github.com/docker/swarmkit/ca/config.go
+++ b/vendor/src/github.com/docker/swarmkit/ca/config.go
@@ -313,8 +313,8 @@ func RenewTLSConfig(ctx context.Context, s *SecurityConfig, baseCertDir string,
 		// Since the expiration of the certificate is managed remotely, we should update our
 		// retry timer on every iteration of this loop.
 
-		// Retrieve the time until the certificate expires.
-		expiresIn, err := readCertExpiration(paths.Node)
+		// Retrieve the current certificate expiration information.
+		validFrom, validUntil, err := readCertValidity(paths.Node)
 		if err != nil {
 			// We failed to read the expiration, let's stick with the starting default
 			log.Errorf("failed to read the expiration of the TLS certificate in: %s", paths.Node.Cert)
 		} else {
 			// If we have an expired certificate, let's stick with the starting default in
 			// the hope that this is a temporary clock skew.
-			if expiresIn.Minutes() < 0 {
-				log.Debugf("failed to create a new client TLS config: %v", err)
-				updates <- CertificateUpdate{Err: fmt.Errorf("TLS Certificate is expired")}
+			if validUntil.Before(time.Now()) {
+				log.WithError(err).Errorf("failed to create a new client TLS config")
+				updates <- CertificateUpdate{Err: errors.New("TLS certificate is expired")}
 			} else {
 				// Random retry time between 50% and 80% of the total time to expiration
-				retry = calculateRandomExpiry(expiresIn)
+				retry = calculateRandomExpiry(validFrom, validUntil)
 			}
 		}
 
@@ -391,18 +391,16 @@ func RenewTLSConfig(ctx context.Context, s *SecurityConfig, baseCertDir string,
 	return updates
 }
 
-// calculateRandomExpiry returns a random duration between 50% and 80% of the original
-// duration
-func calculateRandomExpiry(expiresIn time.Duration) time.Duration {
-	if expiresIn.Minutes() < 1 {
-		return time.Second
-	}
+// calculateRandomExpiry returns a random duration between 50% and 80% of the
+// original validity period
+func calculateRandomExpiry(validFrom, validUntil time.Time) time.Duration {
+	duration := validUntil.Sub(validFrom)
 
 	var randomExpiry int
 	// Our lower bound of renewal will be half of the total expiration time
-	minValidity := int(expiresIn.Minutes() * CertLowerRotationRange)
+	minValidity := int(duration.Minutes() * CertLowerRotationRange)
 	// Our upper bound of renewal will be 80% of the total expiration time
-	maxValidity := int(expiresIn.Minutes() * CertUpperRotationRange)
+	maxValidity := int(duration.Minutes() * CertUpperRotationRange)
 	// Let's select a random number of minutes between min and max, and set our retry for that
 	// Using randomly selected rotation allows us to avoid certificate thundering herds.
 	if maxValidity-minValidity < 1 {
@@ -411,7 +409,11 @@ func calculateRandomExpiry(expiresIn time.Duration) time.Duration {
 		randomExpiry = rand.Intn(maxValidity-minValidity) + int(minValidity)
 	}
 
-	return time.Duration(randomExpiry) * time.Minute
+	expiry := validFrom.Add(time.Duration(randomExpiry) * time.Minute).Sub(time.Now())
+	if expiry < 0 {
+		return 0
+	}
+	return expiry
 }
 
 // LoadTLSCreds loads tls credentials from the specified path and verifies that
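The rewritten calculateRandomExpiry anchors the renewal point to the certificate's issue time rather than to "now": it picks a random minute between 50% and 80% of the full NotBefore-to-NotAfter window, then converts that absolute instant into a remaining delay, clamped at zero if it has already passed. This way a node that restarts with a half-used certificate does not reset its rotation clock. A runnable sketch of that arithmetic is below; renewalDelay is a hypothetical mirror of the patched function, with the 0.5/0.8 bounds inlined where swarmkit uses its CertLowerRotationRange/CertUpperRotationRange constants.

```go
package main

import (
	"fmt"
	"math/rand"
	"time"
)

const (
	lowerRotationRange = 0.5 // stand-in for swarmkit's CertLowerRotationRange
	upperRotationRange = 0.8 // stand-in for swarmkit's CertUpperRotationRange
)

// renewalDelay measures the random renewal point from validFrom, so the
// schedule is stable across process restarts.
func renewalDelay(validFrom, validUntil time.Time) time.Duration {
	duration := validUntil.Sub(validFrom)

	minValidity := int(duration.Minutes() * lowerRotationRange)
	maxValidity := int(duration.Minutes() * upperRotationRange)

	var randomExpiry int
	if maxValidity-minValidity < 1 {
		randomExpiry = minValidity
	} else {
		// Random minute between min and max avoids renewal thundering herds.
		randomExpiry = rand.Intn(maxValidity-minValidity) + minValidity
	}

	// Convert the absolute renewal instant into a delay from now,
	// clamping at zero if that instant has already passed.
	expiry := validFrom.Add(time.Duration(randomExpiry) * time.Minute).Sub(time.Now())
	if expiry < 0 {
		return 0
	}
	return expiry
}

func main() {
	// A 90-day certificate issued 60 days ago: the renewal point lands
	// between day 45 and day 72, so the delay is at most ~12 days and
	// may already be zero.
	validFrom := time.Now().Add(-60 * 24 * time.Hour)
	validUntil := validFrom.Add(90 * 24 * time.Hour)
	fmt.Println("renew in:", renewalDelay(validFrom, validUntil))
}
```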
diff --git a/vendor/src/github.com/docker/swarmkit/manager/state/raft/raft.go b/vendor/src/github.com/docker/swarmkit/manager/state/raft/raft.go
index cbda2c1db2..bc22205447 100644
--- a/vendor/src/github.com/docker/swarmkit/manager/state/raft/raft.go
+++ b/vendor/src/github.com/docker/swarmkit/manager/state/raft/raft.go
@@ -95,7 +95,7 @@ type Node struct {
 	wait                *wait
 	wal                 *wal.WAL
 	snapshotter         *snap.Snapshotter
-	restored            bool
+	campaignWhenAble    bool
 	signalledLeadership uint32
 	isMember            uint32
 	joinAddr            string
@@ -281,6 +281,7 @@ func (n *Node) JoinAndStart() error {
 		if n.joinAddr != "" {
 			n.Config.Logger.Warning("ignoring request to join cluster, because raft state already exists")
 		}
+		n.campaignWhenAble = true
 		n.Node = raft.RestartNode(n.Config)
 		atomic.StoreUint32(&n.isMember, 1)
 		return nil
@@ -424,15 +425,16 @@ func (n *Node) Run(ctx context.Context) error {
 			// If we are the only registered member after
 			// restoring from the state, campaign to be the
 			// leader.
-			if !n.restored {
-				// Node ID should be in the progress list to Campaign
-				_, ok := n.Node.Status().Progress[n.Config.ID]
-				if len(n.cluster.Members()) <= 1 && ok {
+			if n.campaignWhenAble {
+				members := n.cluster.Members()
+				if len(members) >= 1 {
+					n.campaignWhenAble = false
+				}
+				if len(members) == 1 && members[n.Config.ID] != nil {
 					if err := n.Campaign(n.Ctx); err != nil {
 						panic("raft: cannot campaign to be the leader on node restore")
 					}
 				}
-				n.restored = true
 			}
 
 			// Advance the state machine
@@ -887,6 +889,7 @@ func (n *Node) registerNode(node *api.RaftMember) error {
 		}
 		return err
 	}
+	return nil
 }
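Renaming restored to campaignWhenAble inverts the flag's sense: instead of marking "the first tick already happened" (and thus silently skipping the campaign if membership had not been restored yet), the node now keeps the flag set across ticks until the member list is actually available, consumes it then, and campaigns only if it is the sole registered member. The toy sketch below isolates just that latch; the node type, tick method, and members map are hypothetical illustrations, not swarmkit's raft.Node.

```go
package main

import "fmt"

// node is a toy stand-in for swarmkit's raft.Node, showing only the
// campaignWhenAble latch: the flag survives loop ticks until membership
// data is available, then is consumed exactly once.
type node struct {
	id               uint64
	campaignWhenAble bool
	members          map[uint64]bool // hypothetical stand-in for cluster.Members()
}

func (n *node) tick() {
	if n.campaignWhenAble {
		// Unlike the old restored flag, which flipped on the first tick no
		// matter what, the latch is cleared only once the member list has
		// been restored (non-empty).
		if len(n.members) >= 1 {
			n.campaignWhenAble = false
		}
		// Campaign only as the sole, registered member.
		if len(n.members) == 1 && n.members[n.id] {
			fmt.Println("campaigning to become leader")
		}
	}
}

func main() {
	n := &node{id: 1, campaignWhenAble: true, members: map[uint64]bool{}}
	n.tick() // membership not restored yet: latch stays set
	n.members[1] = true
	n.tick() // sole member now visible: campaigns exactly once
	n.tick() // latch consumed: no further campaigns
}
```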