浏览代码

vendor: update containerd and swarmkit

Signed-off-by: Alexander Morozov <lk4d4@docker.com>
Alexander Morozov 9 年之前
父节点
当前提交
eeceb0ccc8
共有 57 个文件被更改,包括 3572 次插入和 1969 次删除
  1. 1 1
      Dockerfile
  2. 1 1
      Dockerfile.aarch64
  3. 1 1
      Dockerfile.armhf
  4. 1 1
      Dockerfile.ppc64le
  5. 1 1
      Dockerfile.s390x
  6. 1 1
      Dockerfile.simple
  7. 2 2
      hack/vendor.sh
  8. 10 5
      vendor/src/github.com/docker/containerd/api/grpc/types/api.pb.go
  9. 0 34
      vendor/src/github.com/docker/engine-api/types/reference/image_reference.go
  10. 68 7
      vendor/src/github.com/docker/swarmkit/agent/agent.go
  11. 1 1
      vendor/src/github.com/docker/swarmkit/agent/config.go
  12. 7 2
      vendor/src/github.com/docker/swarmkit/agent/exec/controller.go
  13. 33 11
      vendor/src/github.com/docker/swarmkit/agent/node.go
  14. 84 47
      vendor/src/github.com/docker/swarmkit/agent/session.go
  15. 72 22
      vendor/src/github.com/docker/swarmkit/agent/worker.go
  16. 114 60
      vendor/src/github.com/docker/swarmkit/api/ca.pb.go
  17. 400 333
      vendor/src/github.com/docker/swarmkit/api/control.pb.go
  18. 730 71
      vendor/src/github.com/docker/swarmkit/api/dispatcher.pb.go
  19. 48 2
      vendor/src/github.com/docker/swarmkit/api/dispatcher.proto
  20. 9 4
      vendor/src/github.com/docker/swarmkit/api/duration/duration.pb.go
  21. 53 25
      vendor/src/github.com/docker/swarmkit/api/health.pb.go
  22. 157 98
      vendor/src/github.com/docker/swarmkit/api/objects.pb.go
  23. 4 0
      vendor/src/github.com/docker/swarmkit/api/objects.proto
  24. 132 76
      vendor/src/github.com/docker/swarmkit/api/raft.pb.go
  25. 1 1
      vendor/src/github.com/docker/swarmkit/api/raft.proto
  26. 70 40
      vendor/src/github.com/docker/swarmkit/api/resource.pb.go
  27. 6 3
      vendor/src/github.com/docker/swarmkit/api/snapshot.pb.go
  28. 49 41
      vendor/src/github.com/docker/swarmkit/api/specs.pb.go
  29. 9 4
      vendor/src/github.com/docker/swarmkit/api/timestamp/timestamp.pb.go
  30. 431 275
      vendor/src/github.com/docker/swarmkit/api/types.pb.go
  31. 43 9
      vendor/src/github.com/docker/swarmkit/api/types.proto
  32. 9 30
      vendor/src/github.com/docker/swarmkit/ca/certificates.go
  33. 51 22
      vendor/src/github.com/docker/swarmkit/ca/config.go
  34. 4 4
      vendor/src/github.com/docker/swarmkit/ca/server.go
  35. 19 27
      vendor/src/github.com/docker/swarmkit/ca/transport.go
  36. 8 0
      vendor/src/github.com/docker/swarmkit/manager/allocator/network.go
  37. 12 1
      vendor/src/github.com/docker/swarmkit/manager/allocator/networkallocator/portallocator.go
  38. 0 12
      vendor/src/github.com/docker/swarmkit/manager/controlapi/hackpicker/cluster.go
  39. 0 141
      vendor/src/github.com/docker/swarmkit/manager/controlapi/hackpicker/raftpicker.go
  40. 7 6
      vendor/src/github.com/docker/swarmkit/manager/controlapi/service.go
  41. 244 38
      vendor/src/github.com/docker/swarmkit/manager/dispatcher/dispatcher.go
  42. 7 33
      vendor/src/github.com/docker/swarmkit/manager/manager.go
  43. 4 3
      vendor/src/github.com/docker/swarmkit/manager/orchestrator/restart.go
  44. 195 41
      vendor/src/github.com/docker/swarmkit/manager/orchestrator/updater.go
  45. 0 12
      vendor/src/github.com/docker/swarmkit/manager/raftpicker/cluster.go
  46. 0 127
      vendor/src/github.com/docker/swarmkit/manager/raftpicker/raftpicker.go
  47. 20 0
      vendor/src/github.com/docker/swarmkit/manager/raftselector/raftselector.go
  48. 0 153
      vendor/src/github.com/docker/swarmkit/manager/scheduler/indexed_node_heap.go
  49. 48 13
      vendor/src/github.com/docker/swarmkit/manager/scheduler/nodeinfo.go
  50. 115 0
      vendor/src/github.com/docker/swarmkit/manager/scheduler/nodeset.go
  51. 130 59
      vendor/src/github.com/docker/swarmkit/manager/scheduler/scheduler.go
  52. 3 0
      vendor/src/github.com/docker/swarmkit/manager/state/raft/membership/cluster.go
  53. 114 56
      vendor/src/github.com/docker/swarmkit/manager/state/raft/raft.go
  54. 1 1
      vendor/src/github.com/docker/swarmkit/manager/state/raft/util.go
  55. 9 6
      vendor/src/github.com/docker/swarmkit/manager/state/raft/wait.go
  56. 24 1
      vendor/src/github.com/docker/swarmkit/manager/state/watch/watch.go
  57. 9 4
      vendor/src/github.com/docker/swarmkit/protobuf/plugin/plugin.pb.go

+ 1 - 1
Dockerfile

@@ -243,7 +243,7 @@ RUN set -x \
 	&& rm -rf "$GOPATH"
 
 # Install containerd
-ENV CONTAINERD_COMMIT 4c21ad662f71af56c0e6b29c0afef72df441d1ff
+ENV CONTAINERD_COMMIT 2545227b0357eb55e369fa0072baef9ad91cdb69
 RUN set -x \
 	&& export GOPATH="$(mktemp -d)" \
 	&& git clone https://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \

+ 1 - 1
Dockerfile.aarch64

@@ -186,7 +186,7 @@ RUN set -x \
 	&& rm -rf "$GOPATH"
 
 # Install containerd
-ENV CONTAINERD_COMMIT 4c21ad662f71af56c0e6b29c0afef72df441d1ff
+ENV CONTAINERD_COMMIT 2545227b0357eb55e369fa0072baef9ad91cdb69
 RUN set -x \
 	&& export GOPATH="$(mktemp -d)" \
 	&& git clone https://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \

+ 1 - 1
Dockerfile.armhf

@@ -184,7 +184,7 @@ RUN set -x \
 	&& rm -rf "$GOPATH"
 
 # Install containerd
-ENV CONTAINERD_COMMIT 4c21ad662f71af56c0e6b29c0afef72df441d1ff
+ENV CONTAINERD_COMMIT 2545227b0357eb55e369fa0072baef9ad91cdb69
 RUN set -x \
 	&& export GOPATH="$(mktemp -d)" \
 	&& git clone https://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \

+ 1 - 1
Dockerfile.ppc64le

@@ -204,7 +204,7 @@ RUN set -x \
 	&& rm -rf "$GOPATH"
 
 # Install containerd
-ENV CONTAINERD_COMMIT 4c21ad662f71af56c0e6b29c0afef72df441d1ff
+ENV CONTAINERD_COMMIT 2545227b0357eb55e369fa0072baef9ad91cdb69
 RUN set -x \
 	&& export GOPATH="$(mktemp -d)" \
 	&& git clone https://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \

+ 1 - 1
Dockerfile.s390x

@@ -196,7 +196,7 @@ RUN set -x \
 	&& rm -rf "$GOPATH"
 
 # Install containerd
-ENV CONTAINERD_COMMIT 4c21ad662f71af56c0e6b29c0afef72df441d1ff
+ENV CONTAINERD_COMMIT 2545227b0357eb55e369fa0072baef9ad91cdb69
 RUN set -x \
 	&& export GOPATH="$(mktemp -d)" \
 	&& git clone https://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \

+ 1 - 1
Dockerfile.simple

@@ -68,7 +68,7 @@ RUN set -x \
 	&& rm -rf "$GOPATH"
 
 # Install containerd
-ENV CONTAINERD_COMMIT 4c21ad662f71af56c0e6b29c0afef72df441d1ff
+ENV CONTAINERD_COMMIT 2545227b0357eb55e369fa0072baef9ad91cdb69
 RUN set -x \
 	&& export GOPATH="$(mktemp -d)" \
 	&& git clone https://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \

+ 2 - 2
hack/vendor.sh

@@ -141,10 +141,10 @@ clone git google.golang.org/cloud dae7e3d993bc3812a2185af60552bb6b847e52a0 https
 clone git github.com/docker/docker-credential-helpers v0.3.0
 
 # containerd
-clone git github.com/docker/containerd 4c21ad662f71af56c0e6b29c0afef72df441d1ff
+clone git github.com/docker/containerd 2545227b0357eb55e369fa0072baef9ad91cdb69
 
 # cluster
-clone git github.com/docker/swarmkit 27fbaef4ceed648bb575969ccc9083a6e104a719
+clone git github.com/docker/swarmkit 191acc1bbdb13d8ea3b8059dda14a12f8c3903f2
 clone git github.com/golang/mock bd3c8e81be01eef76d4b503f5e687d2d1354d2d9
 clone git github.com/gogo/protobuf v0.3
 clone git github.com/cloudflare/cfssl 7fb22c8cba7ecaf98e4082d22d65800cf45e042a

+ 10 - 5
vendor/src/github.com/docker/containerd/api/grpc/types/api.pb.go

@@ -75,7 +75,9 @@ var _ = math.Inf
 
 // This is a compile-time assertion to ensure that this generated file
 // is compatible with the proto package it is being compiled against.
-const _ = proto.ProtoPackageIsVersion1
+// A compilation error at this line likely means your copy of the
+// proto package needs to be updated.
+const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package
 
 type GetServerVersionRequest struct {
 }
@@ -223,7 +225,7 @@ func (*Rlimit) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{9} }
 type User struct {
 	Uid            uint32   `protobuf:"varint,1,opt,name=uid" json:"uid,omitempty"`
 	Gid            uint32   `protobuf:"varint,2,opt,name=gid" json:"gid,omitempty"`
-	AdditionalGids []uint32 `protobuf:"varint,3,rep,name=additionalGids" json:"additionalGids,omitempty"`
+	AdditionalGids []uint32 `protobuf:"varint,3,rep,packed,name=additionalGids" json:"additionalGids,omitempty"`
 }
 
 func (m *User) Reset()                    { *m = User{} }
@@ -385,7 +387,7 @@ type Container struct {
 	Processes  []*Process `protobuf:"bytes,3,rep,name=processes" json:"processes,omitempty"`
 	Status     string     `protobuf:"bytes,4,opt,name=status" json:"status,omitempty"`
 	Labels     []string   `protobuf:"bytes,5,rep,name=labels" json:"labels,omitempty"`
-	Pids       []uint32   `protobuf:"varint,6,rep,name=pids" json:"pids,omitempty"`
+	Pids       []uint32   `protobuf:"varint,6,rep,packed,name=pids" json:"pids,omitempty"`
 	Runtime    string     `protobuf:"bytes,7,opt,name=runtime" json:"runtime,omitempty"`
 }
 
@@ -628,7 +630,7 @@ func (*NetworkStats) Descriptor() ([]byte, []int) { return fileDescriptor0, []in
 
 type CpuUsage struct {
 	TotalUsage        uint64   `protobuf:"varint,1,opt,name=total_usage,json=totalUsage" json:"total_usage,omitempty"`
-	PercpuUsage       []uint64 `protobuf:"varint,2,rep,name=percpu_usage,json=percpuUsage" json:"percpu_usage,omitempty"`
+	PercpuUsage       []uint64 `protobuf:"varint,2,rep,packed,name=percpu_usage,json=percpuUsage" json:"percpu_usage,omitempty"`
 	UsageInKernelmode uint64   `protobuf:"varint,3,opt,name=usage_in_kernelmode,json=usageInKernelmode" json:"usage_in_kernelmode,omitempty"`
 	UsageInUsermode   uint64   `protobuf:"varint,4,opt,name=usage_in_usermode,json=usageInUsermode" json:"usage_in_usermode,omitempty"`
 }
@@ -978,7 +980,7 @@ var _ grpc.ClientConn
 
 // This is a compile-time assertion to ensure that this generated file
 // is compatible with the grpc package it is being compiled against.
-const _ = grpc.SupportPackageIsVersion2
+const _ = grpc.SupportPackageIsVersion3
 
 // Client API for API service
 
@@ -1432,8 +1434,11 @@ var _API_serviceDesc = grpc.ServiceDesc{
 			ServerStreams: true,
 		},
 	},
+	Metadata: fileDescriptor0,
 }
 
+func init() { proto.RegisterFile("api.proto", fileDescriptor0) }
+
 var fileDescriptor0 = []byte{
 	// 2604 bytes of a gzipped FileDescriptorProto
 	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0xec, 0x59, 0x4b, 0x6f, 0x1c, 0x5b,

+ 0 - 34
vendor/src/github.com/docker/engine-api/types/reference/image_reference.go

@@ -1,34 +0,0 @@
-package reference
-
-import (
-	distreference "github.com/docker/distribution/reference"
-)
-
-// Parse parses the given references and returns the repository and
-// tag (if present) from it. If there is an error during parsing, it will
-// return an error.
-func Parse(ref string) (string, string, error) {
-	distributionRef, err := distreference.ParseNamed(ref)
-	if err != nil {
-		return "", "", err
-	}
-
-	tag := GetTagFromNamedRef(distributionRef)
-	return distributionRef.Name(), tag, nil
-}
-
-// GetTagFromNamedRef returns a tag from the specified reference.
-// This function is necessary as long as the docker "server" api makes the distinction between repository
-// and tags.
-func GetTagFromNamedRef(ref distreference.Named) string {
-	var tag string
-	switch x := ref.(type) {
-	case distreference.Digested:
-		tag = x.Digest().String()
-	case distreference.NamedTagged:
-		tag = x.Tag()
-	default:
-		tag = "latest"
-	}
-	return tag
-}

+ 68 - 7
vendor/src/github.com/docker/swarmkit/agent/agent.go

@@ -15,6 +15,7 @@ import (
 const (
 	initialSessionFailureBackoff = 100 * time.Millisecond
 	maxSessionFailureBackoff     = 8 * time.Second
+	nodeUpdatePeriod             = 20 * time.Second
 )
 
 // Agent implements the primary node functionality for a member of a swarm
@@ -134,9 +135,18 @@ func (a *Agent) run(ctx context.Context) {
 	log.G(ctx).Debugf("(*Agent).run")
 	defer log.G(ctx).Debugf("(*Agent).run exited")
 
+	// get the node description
+	nodeDescription, err := a.nodeDescriptionWithHostname(ctx)
+	if err != nil {
+		log.G(ctx).WithError(err).WithField("agent", a.config.Executor).Errorf("agent: node description unavailable")
+	}
+	// nodeUpdateTicker is used to periodically check for updates to node description
+	nodeUpdateTicker := time.NewTicker(nodeUpdatePeriod)
+	defer nodeUpdateTicker.Stop()
+
 	var (
 		backoff    time.Duration
-		session    = newSession(ctx, a, backoff, "") // start the initial session
+		session    = newSession(ctx, a, backoff, "", nodeDescription) // start the initial session
 		registered = session.registered
 		ready      = a.ready // first session ready
 		sessionq   chan sessionOperation
@@ -158,9 +168,16 @@ func (a *Agent) run(ctx context.Context) {
 		select {
 		case operation := <-sessionq:
 			operation.response <- operation.fn(session)
-		case msg := <-session.tasks:
-			if err := a.worker.Assign(ctx, msg.Tasks); err != nil {
-				log.G(ctx).WithError(err).Error("task assignment failed")
+		case msg := <-session.assignments:
+			switch msg.Type {
+			case api.AssignmentsMessage_COMPLETE:
+				if err := a.worker.AssignTasks(ctx, msg.UpdateTasks); err != nil {
+					log.G(ctx).WithError(err).Error("failed to synchronize worker assignments")
+				}
+			case api.AssignmentsMessage_INCREMENTAL:
+				if err := a.worker.UpdateTasks(ctx, msg.UpdateTasks, msg.RemoveTasks); err != nil {
+					log.G(ctx).WithError(err).Error("failed to update worker assignments")
+				}
 			}
 		case msg := <-session.messages:
 			if err := a.handleSessionMessage(ctx, msg); err != nil {
@@ -197,10 +214,42 @@ func (a *Agent) run(ctx context.Context) {
 			log.G(ctx).Debugf("agent: rebuild session")
 
 			// select a session registration delay from backoff range.
-			delay := time.Duration(rand.Int63n(int64(backoff)))
-			session = newSession(ctx, a, delay, session.sessionID)
+			delay := time.Duration(0)
+			if backoff > 0 {
+				delay = time.Duration(rand.Int63n(int64(backoff)))
+			}
+			session = newSession(ctx, a, delay, session.sessionID, nodeDescription)
 			registered = session.registered
 			sessionq = a.sessionq
+		case <-nodeUpdateTicker.C:
+			// skip this case if the registration isn't finished
+			if registered != nil {
+				continue
+			}
+			// get the current node description
+			newNodeDescription, err := a.nodeDescriptionWithHostname(ctx)
+			if err != nil {
+				log.G(ctx).WithError(err).WithField("agent", a.config.Executor).Errorf("agent: updated node description unavailable")
+			}
+
+			// if newNodeDescription is nil, it will cause a panic when
+			// trying to create a session. Typically this can happen
+			// if the engine goes down
+			if newNodeDescription == nil {
+				continue
+			}
+
+			// if the node description has changed, update it to the new one
+			// and close the session. The old session will be stopped and a
+			// new one will be created with the updated description
+			if !reflect.DeepEqual(nodeDescription, newNodeDescription) {
+				nodeDescription = newNodeDescription
+				// close the session
+				log.G(ctx).Info("agent: found node update")
+				if err := session.close(); err != nil {
+					log.G(ctx).WithError(err).Error("agent: closing session for node update failed")
+				}
+			}
 		case <-a.stopped:
 			// TODO(stevvooe): Wait on shutdown and cleanup. May need to pump
 			// this loop a few times.
@@ -315,7 +364,8 @@ func (a *Agent) UpdateTaskStatus(ctx context.Context, taskID string, status *api
 				if err == errTaskUnknown {
 					err = nil // dispatcher no longer cares about this task.
 				} else {
-					log.G(ctx).WithError(err).Error("sending task status update failed")
+					log.G(ctx).WithError(err).Error("closing session after fatal error")
+					session.close()
 				}
 			} else {
 				log.G(ctx).Debug("task status reported")
@@ -337,6 +387,17 @@ func (a *Agent) UpdateTaskStatus(ctx context.Context, taskID string, status *api
 	}
 }
 
+// nodeDescriptionWithHostname retrieves node description, and overrides hostname if available
+func (a *Agent) nodeDescriptionWithHostname(ctx context.Context) (*api.NodeDescription, error) {
+	desc, err := a.config.Executor.Describe(ctx)
+
+	// Override hostname
+	if a.config.Hostname != "" && desc != nil {
+		desc.Hostname = a.config.Hostname
+	}
+	return desc, err
+}
+
 // nodesEqual returns true if the node states are functionally equal, ignoring status,
 // version and other superfluous fields.
 //

+ 1 - 1
vendor/src/github.com/docker/swarmkit/agent/config.go

@@ -29,7 +29,7 @@ type Config struct {
 	NotifyRoleChange chan<- api.NodeRole
 
 	// Credentials is credentials for grpc connection to manager.
-	Credentials credentials.TransportAuthenticator
+	Credentials credentials.TransportCredentials
 }
 
 func (c *Config) validate() error {

+ 7 - 2
vendor/src/github.com/docker/swarmkit/agent/exec/controller.go

@@ -147,7 +147,7 @@ func Do(ctx context.Context, task *api.Task, ctlr Controller) (*api.TaskStatus,
 		if cs, ok := err.(ContainerStatuser); ok {
 			var err error
 			containerStatus, err = cs.ContainerStatus(ctx)
-			if err != nil {
+			if err != nil && !contextDoneError(err) {
 				log.G(ctx).WithError(err).Error("error resolving container status on fatal")
 			}
 		}
@@ -207,7 +207,7 @@ func Do(ctx context.Context, task *api.Task, ctlr Controller) (*api.TaskStatus,
 
 			var err error
 			containerStatus, err = cctlr.ContainerStatus(ctx)
-			if err != nil {
+			if err != nil && !contextDoneError(err) {
 				log.G(ctx).WithError(err).Error("container status unavailable")
 			}
 
@@ -297,3 +297,8 @@ func logStateChange(ctx context.Context, desired, previous, next api.TaskState)
 		log.G(ctx).WithFields(fields).Debug("state changed")
 	}
 }
+
+func contextDoneError(err error) bool {
+	cause := errors.Cause(err)
+	return cause == context.Canceled || cause == context.DeadlineExceeded
+}

+ 33 - 11
vendor/src/github.com/docker/swarmkit/agent/node.go

@@ -120,7 +120,7 @@ func NewNode(c *NodeConfig) (*Node, error) {
 
 	n := &Node{
 		remotes:              newPersistentRemotes(stateFile, p...),
-		role:                 ca.AgentRole,
+		role:                 ca.WorkerRole,
 		config:               c,
 		started:              make(chan struct{}),
 		stopped:              make(chan struct{}),
@@ -194,7 +194,9 @@ func (n *Node) run(ctx context.Context) (err error) {
 		select {
 		case <-ctx.Done():
 		case resp := <-issueResponseChan:
-			logrus.Debugf("Requesting certificate for NodeID: %v", resp.NodeID)
+			log.G(log.WithModule(ctx, "tls")).WithFields(logrus.Fields{
+				"node.id": resp.NodeID,
+			}).Debugf("requesting certificate")
 			n.Lock()
 			n.nodeID = resp.NodeID
 			n.nodeMembership = resp.NodeMembership
@@ -233,7 +235,7 @@ func (n *Node) run(ctx context.Context) (err error) {
 			case apirole := <-n.roleChangeReq:
 				n.Lock()
 				lastRole := n.role
-				role := ca.AgentRole
+				role := ca.WorkerRole
 				if apirole == api.NodeRoleManager {
 					role = ca.ManagerRole
 				}
@@ -242,7 +244,7 @@ func (n *Node) run(ctx context.Context) (err error) {
 					continue
 				}
 				// switch role to agent immediately to shutdown manager early
-				if role == ca.AgentRole {
+				if role == ca.WorkerRole {
 					n.role = role
 					n.roleCond.Broadcast()
 				}
@@ -343,7 +345,7 @@ func (n *Node) Err(ctx context.Context) error {
 	}
 }
 
-func (n *Node) runAgent(ctx context.Context, db *bolt.DB, creds credentials.TransportAuthenticator, ready chan<- struct{}) error {
+func (n *Node) runAgent(ctx context.Context, db *bolt.DB, creds credentials.TransportCredentials, ready chan<- struct{}) error {
 	select {
 	case <-ctx.Done():
 	case <-n.remotes.WaitSelect(ctx):
@@ -588,7 +590,7 @@ func (n *Node) runManager(ctx context.Context, securityConfig *ca.SecurityConfig
 			return err
 		}
 
-		remoteAddr, _ := n.remotes.Select(n.nodeID)
+		remoteAddr, _ := n.remotes.Select(n.NodeID())
 		m, err := manager.New(&manager.Config{
 			ForceNewCluster: n.config.ForceNewCluster,
 			ProtoAddr: map[string]string{
@@ -607,8 +609,9 @@ func (n *Node) runManager(ctx context.Context, securityConfig *ca.SecurityConfig
 			return err
 		}
 		done := make(chan struct{})
+		var runErr error
 		go func() {
-			m.Run(context.Background()) // todo: store error
+			runErr = m.Run(context.Background())
 			close(done)
 		}()
 
@@ -624,14 +627,31 @@ func (n *Node) runManager(ctx context.Context, securityConfig *ca.SecurityConfig
 			go func(ready chan struct{}) {
 				select {
 				case <-ready:
-					n.remotes.Observe(api.Peer{NodeID: n.nodeID, Addr: n.config.ListenRemoteAPI}, remotes.DefaultObservationWeight)
+					n.remotes.Observe(api.Peer{NodeID: n.NodeID(), Addr: n.config.ListenRemoteAPI}, remotes.DefaultObservationWeight)
 				case <-connCtx.Done():
 				}
 			}(ready)
 			ready = nil
 		}
 
-		err = n.waitRole(ctx, ca.AgentRole)
+		roleChanged := make(chan error)
+		waitCtx, waitCancel := context.WithCancel(ctx)
+		go func() {
+			err := n.waitRole(waitCtx, ca.WorkerRole)
+			roleChanged <- err
+		}()
+
+		select {
+		case <-done:
+			// Fail out if m.Run() returns error, otherwise wait for
+			// role change.
+			if runErr != nil {
+				err = runErr
+			} else {
+				err = <-roleChanged
+			}
+		case err = <-roleChanged:
+		}
 
 		n.Lock()
 		n.manager = nil
@@ -646,6 +666,7 @@ func (n *Node) runManager(ctx context.Context, securityConfig *ca.SecurityConfig
 		}
 		connCancel()
 		n.setControlSocket(nil)
+		waitCancel()
 
 		if err != nil {
 			return err
@@ -672,17 +693,18 @@ func newPersistentRemotes(f string, peers ...api.Peer) *persistentRemotes {
 
 func (s *persistentRemotes) Observe(peer api.Peer, weight int) {
 	s.Lock()
+	defer s.Unlock()
 	s.Remotes.Observe(peer, weight)
 	s.c.Broadcast()
 	if err := s.save(); err != nil {
 		logrus.Errorf("error writing cluster state file: %v", err)
-		s.Unlock()
 		return
 	}
-	s.Unlock()
 	return
 }
 func (s *persistentRemotes) Remove(peers ...api.Peer) {
+	s.Lock()
+	defer s.Unlock()
 	s.Remotes.Remove(peers...)
 	if err := s.save(); err != nil {
 		logrus.Errorf("error writing cluster state file: %v", err)

+ 84 - 47
vendor/src/github.com/docker/swarmkit/agent/session.go

@@ -2,8 +2,10 @@ package agent
 
 import (
 	"errors"
+	"sync"
 	"time"
 
+	"github.com/Sirupsen/logrus"
 	"github.com/docker/swarmkit/api"
 	"github.com/docker/swarmkit/log"
 	"github.com/docker/swarmkit/protobuf/ptypes"
@@ -31,26 +33,27 @@ type session struct {
 	conn *grpc.ClientConn
 	addr string
 
-	agent     *Agent
-	sessionID string
-	session   api.Dispatcher_SessionClient
-	errs      chan error
-	messages  chan *api.SessionMessage
-	tasks     chan *api.TasksMessage
+	agent       *Agent
+	sessionID   string
+	session     api.Dispatcher_SessionClient
+	errs        chan error
+	messages    chan *api.SessionMessage
+	assignments chan *api.AssignmentsMessage
 
 	registered chan struct{} // closed registration
 	closed     chan struct{}
+	closeOnce  sync.Once
 }
 
-func newSession(ctx context.Context, agent *Agent, delay time.Duration, sessionID string) *session {
+func newSession(ctx context.Context, agent *Agent, delay time.Duration, sessionID string, description *api.NodeDescription) *session {
 	s := &session{
-		agent:      agent,
-		sessionID:  sessionID,
-		errs:       make(chan error, 1),
-		messages:   make(chan *api.SessionMessage),
-		tasks:      make(chan *api.TasksMessage),
-		registered: make(chan struct{}),
-		closed:     make(chan struct{}),
+		agent:       agent,
+		sessionID:   sessionID,
+		errs:        make(chan error, 1),
+		messages:    make(chan *api.SessionMessage),
+		assignments: make(chan *api.AssignmentsMessage),
+		registered:  make(chan struct{}),
+		closed:      make(chan struct{}),
 	}
 	peer, err := agent.config.Managers.Select()
 	if err != nil {
@@ -68,14 +71,14 @@ func newSession(ctx context.Context, agent *Agent, delay time.Duration, sessionI
 	s.addr = peer.Addr
 	s.conn = cc
 
-	go s.run(ctx, delay)
+	go s.run(ctx, delay, description)
 	return s
 }
 
-func (s *session) run(ctx context.Context, delay time.Duration) {
+func (s *session) run(ctx context.Context, delay time.Duration, description *api.NodeDescription) {
 	time.Sleep(delay) // delay before registering.
 
-	if err := s.start(ctx); err != nil {
+	if err := s.start(ctx, description); err != nil {
 		select {
 		case s.errs <- err:
 		case <-s.closed:
@@ -94,24 +97,14 @@ func (s *session) run(ctx context.Context, delay time.Duration) {
 }
 
 // start begins the session and returns the first SessionMessage.
-func (s *session) start(ctx context.Context) error {
+func (s *session) start(ctx context.Context, description *api.NodeDescription) error {
 	log.G(ctx).Debugf("(*session).start")
 
-	description, err := s.agent.config.Executor.Describe(ctx)
-	if err != nil {
-		log.G(ctx).WithError(err).WithField("executor", s.agent.config.Executor).
-			Errorf("node description unavailable")
-		return err
-	}
-	// Override hostname
-	if s.agent.config.Hostname != "" {
-		description.Hostname = s.agent.config.Hostname
-	}
-
 	errChan := make(chan error, 1)
 	var (
 		msg    *api.SessionMessage
 		stream api.Dispatcher_SessionClient
+		err    error
 	)
 	// Note: we don't defer cancellation of this context, because the
 	// streaming RPC is used after this function returned. We only cancel
@@ -215,22 +208,68 @@ func (s *session) handleSessionMessage(ctx context.Context, msg *api.SessionMess
 }
 
 func (s *session) watch(ctx context.Context) error {
-	log.G(ctx).Debugf("(*session).watch")
-	client := api.NewDispatcherClient(s.conn)
-	watch, err := client.Tasks(ctx, &api.TasksRequest{
-		SessionID: s.sessionID})
-	if err != nil {
-		return err
-	}
+	log := log.G(ctx).WithFields(logrus.Fields{"method": "(*session).watch"})
+	log.Debugf("")
+	var (
+		resp            *api.AssignmentsMessage
+		assignmentWatch api.Dispatcher_AssignmentsClient
+		tasksWatch      api.Dispatcher_TasksClient
+		streamReference string
+		tasksFallback   bool
+		err             error
+	)
 
+	client := api.NewDispatcherClient(s.conn)
 	for {
-		resp, err := watch.Recv()
-		if err != nil {
-			return err
+		// If this is the first time we're running the loop, or there was a reference mismatch
+		// attempt to get the assignmentWatch
+		if assignmentWatch == nil && !tasksFallback {
+			assignmentWatch, err = client.Assignments(ctx, &api.AssignmentsRequest{SessionID: s.sessionID})
+			if err != nil {
+				return err
+			}
+		}
+		// We have an assignmentWatch, let's try to receive an AssignmentMessage
+		if assignmentWatch != nil {
+			// If we get a code = 12 desc = unknown method Assignments, try to use tasks
+			resp, err = assignmentWatch.Recv()
+			if err != nil {
+				if grpc.Code(err) != codes.Unimplemented {
+					return err
+				}
+				tasksFallback = true
+				assignmentWatch = nil
+				log.WithError(err).Infof("falling back to Tasks")
+			}
+		}
+
+		// This code is here for backwards compatibility (so that newer clients can use the
+		// older method Tasks)
+		if tasksWatch == nil && tasksFallback {
+			tasksWatch, err = client.Tasks(ctx, &api.TasksRequest{SessionID: s.sessionID})
+			if err != nil {
+				return err
+			}
+		}
+		if tasksWatch != nil {
+			var taskResp *api.TasksMessage
+			taskResp, err = tasksWatch.Recv()
+			if err != nil {
+				return err
+			}
+			resp = &api.AssignmentsMessage{Type: api.AssignmentsMessage_COMPLETE, UpdateTasks: taskResp.Tasks}
+		}
+
+		// If there seems to be a gap in the stream, let's break out of the inner for and
+		// re-sync (by calling Assignments again).
+		if streamReference != "" && streamReference != resp.AppliesTo {
+			assignmentWatch = nil
+		} else {
+			streamReference = resp.ResultsIn
 		}
 
 		select {
-		case s.tasks <- resp:
+		case s.assignments <- resp:
 		case <-s.closed:
 			return errSessionClosed
 		case <-ctx.Done():
@@ -241,7 +280,6 @@ func (s *session) watch(ctx context.Context) error {
 
 // sendTaskStatus uses the current session to send the status of a single task.
 func (s *session) sendTaskStatus(ctx context.Context, taskID string, status *api.TaskStatus) error {
-
 	client := api.NewDispatcherClient(s.conn)
 	if _, err := client.UpdateTaskStatus(ctx, &api.UpdateTaskStatusRequest{
 		SessionID: s.sessionID,
@@ -302,15 +340,14 @@ func (s *session) sendTaskStatuses(ctx context.Context, updates ...*api.UpdateTa
 }
 
 func (s *session) close() error {
-	select {
-	case <-s.closed:
-		return errSessionClosed
-	default:
+	s.closeOnce.Do(func() {
 		if s.conn != nil {
 			s.agent.config.Managers.ObserveIfExists(api.Peer{Addr: s.addr}, -remotes.DefaultObservationWeight)
 			s.conn.Close()
 		}
+
 		close(s.closed)
-		return nil
-	}
+	})
+
+	return nil
 }

+ 72 - 22
vendor/src/github.com/docker/swarmkit/agent/worker.go

@@ -17,9 +17,13 @@ type Worker interface {
 	// Init prepares the worker for task assignment.
 	Init(ctx context.Context) error
 
-	// Assign the set of tasks to the worker. Tasks outside of this set will be
-	// removed.
-	Assign(ctx context.Context, tasks []*api.Task) error
+	// AssignTasks assigns a complete set of tasks to a worker. Any task not included in
+	// this set will be removed.
+	AssignTasks(ctx context.Context, tasks []*api.Task) error
+
+	// UpdateTasks updates an incremental set of tasks to the worker. Any task not included
+	// either in added or removed will remain untouched.
+	UpdateTasks(ctx context.Context, added []*api.Task, removed []string) error
 
 	// Listen to updates about tasks controlled by the worker. When first
 	// called, the reporter will receive all updates for all tasks controlled
@@ -86,14 +90,37 @@ func (w *worker) Init(ctx context.Context) error {
 	})
 }
 
-// Assign the set of tasks to the worker. Any tasks not previously known will
+// AssignTasks assigns the set of tasks to the worker. Any tasks not previously known will
 // be started. Any tasks that are in the task set and already running will be
 // updated, if possible. Any tasks currently running on the
 // worker outside the task set will be terminated.
-func (w *worker) Assign(ctx context.Context, tasks []*api.Task) error {
+func (w *worker) AssignTasks(ctx context.Context, tasks []*api.Task) error {
 	w.mu.Lock()
 	defer w.mu.Unlock()
 
+	log.G(ctx).WithFields(logrus.Fields{
+		"len(tasks)": len(tasks),
+	}).Debug("(*worker).AssignTasks")
+
+	return reconcileTaskState(ctx, w, tasks, nil, true)
+}
+
+// UpdateTasks applies an incremental update to the set of tasks assigned to the worker.
+// Tasks in the added set will be added to the worker, and tasks in the removed set
+// will be removed from the worker
+func (w *worker) UpdateTasks(ctx context.Context, added []*api.Task, removed []string) error {
+	w.mu.Lock()
+	defer w.mu.Unlock()
+
+	log.G(ctx).WithFields(logrus.Fields{
+		"len(added)":   len(added),
+		"len(removed)": len(removed),
+	}).Debug("(*worker).UpdateTasks")
+
+	return reconcileTaskState(ctx, w, added, removed, false)
+}
+
+func reconcileTaskState(ctx context.Context, w *worker, added []*api.Task, removed []string, fullSnapshot bool) error {
 	tx, err := w.db.Begin(true)
 	if err != nil {
 		log.G(ctx).WithError(err).Error("failed starting transaction against task database")
@@ -101,10 +128,9 @@ func (w *worker) Assign(ctx context.Context, tasks []*api.Task) error {
 	}
 	defer tx.Rollback()
 
-	log.G(ctx).WithField("len(tasks)", len(tasks)).Debug("(*worker).Assign")
 	assigned := map[string]struct{}{}
 
-	for _, task := range tasks {
+	for _, task := range added {
 		log.G(ctx).WithFields(
 			logrus.Fields{
 				"task.id":           task.ID,
@@ -135,35 +161,59 @@ func (w *worker) Assign(ctx context.Context, tasks []*api.Task) error {
 					return err
 				}
 			} else {
-				task.Status = *status // overwrite the stale manager status with ours.
+				task.Status = *status
 			}
-
 			w.startTask(ctx, tx, task)
 		}
 
 		assigned[task.ID] = struct{}{}
 	}
 
-	for id, tm := range w.taskManagers {
-		if _, ok := assigned[id]; ok {
-			continue
+	closeManager := func(tm *taskManager) {
+		// when a task is no longer assigned, we shutdown the task manager for
+		// it and leave cleanup to the sweeper.
+		if err := tm.Close(); err != nil {
+			log.G(ctx).WithError(err).Error("error closing task manager")
 		}
+	}
 
-		ctx := log.WithLogger(ctx, log.G(ctx).WithField("task.id", id))
-		if err := SetTaskAssignment(tx, id, false); err != nil {
+	removeTaskAssignment := func(taskID string) error {
+		ctx := log.WithLogger(ctx, log.G(ctx).WithField("task.id", taskID))
+		if err := SetTaskAssignment(tx, taskID, false); err != nil {
 			log.G(ctx).WithError(err).Error("error setting task assignment in database")
-			continue
 		}
+		return err
+	}
+
+	// If this was a complete set of assignments, we're going to remove all the remaining
+	// tasks.
+	if fullSnapshot {
+		for id, tm := range w.taskManagers {
+			if _, ok := assigned[id]; ok {
+				continue
+			}
 
-		delete(w.taskManagers, id)
+			err := removeTaskAssignment(id)
+			if err == nil {
+				delete(w.taskManagers, id)
+				go closeManager(tm)
+			}
+		}
+	} else {
+		// If this was an incremental set of assignments, we're going to remove only the tasks
+		// in the removed set
+		for _, taskID := range removed {
+			err := removeTaskAssignment(taskID)
+			if err != nil {
+				continue
+			}
 
-		go func(tm *taskManager) {
-			// when a task is no longer assigned, we shutdown the task manager for
-			// it and leave cleanup to the sweeper.
-			if err := tm.Close(); err != nil {
-				log.G(ctx).WithError(err).Error("error closing task manager")
+			tm, ok := w.taskManagers[taskID]
+			if ok {
+				delete(w.taskManagers, taskID)
+				go closeManager(tm)
 			}
-		}(tm)
+		}
 	}
 
 	return tx.Commit()

+ 114 - 60
vendor/src/github.com/docker/swarmkit/api/ca.pb.go

@@ -21,10 +21,11 @@ import (
 	grpc "google.golang.org/grpc"
 )
 
-import raftpicker "github.com/docker/swarmkit/manager/raftpicker"
+import raftselector "github.com/docker/swarmkit/manager/raftselector"
 import codes "google.golang.org/grpc/codes"
 import metadata "google.golang.org/grpc/metadata"
 import transport "google.golang.org/grpc/transport"
+import time "time"
 
 import io "io"
 
@@ -285,11 +286,12 @@ func valueToGoStringCa(v interface{}, typ string) string {
 	pv := reflect.Indirect(rv).Interface()
 	return fmt.Sprintf("func(v %v) *%v { return &v } ( %#v )", typ, typ, pv)
 }
-func extensionToGoStringCa(e map[int32]github_com_gogo_protobuf_proto.Extension) string {
+func extensionToGoStringCa(m github_com_gogo_protobuf_proto.Message) string {
+	e := github_com_gogo_protobuf_proto.GetUnsafeExtensionsMap(m)
 	if e == nil {
 		return "nil"
 	}
-	s := "map[int32]proto.Extension{"
+	s := "proto.NewUnsafeXXX_InternalExtensions(map[int32]proto.Extension{"
 	keys := make([]int, 0, len(e))
 	for k := range e {
 		keys = append(keys, int(k))
@@ -299,7 +301,7 @@ func extensionToGoStringCa(e map[int32]github_com_gogo_protobuf_proto.Extension)
 	for _, k := range keys {
 		ss = append(ss, strconv.Itoa(k)+": "+e[int32(k)].GoString())
 	}
-	s += strings.Join(ss, ",") + "}"
+	s += strings.Join(ss, ",") + "})"
 	return s
 }
 
@@ -309,7 +311,7 @@ var _ grpc.ClientConn
 
 // This is a compile-time assertion to ensure that this generated file
 // is compatible with the grpc package it is being compiled against.
-const _ = grpc.SupportPackageIsVersion2
+const _ = grpc.SupportPackageIsVersion3
 
 // Client API for CA service
 
@@ -371,7 +373,8 @@ var _CA_serviceDesc = grpc.ServiceDesc{
 			Handler:    _CA_GetRootCACertificate_Handler,
 		},
 	},
-	Streams: []grpc.StreamDesc{},
+	Streams:  []grpc.StreamDesc{},
+	Metadata: fileDescriptorCa,
 }
 
 // Client API for NodeCA service
@@ -467,7 +470,8 @@ var _NodeCA_serviceDesc = grpc.ServiceDesc{
 			Handler:    _NodeCA_NodeCertificateStatus_Handler,
 		},
 	},
-	Streams: []grpc.StreamDesc{},
+	Streams:  []grpc.StreamDesc{},
+	Metadata: fileDescriptorCa,
 }
 
 func (m *NodeCertificateStatusRequest) Marshal() (data []byte, err error) {
@@ -668,12 +672,11 @@ func encodeVarintCa(data []byte, offset int, v uint64) int {
 
 type raftProxyCAServer struct {
 	local        CAServer
-	connSelector raftpicker.Interface
-	cluster      raftpicker.RaftCluster
+	connSelector raftselector.ConnProvider
 	ctxMods      []func(context.Context) (context.Context, error)
 }
 
-func NewRaftProxyCAServer(local CAServer, connSelector raftpicker.Interface, cluster raftpicker.RaftCluster, ctxMod func(context.Context) (context.Context, error)) CAServer {
+func NewRaftProxyCAServer(local CAServer, connSelector raftselector.ConnProvider, ctxMod func(context.Context) (context.Context, error)) CAServer {
 	redirectChecker := func(ctx context.Context) (context.Context, error) {
 		s, ok := transport.StreamFromContext(ctx)
 		if !ok {
@@ -695,7 +698,6 @@ func NewRaftProxyCAServer(local CAServer, connSelector raftpicker.Interface, clu
 
 	return &raftProxyCAServer{
 		local:        local,
-		cluster:      cluster,
 		connSelector: connSelector,
 		ctxMods:      mods,
 	}
@@ -710,44 +712,68 @@ func (p *raftProxyCAServer) runCtxMods(ctx context.Context) (context.Context, er
 	}
 	return ctx, nil
 }
+func (p *raftProxyCAServer) pollNewLeaderConn(ctx context.Context) (*grpc.ClientConn, error) {
+	ticker := time.NewTicker(500 * time.Millisecond)
+	defer ticker.Stop()
+	for {
+		select {
+		case <-ticker.C:
+			conn, err := p.connSelector.LeaderConn(ctx)
+			if err != nil {
+				return nil, err
+			}
 
-func (p *raftProxyCAServer) GetRootCACertificate(ctx context.Context, r *GetRootCACertificateRequest) (*GetRootCACertificateResponse, error) {
+			client := NewHealthClient(conn)
 
-	if p.cluster.IsLeader() {
-		return p.local.GetRootCACertificate(ctx, r)
+			resp, err := client.Check(ctx, &HealthCheckRequest{Service: "Raft"})
+			if err != nil || resp.Status != HealthCheckResponse_SERVING {
+				continue
+			}
+			return conn, nil
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		}
 	}
-	ctx, err := p.runCtxMods(ctx)
+}
+
+func (p *raftProxyCAServer) GetRootCACertificate(ctx context.Context, r *GetRootCACertificateRequest) (*GetRootCACertificateResponse, error) {
+
+	conn, err := p.connSelector.LeaderConn(ctx)
 	if err != nil {
+		if err == raftselector.ErrIsLeader {
+			return p.local.GetRootCACertificate(ctx, r)
+		}
 		return nil, err
 	}
-	conn, err := p.connSelector.Conn()
+	modCtx, err := p.runCtxMods(ctx)
 	if err != nil {
 		return nil, err
 	}
 
-	defer func() {
+	resp, err := NewCAClient(conn).GetRootCACertificate(modCtx, r)
+	if err != nil {
+		if !strings.Contains(err.Error(), "is closing") && !strings.Contains(err.Error(), "the connection is unavailable") && !strings.Contains(err.Error(), "connection error") {
+			return resp, err
+		}
+		conn, err := p.pollNewLeaderConn(ctx)
 		if err != nil {
-			errStr := err.Error()
-			if strings.Contains(errStr, grpc.ErrClientConnClosing.Error()) ||
-				strings.Contains(errStr, grpc.ErrClientConnTimeout.Error()) ||
-				strings.Contains(errStr, "connection error") ||
-				grpc.Code(err) == codes.Internal {
-				p.connSelector.Reset()
+			if err == raftselector.ErrIsLeader {
+				return p.local.GetRootCACertificate(ctx, r)
 			}
+			return nil, err
 		}
-	}()
-
-	return NewCAClient(conn).GetRootCACertificate(ctx, r)
+		return NewCAClient(conn).GetRootCACertificate(modCtx, r)
+	}
+	return resp, err
 }
 
 type raftProxyNodeCAServer struct {
 	local        NodeCAServer
-	connSelector raftpicker.Interface
-	cluster      raftpicker.RaftCluster
+	connSelector raftselector.ConnProvider
 	ctxMods      []func(context.Context) (context.Context, error)
 }
 
-func NewRaftProxyNodeCAServer(local NodeCAServer, connSelector raftpicker.Interface, cluster raftpicker.RaftCluster, ctxMod func(context.Context) (context.Context, error)) NodeCAServer {
+func NewRaftProxyNodeCAServer(local NodeCAServer, connSelector raftselector.ConnProvider, ctxMod func(context.Context) (context.Context, error)) NodeCAServer {
 	redirectChecker := func(ctx context.Context) (context.Context, error) {
 		s, ok := transport.StreamFromContext(ctx)
 		if !ok {
@@ -769,7 +795,6 @@ func NewRaftProxyNodeCAServer(local NodeCAServer, connSelector raftpicker.Interf
 
 	return &raftProxyNodeCAServer{
 		local:        local,
-		cluster:      cluster,
 		connSelector: connSelector,
 		ctxMods:      mods,
 	}
@@ -784,63 +809,90 @@ func (p *raftProxyNodeCAServer) runCtxMods(ctx context.Context) (context.Context
 	}
 	return ctx, nil
 }
+func (p *raftProxyNodeCAServer) pollNewLeaderConn(ctx context.Context) (*grpc.ClientConn, error) {
+	ticker := time.NewTicker(500 * time.Millisecond)
+	defer ticker.Stop()
+	for {
+		select {
+		case <-ticker.C:
+			conn, err := p.connSelector.LeaderConn(ctx)
+			if err != nil {
+				return nil, err
+			}
 
-func (p *raftProxyNodeCAServer) IssueNodeCertificate(ctx context.Context, r *IssueNodeCertificateRequest) (*IssueNodeCertificateResponse, error) {
+			client := NewHealthClient(conn)
 
-	if p.cluster.IsLeader() {
-		return p.local.IssueNodeCertificate(ctx, r)
+			resp, err := client.Check(ctx, &HealthCheckRequest{Service: "Raft"})
+			if err != nil || resp.Status != HealthCheckResponse_SERVING {
+				continue
+			}
+			return conn, nil
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		}
 	}
-	ctx, err := p.runCtxMods(ctx)
+}
+
+func (p *raftProxyNodeCAServer) IssueNodeCertificate(ctx context.Context, r *IssueNodeCertificateRequest) (*IssueNodeCertificateResponse, error) {
+
+	conn, err := p.connSelector.LeaderConn(ctx)
 	if err != nil {
+		if err == raftselector.ErrIsLeader {
+			return p.local.IssueNodeCertificate(ctx, r)
+		}
 		return nil, err
 	}
-	conn, err := p.connSelector.Conn()
+	modCtx, err := p.runCtxMods(ctx)
 	if err != nil {
 		return nil, err
 	}
 
-	defer func() {
+	resp, err := NewNodeCAClient(conn).IssueNodeCertificate(modCtx, r)
+	if err != nil {
+		if !strings.Contains(err.Error(), "is closing") && !strings.Contains(err.Error(), "the connection is unavailable") && !strings.Contains(err.Error(), "connection error") {
+			return resp, err
+		}
+		conn, err := p.pollNewLeaderConn(ctx)
 		if err != nil {
-			errStr := err.Error()
-			if strings.Contains(errStr, grpc.ErrClientConnClosing.Error()) ||
-				strings.Contains(errStr, grpc.ErrClientConnTimeout.Error()) ||
-				strings.Contains(errStr, "connection error") ||
-				grpc.Code(err) == codes.Internal {
-				p.connSelector.Reset()
+			if err == raftselector.ErrIsLeader {
+				return p.local.IssueNodeCertificate(ctx, r)
 			}
+			return nil, err
 		}
-	}()
-
-	return NewNodeCAClient(conn).IssueNodeCertificate(ctx, r)
+		return NewNodeCAClient(conn).IssueNodeCertificate(modCtx, r)
+	}
+	return resp, err
 }
 
 func (p *raftProxyNodeCAServer) NodeCertificateStatus(ctx context.Context, r *NodeCertificateStatusRequest) (*NodeCertificateStatusResponse, error) {
 
-	if p.cluster.IsLeader() {
-		return p.local.NodeCertificateStatus(ctx, r)
-	}
-	ctx, err := p.runCtxMods(ctx)
+	conn, err := p.connSelector.LeaderConn(ctx)
 	if err != nil {
+		if err == raftselector.ErrIsLeader {
+			return p.local.NodeCertificateStatus(ctx, r)
+		}
 		return nil, err
 	}
-	conn, err := p.connSelector.Conn()
+	modCtx, err := p.runCtxMods(ctx)
 	if err != nil {
 		return nil, err
 	}
 
-	defer func() {
+	resp, err := NewNodeCAClient(conn).NodeCertificateStatus(modCtx, r)
+	if err != nil {
+		if !strings.Contains(err.Error(), "is closing") && !strings.Contains(err.Error(), "the connection is unavailable") && !strings.Contains(err.Error(), "connection error") {
+			return resp, err
+		}
+		conn, err := p.pollNewLeaderConn(ctx)
 		if err != nil {
-			errStr := err.Error()
-			if strings.Contains(errStr, grpc.ErrClientConnClosing.Error()) ||
-				strings.Contains(errStr, grpc.ErrClientConnTimeout.Error()) ||
-				strings.Contains(errStr, "connection error") ||
-				grpc.Code(err) == codes.Internal {
-				p.connSelector.Reset()
+			if err == raftselector.ErrIsLeader {
+				return p.local.NodeCertificateStatus(ctx, r)
 			}
+			return nil, err
 		}
-	}()
-
-	return NewNodeCAClient(conn).NodeCertificateStatus(ctx, r)
+		return NewNodeCAClient(conn).NodeCertificateStatus(modCtx, r)
+	}
+	return resp, err
 }
 
 func (m *NodeCertificateStatusRequest) Size() (n int) {
@@ -1655,6 +1707,8 @@ var (
 	ErrIntOverflowCa   = fmt.Errorf("proto: integer overflow")
 )
 
+func init() { proto.RegisterFile("ca.proto", fileDescriptorCa) }
+
 var fileDescriptorCa = []byte{
 	// 493 bytes of a gzipped FileDescriptorProto
 	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0x94, 0x94, 0xcf, 0x6e, 0xd3, 0x40,

文件差异内容过多而无法显示
+ 400 - 333
vendor/src/github.com/docker/swarmkit/api/control.pb.go


文件差异内容过多而无法显示
+ 730 - 71
vendor/src/github.com/docker/swarmkit/api/dispatcher.pb.go


+ 48 - 2
vendor/src/github.com/docker/swarmkit/api/dispatcher.proto

@@ -47,13 +47,22 @@ service Dispatcher { // maybe dispatch, al likes this
 	// it should be terminated.
 	rpc Tasks(TasksRequest) returns (stream TasksMessage) {
 		option (docker.protobuf.plugin.tls_authorization) = { roles: "swarm-worker" roles: "swarm-manager" };
+		option deprecated = true;
+	};
+
+	// Assignments is a stream of assignments such as tasks and secrets for a node.
+	// The first message in the stream contains all of the tasks and secrets
+	// that are relevant to the node. Future messages in the stream are updates to
+	// the set of assignments.
+	rpc Assignments(AssignmentsRequest) returns (stream AssignmentsMessage) {
+		option (docker.protobuf.plugin.tls_authorization) = { roles: "swarm-worker" roles: "swarm-manager" };
 	};
 }
 
 // SessionRequest starts a session.
 message SessionRequest {
 	NodeDescription description = 1;
-	// SessionID can be provided to attempt resuming an exising session. If the 
+	// SessionID can be provided to attempt resuming an existing session. If the
 	// SessionID is empty or invalid, a new SessionID will be assigned.
 	//
 	// See SessionMessage.SessionID for details.
@@ -115,7 +124,7 @@ message SessionMessage {
 	repeated WeightedPeer managers = 3;
 
 	// Symmetric encryption key distributed by the lead manager. Used by agents
-	// for securing network bootstrapping and communication. 
+	// for securing network bootstrapping and communication.
 	repeated EncryptionKey network_bootstrap_keys = 4;
 }
 
@@ -157,3 +166,40 @@ message TasksMessage {
 	repeated Task tasks = 1;
 }
 
+message AssignmentsRequest {
+	string session_id = 1 [(gogoproto.customname) = "SessionID"];
+}
+
+message AssignmentsMessage {
+		// Type specifies whether this assignment message carries
+	// the full state, or is an update to an existing state.
+	enum Type {
+		COMPLETE = 0;
+		INCREMENTAL = 1;
+	}
+
+	Type type = 1;
+
+	// AppliesTo references the previous ResultsIn value, to chain
+	// incremental updates together. For the first update in a stream,
+	// AppliesTo is empty.  If AppliesTo does not match the previously
+	// received ResultsIn, the consumer of the stream should start a new
+	// Assignments stream to re-sync.
+	string applies_to = 2;
+
+	// ResultsIn identifies the result of this assignments message, to
+	// match against the next message's AppliesTo value and protect
+	// against missed messages.
+	string results_in = 3;
+
+	// UpdateTasks is a set of new or updated tasks to run on this node.
+	// In the first assignments message, it contains all of the tasks
+	// to run on this node. Tasks outside of this set running on the node
+	// should be terminated.
+	repeated Task update_tasks = 4;
+
+	// RemoveTasks is a set of previously-assigned task IDs to remove from the
+	// assignment set. It is not used in the first assignments message of
+	// a stream.
+	repeated string remove_tasks = 5;
+}

+ 9 - 4
vendor/src/github.com/docker/swarmkit/api/duration/duration.pb.go

@@ -32,7 +32,9 @@ var _ = math.Inf
 
 // This is a compile-time assertion to ensure that this generated file
 // is compatible with the proto package it is being compiled against.
-const _ = proto.GoGoProtoPackageIsVersion1
+// A compilation error at this line likely means your copy of the
+// proto package needs to be updated.
+const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package
 
 // A Duration represents a signed, fixed-length span of time represented
 // as a count of seconds and fractions of seconds at nanosecond
@@ -128,11 +130,12 @@ func valueToGoStringDuration(v interface{}, typ string) string {
 	pv := reflect.Indirect(rv).Interface()
 	return fmt.Sprintf("func(v %v) *%v { return &v } ( %#v )", typ, typ, pv)
 }
-func extensionToGoStringDuration(e map[int32]github_com_gogo_protobuf_proto.Extension) string {
+func extensionToGoStringDuration(m github_com_gogo_protobuf_proto.Message) string {
+	e := github_com_gogo_protobuf_proto.GetUnsafeExtensionsMap(m)
 	if e == nil {
 		return "nil"
 	}
-	s := "map[int32]proto.Extension{"
+	s := "proto.NewUnsafeXXX_InternalExtensions(map[int32]proto.Extension{"
 	keys := make([]int, 0, len(e))
 	for k := range e {
 		keys = append(keys, int(k))
@@ -142,7 +145,7 @@ func extensionToGoStringDuration(e map[int32]github_com_gogo_protobuf_proto.Exte
 	for _, k := range keys {
 		ss = append(ss, strconv.Itoa(k)+": "+e[int32(k)].GoString())
 	}
-	s += strings.Join(ss, ",") + "}"
+	s += strings.Join(ss, ",") + "})"
 	return s
 }
 func (m *Duration) Marshal() (data []byte, err error) {
@@ -438,6 +441,8 @@ var (
 	ErrIntOverflowDuration   = fmt.Errorf("proto: integer overflow")
 )
 
+func init() { proto.RegisterFile("duration.proto", fileDescriptorDuration) }
+
 var fileDescriptorDuration = []byte{
 	// 201 bytes of a gzipped FileDescriptorProto
 	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0xe2, 0xe2, 0x4b, 0x29, 0x2d, 0x4a,

+ 53 - 25
vendor/src/github.com/docker/swarmkit/api/health.pb.go

@@ -21,10 +21,11 @@ import (
 	grpc "google.golang.org/grpc"
 )
 
-import raftpicker "github.com/docker/swarmkit/manager/raftpicker"
+import raftselector "github.com/docker/swarmkit/manager/raftselector"
 import codes "google.golang.org/grpc/codes"
 import metadata "google.golang.org/grpc/metadata"
 import transport "google.golang.org/grpc/transport"
+import time "time"
 
 import io "io"
 
@@ -153,11 +154,12 @@ func valueToGoStringHealth(v interface{}, typ string) string {
 	pv := reflect.Indirect(rv).Interface()
 	return fmt.Sprintf("func(v %v) *%v { return &v } ( %#v )", typ, typ, pv)
 }
-func extensionToGoStringHealth(e map[int32]github_com_gogo_protobuf_proto.Extension) string {
+func extensionToGoStringHealth(m github_com_gogo_protobuf_proto.Message) string {
+	e := github_com_gogo_protobuf_proto.GetUnsafeExtensionsMap(m)
 	if e == nil {
 		return "nil"
 	}
-	s := "map[int32]proto.Extension{"
+	s := "proto.NewUnsafeXXX_InternalExtensions(map[int32]proto.Extension{"
 	keys := make([]int, 0, len(e))
 	for k := range e {
 		keys = append(keys, int(k))
@@ -167,7 +169,7 @@ func extensionToGoStringHealth(e map[int32]github_com_gogo_protobuf_proto.Extens
 	for _, k := range keys {
 		ss = append(ss, strconv.Itoa(k)+": "+e[int32(k)].GoString())
 	}
-	s += strings.Join(ss, ",") + "}"
+	s += strings.Join(ss, ",") + "})"
 	return s
 }
 
@@ -177,7 +179,7 @@ var _ grpc.ClientConn
 
 // This is a compile-time assertion to ensure that this generated file
 // is compatible with the grpc package it is being compiled against.
-const _ = grpc.SupportPackageIsVersion2
+const _ = grpc.SupportPackageIsVersion3
 
 // Client API for Health service
 
@@ -239,7 +241,8 @@ var _Health_serviceDesc = grpc.ServiceDesc{
 			Handler:    _Health_Check_Handler,
 		},
 	},
-	Streams: []grpc.StreamDesc{},
+	Streams:  []grpc.StreamDesc{},
+	Metadata: fileDescriptorHealth,
 }
 
 func (m *HealthCheckRequest) Marshal() (data []byte, err error) {
@@ -319,12 +322,11 @@ func encodeVarintHealth(data []byte, offset int, v uint64) int {
 
 type raftProxyHealthServer struct {
 	local        HealthServer
-	connSelector raftpicker.Interface
-	cluster      raftpicker.RaftCluster
+	connSelector raftselector.ConnProvider
 	ctxMods      []func(context.Context) (context.Context, error)
 }
 
-func NewRaftProxyHealthServer(local HealthServer, connSelector raftpicker.Interface, cluster raftpicker.RaftCluster, ctxMod func(context.Context) (context.Context, error)) HealthServer {
+func NewRaftProxyHealthServer(local HealthServer, connSelector raftselector.ConnProvider, ctxMod func(context.Context) (context.Context, error)) HealthServer {
 	redirectChecker := func(ctx context.Context) (context.Context, error) {
 		s, ok := transport.StreamFromContext(ctx)
 		if !ok {
@@ -346,7 +348,6 @@ func NewRaftProxyHealthServer(local HealthServer, connSelector raftpicker.Interf
 
 	return &raftProxyHealthServer{
 		local:        local,
-		cluster:      cluster,
 		connSelector: connSelector,
 		ctxMods:      mods,
 	}
@@ -361,34 +362,59 @@ func (p *raftProxyHealthServer) runCtxMods(ctx context.Context) (context.Context
 	}
 	return ctx, nil
 }
+func (p *raftProxyHealthServer) pollNewLeaderConn(ctx context.Context) (*grpc.ClientConn, error) {
+	ticker := time.NewTicker(500 * time.Millisecond)
+	defer ticker.Stop()
+	for {
+		select {
+		case <-ticker.C:
+			conn, err := p.connSelector.LeaderConn(ctx)
+			if err != nil {
+				return nil, err
+			}
 
-func (p *raftProxyHealthServer) Check(ctx context.Context, r *HealthCheckRequest) (*HealthCheckResponse, error) {
+			client := NewHealthClient(conn)
 
-	if p.cluster.IsLeader() {
-		return p.local.Check(ctx, r)
+			resp, err := client.Check(ctx, &HealthCheckRequest{Service: "Raft"})
+			if err != nil || resp.Status != HealthCheckResponse_SERVING {
+				continue
+			}
+			return conn, nil
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		}
 	}
-	ctx, err := p.runCtxMods(ctx)
+}
+
+func (p *raftProxyHealthServer) Check(ctx context.Context, r *HealthCheckRequest) (*HealthCheckResponse, error) {
+
+	conn, err := p.connSelector.LeaderConn(ctx)
 	if err != nil {
+		if err == raftselector.ErrIsLeader {
+			return p.local.Check(ctx, r)
+		}
 		return nil, err
 	}
-	conn, err := p.connSelector.Conn()
+	modCtx, err := p.runCtxMods(ctx)
 	if err != nil {
 		return nil, err
 	}
 
-	defer func() {
+	resp, err := NewHealthClient(conn).Check(modCtx, r)
+	if err != nil {
+		if !strings.Contains(err.Error(), "is closing") && !strings.Contains(err.Error(), "the connection is unavailable") && !strings.Contains(err.Error(), "connection error") {
+			return resp, err
+		}
+		conn, err := p.pollNewLeaderConn(ctx)
 		if err != nil {
-			errStr := err.Error()
-			if strings.Contains(errStr, grpc.ErrClientConnClosing.Error()) ||
-				strings.Contains(errStr, grpc.ErrClientConnTimeout.Error()) ||
-				strings.Contains(errStr, "connection error") ||
-				grpc.Code(err) == codes.Internal {
-				p.connSelector.Reset()
+			if err == raftselector.ErrIsLeader {
+				return p.local.Check(ctx, r)
 			}
+			return nil, err
 		}
-	}()
-
-	return NewHealthClient(conn).Check(ctx, r)
+		return NewHealthClient(conn).Check(modCtx, r)
+	}
+	return resp, err
 }
 
 func (m *HealthCheckRequest) Size() (n int) {
@@ -704,6 +730,8 @@ var (
 	ErrIntOverflowHealth   = fmt.Errorf("proto: integer overflow")
 )
 
+func init() { proto.RegisterFile("health.proto", fileDescriptorHealth) }
+
 var fileDescriptorHealth = []byte{
 	// 291 bytes of a gzipped FileDescriptorProto
 	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0xe2, 0xe2, 0xc9, 0x48, 0x4d, 0xcc,

+ 157 - 98
vendor/src/github.com/docker/swarmkit/api/objects.pb.go

@@ -66,6 +66,9 @@ type Service struct {
 	ID   string      `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"`
 	Meta Meta        `protobuf:"bytes,2,opt,name=meta" json:"meta"`
 	Spec ServiceSpec `protobuf:"bytes,3,opt,name=spec" json:"spec"`
+	// PreviousSpec is the previous service spec that was in place before
+	// "Spec".
+	PreviousSpec *ServiceSpec `protobuf:"bytes,6,opt,name=previous_spec,json=previousSpec" json:"previous_spec,omitempty"`
 	// Runtime state of service endpoint. This may be different
 	// from the spec version because the user may not have entered
 	// the optional fields like node_port or virtual_ip and it
@@ -284,6 +287,7 @@ func (m *Service) Copy() *Service {
 		ID:           m.ID,
 		Meta:         *m.Meta.Copy(),
 		Spec:         *m.Spec.Copy(),
+		PreviousSpec: m.PreviousSpec.Copy(),
 		Endpoint:     m.Endpoint.Copy(),
 		UpdateStatus: m.UpdateStatus.Copy(),
 	}
@@ -468,11 +472,14 @@ func (this *Service) GoString() string {
 	if this == nil {
 		return "nil"
 	}
-	s := make([]string, 0, 9)
+	s := make([]string, 0, 10)
 	s = append(s, "&api.Service{")
 	s = append(s, "ID: "+fmt.Sprintf("%#v", this.ID)+",\n")
 	s = append(s, "Meta: "+strings.Replace(this.Meta.GoString(), `&`, ``, 1)+",\n")
 	s = append(s, "Spec: "+strings.Replace(this.Spec.GoString(), `&`, ``, 1)+",\n")
+	if this.PreviousSpec != nil {
+		s = append(s, "PreviousSpec: "+fmt.Sprintf("%#v", this.PreviousSpec)+",\n")
+	}
 	if this.Endpoint != nil {
 		s = append(s, "Endpoint: "+fmt.Sprintf("%#v", this.Endpoint)+",\n")
 	}
@@ -596,11 +603,12 @@ func valueToGoStringObjects(v interface{}, typ string) string {
 	pv := reflect.Indirect(rv).Interface()
 	return fmt.Sprintf("func(v %v) *%v { return &v } ( %#v )", typ, typ, pv)
 }
-func extensionToGoStringObjects(e map[int32]github_com_gogo_protobuf_proto.Extension) string {
+func extensionToGoStringObjects(m github_com_gogo_protobuf_proto.Message) string {
+	e := github_com_gogo_protobuf_proto.GetUnsafeExtensionsMap(m)
 	if e == nil {
 		return "nil"
 	}
-	s := "map[int32]proto.Extension{"
+	s := "proto.NewUnsafeXXX_InternalExtensions(map[int32]proto.Extension{"
 	keys := make([]int, 0, len(e))
 	for k := range e {
 		keys = append(keys, int(k))
@@ -610,7 +618,7 @@ func extensionToGoStringObjects(e map[int32]github_com_gogo_protobuf_proto.Exten
 	for _, k := range keys {
 		ss = append(ss, strconv.Itoa(k)+": "+e[int32(k)].GoString())
 	}
-	s += strings.Join(ss, ",") + "}"
+	s += strings.Join(ss, ",") + "})"
 	return s
 }
 func (m *Meta) Marshal() (data []byte, err error) {
@@ -802,6 +810,16 @@ func (m *Service) MarshalTo(data []byte) (int, error) {
 		}
 		i += n14
 	}
+	if m.PreviousSpec != nil {
+		data[i] = 0x32
+		i++
+		i = encodeVarintObjects(data, i, uint64(m.PreviousSpec.Size()))
+		n15, err := m.PreviousSpec.MarshalTo(data[i:])
+		if err != nil {
+			return 0, err
+		}
+		i += n15
+	}
 	return i, nil
 }
 
@@ -824,11 +842,11 @@ func (m *Endpoint) MarshalTo(data []byte) (int, error) {
 		data[i] = 0xa
 		i++
 		i = encodeVarintObjects(data, i, uint64(m.Spec.Size()))
-		n15, err := m.Spec.MarshalTo(data[i:])
+		n16, err := m.Spec.MarshalTo(data[i:])
 		if err != nil {
 			return 0, err
 		}
-		i += n15
+		i += n16
 	}
 	if len(m.Ports) > 0 {
 		for _, msg := range m.Ports {
@@ -911,19 +929,19 @@ func (m *Task) MarshalTo(data []byte) (int, error) {
 	data[i] = 0x12
 	i++
 	i = encodeVarintObjects(data, i, uint64(m.Meta.Size()))
-	n16, err := m.Meta.MarshalTo(data[i:])
+	n17, err := m.Meta.MarshalTo(data[i:])
 	if err != nil {
 		return 0, err
 	}
-	i += n16
+	i += n17
 	data[i] = 0x1a
 	i++
 	i = encodeVarintObjects(data, i, uint64(m.Spec.Size()))
-	n17, err := m.Spec.MarshalTo(data[i:])
+	n18, err := m.Spec.MarshalTo(data[i:])
 	if err != nil {
 		return 0, err
 	}
-	i += n17
+	i += n18
 	if len(m.ServiceID) > 0 {
 		data[i] = 0x22
 		i++
@@ -944,27 +962,27 @@ func (m *Task) MarshalTo(data []byte) (int, error) {
 	data[i] = 0x3a
 	i++
 	i = encodeVarintObjects(data, i, uint64(m.Annotations.Size()))
-	n18, err := m.Annotations.MarshalTo(data[i:])
+	n19, err := m.Annotations.MarshalTo(data[i:])
 	if err != nil {
 		return 0, err
 	}
-	i += n18
+	i += n19
 	data[i] = 0x42
 	i++
 	i = encodeVarintObjects(data, i, uint64(m.ServiceAnnotations.Size()))
-	n19, err := m.ServiceAnnotations.MarshalTo(data[i:])
+	n20, err := m.ServiceAnnotations.MarshalTo(data[i:])
 	if err != nil {
 		return 0, err
 	}
-	i += n19
+	i += n20
 	data[i] = 0x4a
 	i++
 	i = encodeVarintObjects(data, i, uint64(m.Status.Size()))
-	n20, err := m.Status.MarshalTo(data[i:])
+	n21, err := m.Status.MarshalTo(data[i:])
 	if err != nil {
 		return 0, err
 	}
-	i += n20
+	i += n21
 	if m.DesiredState != 0 {
 		data[i] = 0x50
 		i++
@@ -986,21 +1004,21 @@ func (m *Task) MarshalTo(data []byte) (int, error) {
 		data[i] = 0x62
 		i++
 		i = encodeVarintObjects(data, i, uint64(m.Endpoint.Size()))
-		n21, err := m.Endpoint.MarshalTo(data[i:])
+		n22, err := m.Endpoint.MarshalTo(data[i:])
 		if err != nil {
 			return 0, err
 		}
-		i += n21
+		i += n22
 	}
 	if m.LogDriver != nil {
 		data[i] = 0x6a
 		i++
 		i = encodeVarintObjects(data, i, uint64(m.LogDriver.Size()))
-		n22, err := m.LogDriver.MarshalTo(data[i:])
+		n23, err := m.LogDriver.MarshalTo(data[i:])
 		if err != nil {
 			return 0, err
 		}
-		i += n22
+		i += n23
 	}
 	return i, nil
 }
@@ -1024,11 +1042,11 @@ func (m *NetworkAttachment) MarshalTo(data []byte) (int, error) {
 		data[i] = 0xa
 		i++
 		i = encodeVarintObjects(data, i, uint64(m.Network.Size()))
-		n23, err := m.Network.MarshalTo(data[i:])
+		n24, err := m.Network.MarshalTo(data[i:])
 		if err != nil {
 			return 0, err
 		}
-		i += n23
+		i += n24
 	}
 	if len(m.Addresses) > 0 {
 		for _, s := range m.Addresses {
@@ -1087,38 +1105,38 @@ func (m *Network) MarshalTo(data []byte) (int, error) {
 	data[i] = 0x12
 	i++
 	i = encodeVarintObjects(data, i, uint64(m.Meta.Size()))
-	n24, err := m.Meta.MarshalTo(data[i:])
+	n25, err := m.Meta.MarshalTo(data[i:])
 	if err != nil {
 		return 0, err
 	}
-	i += n24
+	i += n25
 	data[i] = 0x1a
 	i++
 	i = encodeVarintObjects(data, i, uint64(m.Spec.Size()))
-	n25, err := m.Spec.MarshalTo(data[i:])
+	n26, err := m.Spec.MarshalTo(data[i:])
 	if err != nil {
 		return 0, err
 	}
-	i += n25
+	i += n26
 	if m.DriverState != nil {
 		data[i] = 0x22
 		i++
 		i = encodeVarintObjects(data, i, uint64(m.DriverState.Size()))
-		n26, err := m.DriverState.MarshalTo(data[i:])
+		n27, err := m.DriverState.MarshalTo(data[i:])
 		if err != nil {
 			return 0, err
 		}
-		i += n26
+		i += n27
 	}
 	if m.IPAM != nil {
 		data[i] = 0x2a
 		i++
 		i = encodeVarintObjects(data, i, uint64(m.IPAM.Size()))
-		n27, err := m.IPAM.MarshalTo(data[i:])
+		n28, err := m.IPAM.MarshalTo(data[i:])
 		if err != nil {
 			return 0, err
 		}
-		i += n27
+		i += n28
 	}
 	return i, nil
 }
@@ -1147,27 +1165,27 @@ func (m *Cluster) MarshalTo(data []byte) (int, error) {
 	data[i] = 0x12
 	i++
 	i = encodeVarintObjects(data, i, uint64(m.Meta.Size()))
-	n28, err := m.Meta.MarshalTo(data[i:])
+	n29, err := m.Meta.MarshalTo(data[i:])
 	if err != nil {
 		return 0, err
 	}
-	i += n28
+	i += n29
 	data[i] = 0x1a
 	i++
 	i = encodeVarintObjects(data, i, uint64(m.Spec.Size()))
-	n29, err := m.Spec.MarshalTo(data[i:])
+	n30, err := m.Spec.MarshalTo(data[i:])
 	if err != nil {
 		return 0, err
 	}
-	i += n29
+	i += n30
 	data[i] = 0x22
 	i++
 	i = encodeVarintObjects(data, i, uint64(m.RootCA.Size()))
-	n30, err := m.RootCA.MarshalTo(data[i:])
+	n31, err := m.RootCA.MarshalTo(data[i:])
 	if err != nil {
 		return 0, err
 	}
-	i += n30
+	i += n31
 	if len(m.NetworkBootstrapKeys) > 0 {
 		for _, msg := range m.NetworkBootstrapKeys {
 			data[i] = 0x2a
@@ -1281,6 +1299,10 @@ func (m *Service) Size() (n int) {
 		l = m.UpdateStatus.Size()
 		n += 1 + l + sovObjects(uint64(l))
 	}
+	if m.PreviousSpec != nil {
+		l = m.PreviousSpec.Size()
+		n += 1 + l + sovObjects(uint64(l))
+	}
 	return n
 }
 
@@ -1489,6 +1511,7 @@ func (this *Service) String() string {
 		`Spec:` + strings.Replace(strings.Replace(this.Spec.String(), "ServiceSpec", "ServiceSpec", 1), `&`, ``, 1) + `,`,
 		`Endpoint:` + strings.Replace(fmt.Sprintf("%v", this.Endpoint), "Endpoint", "Endpoint", 1) + `,`,
 		`UpdateStatus:` + strings.Replace(fmt.Sprintf("%v", this.UpdateStatus), "UpdateStatus", "UpdateStatus", 1) + `,`,
+		`PreviousSpec:` + strings.Replace(fmt.Sprintf("%v", this.PreviousSpec), "ServiceSpec", "ServiceSpec", 1) + `,`,
 		`}`,
 	}, "")
 	return s
@@ -2215,6 +2238,39 @@ func (m *Service) Unmarshal(data []byte) error {
 				return err
 			}
 			iNdEx = postIndex
+		case 6:
+			if wireType != 2 {
+				return fmt.Errorf("proto: wrong wireType = %d for field PreviousSpec", wireType)
+			}
+			var msglen int
+			for shift := uint(0); ; shift += 7 {
+				if shift >= 64 {
+					return ErrIntOverflowObjects
+				}
+				if iNdEx >= l {
+					return io.ErrUnexpectedEOF
+				}
+				b := data[iNdEx]
+				iNdEx++
+				msglen |= (int(b) & 0x7F) << shift
+				if b < 0x80 {
+					break
+				}
+			}
+			if msglen < 0 {
+				return ErrInvalidLengthObjects
+			}
+			postIndex := iNdEx + msglen
+			if postIndex > l {
+				return io.ErrUnexpectedEOF
+			}
+			if m.PreviousSpec == nil {
+				m.PreviousSpec = &ServiceSpec{}
+			}
+			if err := m.PreviousSpec.Unmarshal(data[iNdEx:postIndex]); err != nil {
+				return err
+			}
+			iNdEx = postIndex
 		default:
 			iNdEx = preIndex
 			skippy, err := skipObjects(data[iNdEx:])
@@ -3581,70 +3637,73 @@ var (
 	ErrIntOverflowObjects   = fmt.Errorf("proto: integer overflow")
 )
 
+func init() { proto.RegisterFile("objects.proto", fileDescriptorObjects) }
+
 var fileDescriptorObjects = []byte{
-	// 1009 bytes of a gzipped FileDescriptorProto
-	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0xbc, 0x56, 0xcf, 0x6e, 0x1b, 0x45,
-	0x1c, 0xce, 0xda, 0x1b, 0xdb, 0xfb, 0x73, 0x1c, 0x89, 0xa1, 0xaa, 0xb6, 0x21, 0xd8, 0xc1, 0x15,
-	0xa8, 0x87, 0xca, 0x15, 0xa5, 0x20, 0x2a, 0x5a, 0x21, 0xff, 0x13, 0x58, 0x25, 0x10, 0x4d, 0x4b,
-	0x7a, 0x5c, 0x4d, 0x76, 0xa7, 0x66, 0xb1, 0xbd, 0xb3, 0x9a, 0x19, 0xbb, 0xf2, 0x0d, 0xf1, 0x00,
-	0x48, 0xbc, 0x00, 0xaf, 0xc2, 0x35, 0x07, 0x0e, 0x1c, 0x39, 0x59, 0xc4, 0x37, 0x4e, 0xf0, 0x08,
-	0x68, 0x66, 0x67, 0xed, 0x8d, 0xbc, 0x0e, 0x8d, 0x84, 0x72, 0x9b, 0xd9, 0xf9, 0xbe, 0x6f, 0x7e,
-	0xff, 0x67, 0xa1, 0xc6, 0xce, 0xbe, 0xa7, 0xbe, 0x14, 0xad, 0x98, 0x33, 0xc9, 0x10, 0x0a, 0x98,
+	// 1029 bytes of a gzipped FileDescriptorProto
+	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0xbc, 0x56, 0x4d, 0x6f, 0x1b, 0x45,
+	0x18, 0xce, 0xda, 0x1b, 0xdb, 0xfb, 0x3a, 0x8e, 0xc4, 0x50, 0x55, 0xdb, 0x10, 0xec, 0xe0, 0x0a,
+	0xd4, 0x43, 0xe5, 0x8a, 0x52, 0x10, 0x15, 0xad, 0x90, 0xbf, 0x04, 0x56, 0x09, 0x44, 0xd3, 0x92,
+	0x1e, 0x57, 0x93, 0xdd, 0xa9, 0x59, 0x6c, 0xef, 0xac, 0x66, 0xc6, 0xae, 0x7c, 0x43, 0xfc, 0x00,
+	0x7e, 0x02, 0x7f, 0x85, 0x6b, 0x0e, 0x1c, 0xb8, 0xc1, 0xc9, 0x22, 0xbe, 0x71, 0x82, 0x9f, 0x80,
+	0x66, 0x76, 0xd6, 0xde, 0xc8, 0xeb, 0x90, 0x4a, 0x28, 0xb7, 0x99, 0x9d, 0xe7, 0x79, 0xde, 0xcf,
+	0x79, 0x77, 0xa0, 0xc6, 0xce, 0xbe, 0xa7, 0xbe, 0x14, 0xad, 0x98, 0x33, 0xc9, 0x10, 0x0a, 0x98,
 	0x3f, 0xa2, 0xbc, 0x25, 0x5e, 0x13, 0x3e, 0x19, 0x85, 0xb2, 0x35, 0xfb, 0xf0, 0xa0, 0x2a, 0xe7,
 	0x31, 0x35, 0x80, 0x83, 0xaa, 0x88, 0xa9, 0x9f, 0x6e, 0xee, 0xc8, 0x70, 0x42, 0x85, 0x24, 0x93,
 	0xf8, 0xc1, 0x6a, 0x65, 0x8e, 0x6e, 0x0d, 0xd9, 0x90, 0xe9, 0xe5, 0x03, 0xb5, 0x4a, 0xbe, 0x36,
-	0x7f, 0xb5, 0xc0, 0x3e, 0xa6, 0x92, 0xa0, 0xcf, 0xa0, 0x3c, 0xa3, 0x5c, 0x84, 0x2c, 0x72, 0xad,
-	0x23, 0xeb, 0x5e, 0xf5, 0xe1, 0x3b, 0xad, 0xcd, 0x9b, 0x5b, 0xa7, 0x09, 0xa4, 0x63, 0x9f, 0x2f,
-	0x1a, 0x3b, 0x38, 0x65, 0xa0, 0x27, 0x00, 0x3e, 0xa7, 0x44, 0xd2, 0xc0, 0x23, 0xd2, 0x2d, 0x68,
-	0xfe, 0xbb, 0x79, 0xfc, 0x17, 0xa9, 0x51, 0xd8, 0x31, 0x84, 0xb6, 0x54, 0xec, 0x69, 0x1c, 0xa4,
-	0xec, 0xe2, 0x1b, 0xb1, 0x0d, 0xa1, 0x2d, 0x9b, 0x7f, 0x15, 0xc1, 0xfe, 0x9a, 0x05, 0x14, 0xdd,
-	0x86, 0x42, 0x18, 0x68, 0xe3, 0x9d, 0x4e, 0x69, 0xb9, 0x68, 0x14, 0x06, 0x3d, 0x5c, 0x08, 0x03,
-	0xf4, 0x10, 0xec, 0x09, 0x95, 0xc4, 0x98, 0xe5, 0xe6, 0x09, 0xab, 0x08, 0x18, 0x9f, 0x34, 0x16,
-	0x7d, 0x02, 0xb6, 0x0a, 0xab, 0x31, 0xe6, 0x30, 0x8f, 0xa3, 0xee, 0x7c, 0x1e, 0x53, 0x3f, 0xe5,
-	0x29, 0x3c, 0xea, 0x43, 0x35, 0xa0, 0xc2, 0xe7, 0x61, 0x2c, 0x55, 0x24, 0x6d, 0x4d, 0xbf, 0xbb,
-	0x8d, 0xde, 0x5b, 0x43, 0x71, 0x96, 0x87, 0x9e, 0x40, 0x49, 0x48, 0x22, 0xa7, 0xc2, 0xdd, 0xd5,
-	0x0a, 0xf5, 0xad, 0x06, 0x68, 0x94, 0x31, 0xc1, 0x70, 0xd0, 0x97, 0xb0, 0x3f, 0x21, 0x11, 0x19,
-	0x52, 0xee, 0x19, 0x95, 0x92, 0x56, 0x79, 0x2f, 0xd7, 0xf5, 0x04, 0x99, 0x08, 0xe1, 0xda, 0x24,
-	0xbb, 0x45, 0x7d, 0x00, 0x22, 0x25, 0xf1, 0xbf, 0x9b, 0xd0, 0x48, 0xba, 0x65, 0xad, 0xf2, 0x7e,
-	0xae, 0x2d, 0x54, 0xbe, 0x66, 0x7c, 0xd4, 0x5e, 0x81, 0x71, 0x86, 0x88, 0xbe, 0x80, 0xaa, 0x4f,
-	0xb9, 0x0c, 0x5f, 0x85, 0x3e, 0x91, 0xd4, 0xad, 0x68, 0x9d, 0x46, 0x9e, 0x4e, 0x77, 0x0d, 0x33,
-	0x4e, 0x65, 0x99, 0xcd, 0x9f, 0x0b, 0x50, 0x7e, 0x4e, 0xf9, 0x2c, 0xf4, 0xff, 0xdf, 0x74, 0x3f,
-	0xbe, 0x94, 0xee, 0x5c, 0xcb, 0xcc, 0xb5, 0x1b, 0x19, 0xff, 0x14, 0x2a, 0x34, 0x0a, 0x62, 0x16,
-	0x46, 0xd2, 0xa4, 0x3b, 0xb7, 0x5a, 0xfa, 0x06, 0x83, 0x57, 0x68, 0xd4, 0x87, 0x5a, 0x52, 0xc5,
-	0xde, 0xa5, 0x5c, 0x1f, 0xe5, 0xd1, 0xbf, 0xd5, 0x40, 0x93, 0xa4, 0xbd, 0x69, 0x66, 0xd7, 0xfc,
-	0xa5, 0x00, 0x95, 0x54, 0x1d, 0x3d, 0x32, 0x8e, 0x58, 0xdb, 0xa5, 0x52, 0xac, 0xf2, 0xc4, 0xf8,
-	0xf0, 0x08, 0x76, 0x63, 0xc6, 0xa5, 0x70, 0x0b, 0x47, 0xc5, 0x6d, 0xd5, 0x76, 0xc2, 0xb8, 0xec,
-	0xb2, 0xe8, 0x55, 0x38, 0xc4, 0x09, 0x18, 0xbd, 0x84, 0xea, 0x2c, 0xe4, 0x72, 0x4a, 0xc6, 0x5e,
-	0x18, 0x0b, 0xb7, 0xa8, 0xb9, 0x1f, 0x5c, 0x75, 0x65, 0xeb, 0x34, 0xc1, 0x0f, 0x4e, 0x3a, 0xfb,
-	0xcb, 0x45, 0x03, 0x56, 0x5b, 0x81, 0xc1, 0x48, 0x0d, 0x62, 0x71, 0x70, 0x0c, 0xce, 0xea, 0x04,
-	0xdd, 0x07, 0x88, 0x92, 0xe2, 0xf2, 0x56, 0xe9, 0xae, 0x2d, 0x17, 0x0d, 0xc7, 0x94, 0xdc, 0xa0,
-	0x87, 0x1d, 0x03, 0x18, 0x04, 0x08, 0x81, 0x4d, 0x82, 0x80, 0xeb, 0xe4, 0x3b, 0x58, 0xaf, 0x9b,
-	0xbf, 0xed, 0x82, 0xfd, 0x82, 0x88, 0xd1, 0x4d, 0x0f, 0x08, 0x75, 0xe7, 0x46, 0xb9, 0xdc, 0x07,
-	0x10, 0x49, 0x25, 0x29, 0x77, 0xec, 0xb5, 0x3b, 0xa6, 0xbe, 0x94, 0x3b, 0x06, 0x90, 0xb8, 0x23,
-	0xc6, 0x4c, 0xea, 0xca, 0xb0, 0xb1, 0x5e, 0xa3, 0xbb, 0x50, 0x8e, 0x58, 0xa0, 0xe9, 0x25, 0x4d,
-	0x87, 0xe5, 0xa2, 0x51, 0x52, 0xc3, 0x60, 0xd0, 0xc3, 0x25, 0x75, 0x34, 0x08, 0x54, 0xc7, 0x91,
-	0x28, 0x62, 0x92, 0xa8, 0x71, 0x22, 0x4c, 0xe7, 0xe6, 0xd6, 0x75, 0x7b, 0x0d, 0x4b, 0x3b, 0x2e,
-	0xc3, 0x44, 0xa7, 0xf0, 0x76, 0x6a, 0x6f, 0x56, 0xb0, 0x72, 0x1d, 0x41, 0x64, 0x14, 0x32, 0x27,
-	0x99, 0x09, 0xe7, 0x6c, 0x9f, 0x70, 0x3a, 0x82, 0x79, 0x13, 0xae, 0x03, 0xb5, 0x80, 0x8a, 0x90,
-	0xd3, 0x40, 0xf7, 0x0e, 0x75, 0xe1, 0xc8, 0xba, 0xb7, 0xbf, 0xe5, 0xd1, 0x30, 0x22, 0x14, 0xef,
-	0x19, 0x8e, 0xde, 0xa1, 0x36, 0x54, 0x4c, 0xdd, 0x08, 0xb7, 0xaa, 0x6b, 0xf7, 0x0d, 0x27, 0xdb,
-	0x8a, 0x76, 0xa9, 0xf7, 0xf7, 0xae, 0xd5, 0xfb, 0x8f, 0x01, 0xc6, 0x6c, 0xe8, 0x05, 0x3c, 0x9c,
-	0x51, 0xee, 0xd6, 0x34, 0xf7, 0x20, 0x8f, 0xdb, 0xd3, 0x08, 0xec, 0x8c, 0xd9, 0x30, 0x59, 0x36,
-	0x7f, 0xb4, 0xe0, 0xad, 0x0d, 0xa3, 0xd0, 0xc7, 0x50, 0x36, 0x66, 0x5d, 0xf5, 0x7c, 0x1b, 0x1e,
-	0x4e, 0xb1, 0xe8, 0x10, 0x1c, 0xd5, 0x23, 0x54, 0x08, 0x9a, 0x74, 0xbf, 0x83, 0xd7, 0x1f, 0x90,
-	0x0b, 0x65, 0x32, 0x0e, 0x89, 0x3a, 0x2b, 0xea, 0xb3, 0x74, 0xdb, 0xfc, 0xa9, 0x00, 0x65, 0x23,
-	0x76, 0xd3, 0x83, 0xd8, 0x5c, 0xbb, 0xd1, 0x59, 0x4f, 0x61, 0x2f, 0x09, 0xa7, 0x29, 0x09, 0xfb,
-	0x3f, 0x83, 0x5a, 0x4d, 0xf0, 0x49, 0x39, 0x3c, 0x05, 0x3b, 0x8c, 0xc9, 0xc4, 0x0c, 0xe1, 0xdc,
-	0x9b, 0x07, 0x27, 0xed, 0xe3, 0x6f, 0xe2, 0xa4, 0xb2, 0x2b, 0xcb, 0x45, 0xc3, 0x56, 0x1f, 0xb0,
-	0xa6, 0x35, 0xff, 0x2e, 0x40, 0xb9, 0x3b, 0x9e, 0x0a, 0x49, 0xf9, 0x4d, 0x07, 0xc4, 0x5c, 0xbb,
-	0x11, 0x90, 0x2e, 0x94, 0x39, 0x63, 0xd2, 0xf3, 0xc9, 0x55, 0xb1, 0xc0, 0x8c, 0xc9, 0x6e, 0xbb,
-	0xb3, 0xaf, 0x88, 0x6a, 0x90, 0x24, 0x7b, 0x5c, 0x52, 0xd4, 0x2e, 0x41, 0x2f, 0xe1, 0x76, 0x3a,
-	0x7e, 0xcf, 0x18, 0x93, 0x42, 0x72, 0x12, 0x7b, 0x23, 0x3a, 0x57, 0xaf, 0x55, 0x71, 0xdb, 0x3f,
-	0x45, 0x3f, 0xf2, 0xf9, 0x5c, 0x07, 0xea, 0x19, 0x9d, 0xe3, 0x5b, 0x46, 0xa0, 0x93, 0xf2, 0x9f,
-	0xd1, 0xb9, 0x40, 0x9f, 0xc3, 0x21, 0x5d, 0xc1, 0x94, 0xa2, 0x37, 0x26, 0x13, 0xf5, 0xb0, 0x78,
-	0xfe, 0x98, 0xf9, 0x23, 0x3d, 0xdb, 0x6c, 0x7c, 0x87, 0x66, 0xa5, 0xbe, 0x4a, 0x10, 0x5d, 0x05,
-	0xe8, 0x1c, 0x9e, 0x5f, 0xd4, 0x77, 0xfe, 0xb8, 0xa8, 0xef, 0xfc, 0x73, 0x51, 0xb7, 0x7e, 0x58,
-	0xd6, 0xad, 0xf3, 0x65, 0xdd, 0xfa, 0x7d, 0x59, 0xb7, 0xfe, 0x5c, 0xd6, 0xad, 0xb3, 0x92, 0xfe,
-	0xbd, 0xfd, 0xe8, 0xdf, 0x00, 0x00, 0x00, 0xff, 0xff, 0x56, 0x49, 0xe6, 0x55, 0x4e, 0x0b, 0x00,
-	0x00,
+	0x7f, 0xb1, 0xc0, 0x3e, 0xa6, 0x92, 0xa0, 0xcf, 0xa0, 0x3c, 0xa3, 0x5c, 0x84, 0x2c, 0x72, 0xad,
+	0x23, 0xeb, 0x5e, 0xf5, 0xe1, 0x3b, 0xad, 0x4d, 0xcb, 0xad, 0xd3, 0x04, 0xd2, 0xb1, 0xcf, 0x17,
+	0x8d, 0x1d, 0x9c, 0x32, 0xd0, 0x13, 0x00, 0x9f, 0x53, 0x22, 0x69, 0xe0, 0x11, 0xe9, 0x16, 0x34,
+	0xff, 0xdd, 0x3c, 0xfe, 0x8b, 0xd4, 0x29, 0xec, 0x18, 0x42, 0x5b, 0x2a, 0xf6, 0x34, 0x0e, 0x52,
+	0x76, 0xf1, 0x5a, 0x6c, 0x43, 0x68, 0xcb, 0xe6, 0x5f, 0x45, 0xb0, 0xbf, 0x66, 0x01, 0x45, 0xb7,
+	0xa1, 0x10, 0x06, 0xda, 0x79, 0xa7, 0x53, 0x5a, 0x2e, 0x1a, 0x85, 0x41, 0x0f, 0x17, 0xc2, 0x00,
+	0x3d, 0x04, 0x7b, 0x42, 0x25, 0x31, 0x6e, 0xb9, 0x79, 0xc2, 0x2a, 0x03, 0x26, 0x26, 0x8d, 0x45,
+	0x9f, 0x80, 0xad, 0xd2, 0x6a, 0x9c, 0x39, 0xcc, 0xe3, 0x28, 0x9b, 0xcf, 0x63, 0xea, 0xa7, 0x3c,
+	0x85, 0x47, 0x7d, 0xa8, 0x06, 0x54, 0xf8, 0x3c, 0x8c, 0xa5, 0xca, 0xa4, 0xad, 0xe9, 0x77, 0xb7,
+	0xd1, 0x7b, 0x6b, 0x28, 0xce, 0xf2, 0xd0, 0x13, 0x28, 0x09, 0x49, 0xe4, 0x54, 0xb8, 0xbb, 0x5a,
+	0xa1, 0xbe, 0xd5, 0x01, 0x8d, 0x32, 0x2e, 0x18, 0x0e, 0xfa, 0x12, 0xf6, 0x27, 0x24, 0x22, 0x43,
+	0xca, 0x3d, 0xa3, 0x52, 0xd2, 0x2a, 0xef, 0xe5, 0x86, 0x9e, 0x20, 0x13, 0x21, 0x5c, 0x9b, 0x64,
+	0xb7, 0xa8, 0x0f, 0x40, 0xa4, 0x24, 0xfe, 0x77, 0x13, 0x1a, 0x49, 0xb7, 0xac, 0x55, 0xde, 0xcf,
+	0xf5, 0x85, 0xca, 0xd7, 0x8c, 0x8f, 0xda, 0x2b, 0x30, 0xce, 0x10, 0xd1, 0x17, 0x50, 0xf5, 0x29,
+	0x97, 0xe1, 0xab, 0xd0, 0x27, 0x92, 0xba, 0x15, 0xad, 0xd3, 0xc8, 0xd3, 0xe9, 0xae, 0x61, 0x26,
+	0xa8, 0x2c, 0xb3, 0xf9, 0x7b, 0x01, 0xca, 0xcf, 0x29, 0x9f, 0x85, 0xfe, 0xff, 0x5b, 0xee, 0xc7,
+	0x97, 0xca, 0x9d, 0xeb, 0x99, 0x31, 0xbb, 0x51, 0xf1, 0x4f, 0xa1, 0x42, 0xa3, 0x20, 0x66, 0x61,
+	0x24, 0x4d, 0xb9, 0x73, 0xbb, 0xa5, 0x6f, 0x30, 0x78, 0x85, 0x46, 0x7d, 0xa8, 0x25, 0x5d, 0xec,
+	0x5d, 0xaa, 0xf5, 0x51, 0x1e, 0xfd, 0x5b, 0x0d, 0x34, 0x45, 0xda, 0x9b, 0x66, 0x76, 0xa8, 0x07,
+	0xb5, 0x98, 0xd3, 0x59, 0xc8, 0xa6, 0xc2, 0xd3, 0x41, 0x94, 0xae, 0x15, 0x04, 0xde, 0x4b, 0x59,
+	0x6a, 0xd7, 0xfc, 0xb9, 0x00, 0x95, 0xd4, 0x47, 0xf4, 0xc8, 0xa4, 0xc3, 0xda, 0xee, 0x50, 0x8a,
+	0xd5, 0x52, 0x49, 0x26, 0x1e, 0xc1, 0x6e, 0xcc, 0xb8, 0x14, 0x6e, 0xe1, 0xa8, 0xb8, 0xad, 0x67,
+	0x4f, 0x18, 0x97, 0x5d, 0x16, 0xbd, 0x0a, 0x87, 0x38, 0x01, 0xa3, 0x97, 0x50, 0x9d, 0x85, 0x5c,
+	0x4e, 0xc9, 0xd8, 0x0b, 0x63, 0xe1, 0x16, 0x35, 0xf7, 0x83, 0xab, 0x4c, 0xb6, 0x4e, 0x13, 0xfc,
+	0xe0, 0xa4, 0xb3, 0xbf, 0x5c, 0x34, 0x60, 0xb5, 0x15, 0x18, 0x8c, 0xd4, 0x20, 0x16, 0x07, 0xc7,
+	0xe0, 0xac, 0x4e, 0xd0, 0x7d, 0x80, 0x28, 0x69, 0x51, 0x6f, 0xd5, 0x34, 0xb5, 0xe5, 0xa2, 0xe1,
+	0x98, 0xc6, 0x1d, 0xf4, 0xb0, 0x63, 0x00, 0x83, 0x00, 0x21, 0xb0, 0x49, 0x10, 0x70, 0xdd, 0x42,
+	0x0e, 0xd6, 0xeb, 0xe6, 0xaf, 0xbb, 0x60, 0xbf, 0x20, 0x62, 0x74, 0xd3, 0x63, 0x46, 0xd9, 0xdc,
+	0x68, 0xba, 0xfb, 0x00, 0x22, 0x29, 0xa5, 0x0a, 0xc7, 0x5e, 0x87, 0x63, 0x0a, 0xac, 0xc2, 0x31,
+	0x80, 0x24, 0x1c, 0x31, 0x66, 0x52, 0xf7, 0x97, 0x8d, 0xf5, 0x1a, 0xdd, 0x85, 0x72, 0xc4, 0x02,
+	0x4d, 0x2f, 0x69, 0x3a, 0x2c, 0x17, 0x8d, 0x92, 0x1a, 0x29, 0x83, 0x1e, 0x2e, 0xa9, 0xa3, 0x41,
+	0xa0, 0xee, 0x2d, 0x89, 0x22, 0x26, 0x89, 0x1a, 0x4a, 0xc2, 0xdc, 0xff, 0xdc, 0xc6, 0x6a, 0xaf,
+	0x61, 0xe9, 0xbd, 0xcd, 0x30, 0xd1, 0x29, 0xbc, 0x9d, 0xfa, 0x9b, 0x15, 0xac, 0xbc, 0x89, 0x20,
+	0x32, 0x0a, 0x99, 0x93, 0xcc, 0x9c, 0x74, 0xb6, 0xcf, 0x49, 0x9d, 0xc1, 0xbc, 0x39, 0xd9, 0x81,
+	0x5a, 0x40, 0x45, 0xc8, 0x69, 0xa0, 0x6f, 0x20, 0x75, 0xe1, 0xc8, 0xba, 0xb7, 0xbf, 0xe5, 0xd7,
+	0x63, 0x44, 0x28, 0xde, 0x33, 0x1c, 0xbd, 0x43, 0x6d, 0xa8, 0x98, 0xbe, 0x11, 0x6e, 0x55, 0xf7,
+	0xee, 0x35, 0xe7, 0xe3, 0x8a, 0x76, 0x69, 0x82, 0xec, 0xbd, 0xd1, 0x04, 0x79, 0x0c, 0x30, 0x66,
+	0x43, 0x2f, 0xe0, 0xe1, 0x8c, 0x72, 0xb7, 0xa6, 0xb9, 0x07, 0x79, 0xdc, 0x9e, 0x46, 0x60, 0x67,
+	0xcc, 0x86, 0xc9, 0xb2, 0xf9, 0xa3, 0x05, 0x6f, 0x6d, 0x38, 0x85, 0x3e, 0x86, 0xb2, 0x71, 0xeb,
+	0xaa, 0x47, 0x80, 0xe1, 0xe1, 0x14, 0x8b, 0x0e, 0xc1, 0x51, 0x77, 0x84, 0x0a, 0x41, 0x93, 0xdb,
+	0xef, 0xe0, 0xf5, 0x07, 0xe4, 0x42, 0x99, 0x8c, 0x43, 0xa2, 0xce, 0x8a, 0xfa, 0x2c, 0xdd, 0x36,
+	0x7f, 0x2a, 0x40, 0xd9, 0x88, 0xdd, 0xf4, 0x38, 0x37, 0x66, 0x37, 0x6e, 0xd6, 0x53, 0xd8, 0x4b,
+	0xd2, 0x69, 0x5a, 0xc2, 0xfe, 0xcf, 0xa4, 0x56, 0x13, 0x7c, 0xd2, 0x0e, 0x4f, 0xc1, 0x0e, 0x63,
+	0x32, 0x31, 0xa3, 0x3c, 0xd7, 0xf2, 0xe0, 0xa4, 0x7d, 0xfc, 0x4d, 0x9c, 0x74, 0x76, 0x65, 0xb9,
+	0x68, 0xd8, 0xea, 0x03, 0xd6, 0xb4, 0xe6, 0xdf, 0x05, 0x28, 0x77, 0xc7, 0x53, 0x21, 0x29, 0xbf,
+	0xe9, 0x84, 0x18, 0xb3, 0x1b, 0x09, 0xe9, 0x42, 0x99, 0x33, 0x26, 0x3d, 0x9f, 0x5c, 0x95, 0x0b,
+	0xcc, 0x98, 0xec, 0xb6, 0x3b, 0xfb, 0x8a, 0xa8, 0x06, 0x49, 0xb2, 0xc7, 0x25, 0x45, 0xed, 0x12,
+	0xf4, 0x12, 0x6e, 0xa7, 0xe3, 0xf7, 0x8c, 0x31, 0x29, 0x24, 0x27, 0xb1, 0x37, 0xa2, 0x73, 0xf5,
+	0xcf, 0x2b, 0x6e, 0x7b, 0x99, 0xf4, 0x23, 0x9f, 0xcf, 0x75, 0xa2, 0x9e, 0xd1, 0x39, 0xbe, 0x65,
+	0x04, 0x3a, 0x29, 0xff, 0x19, 0x9d, 0x0b, 0xf4, 0x39, 0x1c, 0xd2, 0x15, 0x4c, 0x29, 0x7a, 0x63,
+	0x32, 0x51, 0x3f, 0x16, 0xcf, 0x1f, 0x33, 0x7f, 0xa4, 0x67, 0x9b, 0x8d, 0xef, 0xd0, 0xac, 0xd4,
+	0x57, 0x09, 0xa2, 0xab, 0x00, 0x9d, 0xc3, 0xf3, 0x8b, 0xfa, 0xce, 0x1f, 0x17, 0xf5, 0x9d, 0x7f,
+	0x2e, 0xea, 0xd6, 0x0f, 0xcb, 0xba, 0x75, 0xbe, 0xac, 0x5b, 0xbf, 0x2d, 0xeb, 0xd6, 0x9f, 0xcb,
+	0xba, 0x75, 0x56, 0xd2, 0x8f, 0xe4, 0x8f, 0xfe, 0x0d, 0x00, 0x00, 0xff, 0xff, 0x6a, 0xb2, 0x97,
+	0xcc, 0x94, 0x0b, 0x00, 0x00,
 }

+ 4 - 0
vendor/src/github.com/docker/swarmkit/api/objects.proto

@@ -57,6 +57,10 @@ message Service {
 
 	ServiceSpec spec = 3 [(gogoproto.nullable) = false];
 
+	// PreviousSpec is the previous service spec that was in place before
+	// "Spec".
+	ServiceSpec previous_spec = 6;
+
 	// Runtime state of service endpoint. This may be different
 	// from the spec version because the user may not have entered
 	// the optional fields like node_port or virtual_ip and it

+ 132 - 76
vendor/src/github.com/docker/swarmkit/api/raft.pb.go

@@ -23,10 +23,11 @@ import (
 	grpc "google.golang.org/grpc"
 )
 
-import raftpicker "github.com/docker/swarmkit/manager/raftpicker"
+import raftselector "github.com/docker/swarmkit/manager/raftselector"
 import codes "google.golang.org/grpc/codes"
 import metadata "google.golang.org/grpc/metadata"
 import transport "google.golang.org/grpc/transport"
+import time "time"
 
 import io "io"
 
@@ -163,7 +164,7 @@ func (m *InternalRaftRequest) Reset()                    { *m = InternalRaftRequ
 func (*InternalRaftRequest) ProtoMessage()               {}
 func (*InternalRaftRequest) Descriptor() ([]byte, []int) { return fileDescriptorRaft, []int{9} }
 
-// StoreAction defines a taret and operation to apply on the storage system.
+// StoreAction defines a target and operation to apply on the storage system.
 type StoreAction struct {
 	Action StoreActionKind `protobuf:"varint,1,opt,name=action,proto3,enum=docker.swarmkit.v1.StoreActionKind" json:"action,omitempty"`
 	// Types that are valid to be assigned to Target:
@@ -797,11 +798,12 @@ func valueToGoStringRaft(v interface{}, typ string) string {
 	pv := reflect.Indirect(rv).Interface()
 	return fmt.Sprintf("func(v %v) *%v { return &v } ( %#v )", typ, typ, pv)
 }
-func extensionToGoStringRaft(e map[int32]github_com_gogo_protobuf_proto.Extension) string {
+func extensionToGoStringRaft(m github_com_gogo_protobuf_proto.Message) string {
+	e := github_com_gogo_protobuf_proto.GetUnsafeExtensionsMap(m)
 	if e == nil {
 		return "nil"
 	}
-	s := "map[int32]proto.Extension{"
+	s := "proto.NewUnsafeXXX_InternalExtensions(map[int32]proto.Extension{"
 	keys := make([]int, 0, len(e))
 	for k := range e {
 		keys = append(keys, int(k))
@@ -811,7 +813,7 @@ func extensionToGoStringRaft(e map[int32]github_com_gogo_protobuf_proto.Extensio
 	for _, k := range keys {
 		ss = append(ss, strconv.Itoa(k)+": "+e[int32(k)].GoString())
 	}
-	s += strings.Join(ss, ",") + "}"
+	s += strings.Join(ss, ",") + "})"
 	return s
 }
 
@@ -821,7 +823,7 @@ var _ grpc.ClientConn
 
 // This is a compile-time assertion to ensure that this generated file
 // is compatible with the grpc package it is being compiled against.
-const _ = grpc.SupportPackageIsVersion2
+const _ = grpc.SupportPackageIsVersion3
 
 // Client API for Raft service
 
@@ -922,7 +924,8 @@ var _Raft_serviceDesc = grpc.ServiceDesc{
 			Handler:    _Raft_ResolveAddress_Handler,
 		},
 	},
-	Streams: []grpc.StreamDesc{},
+	Streams:  []grpc.StreamDesc{},
+	Metadata: fileDescriptorRaft,
 }
 
 // Client API for RaftMembership service
@@ -1022,7 +1025,8 @@ var _RaftMembership_serviceDesc = grpc.ServiceDesc{
 			Handler:    _RaftMembership_Leave_Handler,
 		},
 	},
-	Streams: []grpc.StreamDesc{},
+	Streams:  []grpc.StreamDesc{},
+	Metadata: fileDescriptorRaft,
 }
 
 func (m *RaftMember) Marshal() (data []byte, err error) {
@@ -1438,12 +1442,11 @@ func encodeVarintRaft(data []byte, offset int, v uint64) int {
 
 type raftProxyRaftServer struct {
 	local        RaftServer
-	connSelector raftpicker.Interface
-	cluster      raftpicker.RaftCluster
+	connSelector raftselector.ConnProvider
 	ctxMods      []func(context.Context) (context.Context, error)
 }
 
-func NewRaftProxyRaftServer(local RaftServer, connSelector raftpicker.Interface, cluster raftpicker.RaftCluster, ctxMod func(context.Context) (context.Context, error)) RaftServer {
+func NewRaftProxyRaftServer(local RaftServer, connSelector raftselector.ConnProvider, ctxMod func(context.Context) (context.Context, error)) RaftServer {
 	redirectChecker := func(ctx context.Context) (context.Context, error) {
 		s, ok := transport.StreamFromContext(ctx)
 		if !ok {
@@ -1465,7 +1468,6 @@ func NewRaftProxyRaftServer(local RaftServer, connSelector raftpicker.Interface,
 
 	return &raftProxyRaftServer{
 		local:        local,
-		cluster:      cluster,
 		connSelector: connSelector,
 		ctxMods:      mods,
 	}
@@ -1480,73 +1482,99 @@ func (p *raftProxyRaftServer) runCtxMods(ctx context.Context) (context.Context,
 	}
 	return ctx, nil
 }
+func (p *raftProxyRaftServer) pollNewLeaderConn(ctx context.Context) (*grpc.ClientConn, error) {
+	ticker := time.NewTicker(500 * time.Millisecond)
+	defer ticker.Stop()
+	for {
+		select {
+		case <-ticker.C:
+			conn, err := p.connSelector.LeaderConn(ctx)
+			if err != nil {
+				return nil, err
+			}
 
-func (p *raftProxyRaftServer) ProcessRaftMessage(ctx context.Context, r *ProcessRaftMessageRequest) (*ProcessRaftMessageResponse, error) {
+			client := NewHealthClient(conn)
 
-	if p.cluster.IsLeader() {
-		return p.local.ProcessRaftMessage(ctx, r)
+			resp, err := client.Check(ctx, &HealthCheckRequest{Service: "Raft"})
+			if err != nil || resp.Status != HealthCheckResponse_SERVING {
+				continue
+			}
+			return conn, nil
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		}
 	}
-	ctx, err := p.runCtxMods(ctx)
+}
+
+func (p *raftProxyRaftServer) ProcessRaftMessage(ctx context.Context, r *ProcessRaftMessageRequest) (*ProcessRaftMessageResponse, error) {
+
+	conn, err := p.connSelector.LeaderConn(ctx)
 	if err != nil {
+		if err == raftselector.ErrIsLeader {
+			return p.local.ProcessRaftMessage(ctx, r)
+		}
 		return nil, err
 	}
-	conn, err := p.connSelector.Conn()
+	modCtx, err := p.runCtxMods(ctx)
 	if err != nil {
 		return nil, err
 	}
 
-	defer func() {
+	resp, err := NewRaftClient(conn).ProcessRaftMessage(modCtx, r)
+	if err != nil {
+		if !strings.Contains(err.Error(), "is closing") && !strings.Contains(err.Error(), "the connection is unavailable") && !strings.Contains(err.Error(), "connection error") {
+			return resp, err
+		}
+		conn, err := p.pollNewLeaderConn(ctx)
 		if err != nil {
-			errStr := err.Error()
-			if strings.Contains(errStr, grpc.ErrClientConnClosing.Error()) ||
-				strings.Contains(errStr, grpc.ErrClientConnTimeout.Error()) ||
-				strings.Contains(errStr, "connection error") ||
-				grpc.Code(err) == codes.Internal {
-				p.connSelector.Reset()
+			if err == raftselector.ErrIsLeader {
+				return p.local.ProcessRaftMessage(ctx, r)
 			}
+			return nil, err
 		}
-	}()
-
-	return NewRaftClient(conn).ProcessRaftMessage(ctx, r)
+		return NewRaftClient(conn).ProcessRaftMessage(modCtx, r)
+	}
+	return resp, err
 }
 
 func (p *raftProxyRaftServer) ResolveAddress(ctx context.Context, r *ResolveAddressRequest) (*ResolveAddressResponse, error) {
 
-	if p.cluster.IsLeader() {
-		return p.local.ResolveAddress(ctx, r)
-	}
-	ctx, err := p.runCtxMods(ctx)
+	conn, err := p.connSelector.LeaderConn(ctx)
 	if err != nil {
+		if err == raftselector.ErrIsLeader {
+			return p.local.ResolveAddress(ctx, r)
+		}
 		return nil, err
 	}
-	conn, err := p.connSelector.Conn()
+	modCtx, err := p.runCtxMods(ctx)
 	if err != nil {
 		return nil, err
 	}
 
-	defer func() {
+	resp, err := NewRaftClient(conn).ResolveAddress(modCtx, r)
+	if err != nil {
+		if !strings.Contains(err.Error(), "is closing") && !strings.Contains(err.Error(), "the connection is unavailable") && !strings.Contains(err.Error(), "connection error") {
+			return resp, err
+		}
+		conn, err := p.pollNewLeaderConn(ctx)
 		if err != nil {
-			errStr := err.Error()
-			if strings.Contains(errStr, grpc.ErrClientConnClosing.Error()) ||
-				strings.Contains(errStr, grpc.ErrClientConnTimeout.Error()) ||
-				strings.Contains(errStr, "connection error") ||
-				grpc.Code(err) == codes.Internal {
-				p.connSelector.Reset()
+			if err == raftselector.ErrIsLeader {
+				return p.local.ResolveAddress(ctx, r)
 			}
+			return nil, err
 		}
-	}()
-
-	return NewRaftClient(conn).ResolveAddress(ctx, r)
+		return NewRaftClient(conn).ResolveAddress(modCtx, r)
+	}
+	return resp, err
 }
 
 type raftProxyRaftMembershipServer struct {
 	local        RaftMembershipServer
-	connSelector raftpicker.Interface
-	cluster      raftpicker.RaftCluster
+	connSelector raftselector.ConnProvider
 	ctxMods      []func(context.Context) (context.Context, error)
 }
 
-func NewRaftProxyRaftMembershipServer(local RaftMembershipServer, connSelector raftpicker.Interface, cluster raftpicker.RaftCluster, ctxMod func(context.Context) (context.Context, error)) RaftMembershipServer {
+func NewRaftProxyRaftMembershipServer(local RaftMembershipServer, connSelector raftselector.ConnProvider, ctxMod func(context.Context) (context.Context, error)) RaftMembershipServer {
 	redirectChecker := func(ctx context.Context) (context.Context, error) {
 		s, ok := transport.StreamFromContext(ctx)
 		if !ok {
@@ -1568,7 +1596,6 @@ func NewRaftProxyRaftMembershipServer(local RaftMembershipServer, connSelector r
 
 	return &raftProxyRaftMembershipServer{
 		local:        local,
-		cluster:      cluster,
 		connSelector: connSelector,
 		ctxMods:      mods,
 	}
@@ -1583,63 +1610,90 @@ func (p *raftProxyRaftMembershipServer) runCtxMods(ctx context.Context) (context
 	}
 	return ctx, nil
 }
+func (p *raftProxyRaftMembershipServer) pollNewLeaderConn(ctx context.Context) (*grpc.ClientConn, error) {
+	ticker := time.NewTicker(500 * time.Millisecond)
+	defer ticker.Stop()
+	for {
+		select {
+		case <-ticker.C:
+			conn, err := p.connSelector.LeaderConn(ctx)
+			if err != nil {
+				return nil, err
+			}
 
-func (p *raftProxyRaftMembershipServer) Join(ctx context.Context, r *JoinRequest) (*JoinResponse, error) {
+			client := NewHealthClient(conn)
 
-	if p.cluster.IsLeader() {
-		return p.local.Join(ctx, r)
+			resp, err := client.Check(ctx, &HealthCheckRequest{Service: "Raft"})
+			if err != nil || resp.Status != HealthCheckResponse_SERVING {
+				continue
+			}
+			return conn, nil
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		}
 	}
-	ctx, err := p.runCtxMods(ctx)
+}
+
+func (p *raftProxyRaftMembershipServer) Join(ctx context.Context, r *JoinRequest) (*JoinResponse, error) {
+
+	conn, err := p.connSelector.LeaderConn(ctx)
 	if err != nil {
+		if err == raftselector.ErrIsLeader {
+			return p.local.Join(ctx, r)
+		}
 		return nil, err
 	}
-	conn, err := p.connSelector.Conn()
+	modCtx, err := p.runCtxMods(ctx)
 	if err != nil {
 		return nil, err
 	}
 
-	defer func() {
+	resp, err := NewRaftMembershipClient(conn).Join(modCtx, r)
+	if err != nil {
+		if !strings.Contains(err.Error(), "is closing") && !strings.Contains(err.Error(), "the connection is unavailable") && !strings.Contains(err.Error(), "connection error") {
+			return resp, err
+		}
+		conn, err := p.pollNewLeaderConn(ctx)
 		if err != nil {
-			errStr := err.Error()
-			if strings.Contains(errStr, grpc.ErrClientConnClosing.Error()) ||
-				strings.Contains(errStr, grpc.ErrClientConnTimeout.Error()) ||
-				strings.Contains(errStr, "connection error") ||
-				grpc.Code(err) == codes.Internal {
-				p.connSelector.Reset()
+			if err == raftselector.ErrIsLeader {
+				return p.local.Join(ctx, r)
 			}
+			return nil, err
 		}
-	}()
-
-	return NewRaftMembershipClient(conn).Join(ctx, r)
+		return NewRaftMembershipClient(conn).Join(modCtx, r)
+	}
+	return resp, err
 }
 
 func (p *raftProxyRaftMembershipServer) Leave(ctx context.Context, r *LeaveRequest) (*LeaveResponse, error) {
 
-	if p.cluster.IsLeader() {
-		return p.local.Leave(ctx, r)
-	}
-	ctx, err := p.runCtxMods(ctx)
+	conn, err := p.connSelector.LeaderConn(ctx)
 	if err != nil {
+		if err == raftselector.ErrIsLeader {
+			return p.local.Leave(ctx, r)
+		}
 		return nil, err
 	}
-	conn, err := p.connSelector.Conn()
+	modCtx, err := p.runCtxMods(ctx)
 	if err != nil {
 		return nil, err
 	}
 
-	defer func() {
+	resp, err := NewRaftMembershipClient(conn).Leave(modCtx, r)
+	if err != nil {
+		if !strings.Contains(err.Error(), "is closing") && !strings.Contains(err.Error(), "the connection is unavailable") && !strings.Contains(err.Error(), "connection error") {
+			return resp, err
+		}
+		conn, err := p.pollNewLeaderConn(ctx)
 		if err != nil {
-			errStr := err.Error()
-			if strings.Contains(errStr, grpc.ErrClientConnClosing.Error()) ||
-				strings.Contains(errStr, grpc.ErrClientConnTimeout.Error()) ||
-				strings.Contains(errStr, "connection error") ||
-				grpc.Code(err) == codes.Internal {
-				p.connSelector.Reset()
+			if err == raftselector.ErrIsLeader {
+				return p.local.Leave(ctx, r)
 			}
+			return nil, err
 		}
-	}()
-
-	return NewRaftMembershipClient(conn).Leave(ctx, r)
+		return NewRaftMembershipClient(conn).Leave(modCtx, r)
+	}
+	return resp, err
 }
 
 func (m *RaftMember) Size() (n int) {
@@ -3205,6 +3259,8 @@ var (
 	ErrIntOverflowRaft   = fmt.Errorf("proto: integer overflow")
 )
 
+func init() { proto.RegisterFile("raft.proto", fileDescriptorRaft) }
+
 var fileDescriptorRaft = []byte{
 	// 868 bytes of a gzipped FileDescriptorProto
 	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0x8c, 0x95, 0x4f, 0x73, 0xdb, 0x44,

+ 1 - 1
vendor/src/github.com/docker/swarmkit/api/raft.proto

@@ -115,7 +115,7 @@ enum StoreActionKind {
 	STORE_ACTION_REMOVE = 3 [(gogoproto.enumvalue_customname) = "StoreActionKindRemove"];
 }
 
-// StoreAction defines a taret and operation to apply on the storage system.
+// StoreAction defines a target and operation to apply on the storage system.
 message StoreAction {
 	StoreActionKind action = 1;
 	oneof target {

+ 70 - 40
vendor/src/github.com/docker/swarmkit/api/resource.pb.go

@@ -21,10 +21,11 @@ import (
 	grpc "google.golang.org/grpc"
 )
 
-import raftpicker "github.com/docker/swarmkit/manager/raftpicker"
+import raftselector "github.com/docker/swarmkit/manager/raftselector"
 import codes "google.golang.org/grpc/codes"
 import metadata "google.golang.org/grpc/metadata"
 import transport "google.golang.org/grpc/transport"
+import time "time"
 
 import io "io"
 
@@ -197,11 +198,12 @@ func valueToGoStringResource(v interface{}, typ string) string {
 	pv := reflect.Indirect(rv).Interface()
 	return fmt.Sprintf("func(v %v) *%v { return &v } ( %#v )", typ, typ, pv)
 }
-func extensionToGoStringResource(e map[int32]github_com_gogo_protobuf_proto.Extension) string {
+func extensionToGoStringResource(m github_com_gogo_protobuf_proto.Message) string {
+	e := github_com_gogo_protobuf_proto.GetUnsafeExtensionsMap(m)
 	if e == nil {
 		return "nil"
 	}
-	s := "map[int32]proto.Extension{"
+	s := "proto.NewUnsafeXXX_InternalExtensions(map[int32]proto.Extension{"
 	keys := make([]int, 0, len(e))
 	for k := range e {
 		keys = append(keys, int(k))
@@ -211,7 +213,7 @@ func extensionToGoStringResource(e map[int32]github_com_gogo_protobuf_proto.Exte
 	for _, k := range keys {
 		ss = append(ss, strconv.Itoa(k)+": "+e[int32(k)].GoString())
 	}
-	s += strings.Join(ss, ",") + "}"
+	s += strings.Join(ss, ",") + "})"
 	return s
 }
 
@@ -221,7 +223,7 @@ var _ grpc.ClientConn
 
 // This is a compile-time assertion to ensure that this generated file
 // is compatible with the grpc package it is being compiled against.
-const _ = grpc.SupportPackageIsVersion2
+const _ = grpc.SupportPackageIsVersion3
 
 // Client API for ResourceAllocator service
 
@@ -316,7 +318,8 @@ var _ResourceAllocator_serviceDesc = grpc.ServiceDesc{
 			Handler:    _ResourceAllocator_DetachNetwork_Handler,
 		},
 	},
-	Streams: []grpc.StreamDesc{},
+	Streams:  []grpc.StreamDesc{},
+	Metadata: fileDescriptorResource,
 }
 
 func (m *AttachNetworkRequest) Marshal() (data []byte, err error) {
@@ -449,12 +452,11 @@ func encodeVarintResource(data []byte, offset int, v uint64) int {
 
 type raftProxyResourceAllocatorServer struct {
 	local        ResourceAllocatorServer
-	connSelector raftpicker.Interface
-	cluster      raftpicker.RaftCluster
+	connSelector raftselector.ConnProvider
 	ctxMods      []func(context.Context) (context.Context, error)
 }
 
-func NewRaftProxyResourceAllocatorServer(local ResourceAllocatorServer, connSelector raftpicker.Interface, cluster raftpicker.RaftCluster, ctxMod func(context.Context) (context.Context, error)) ResourceAllocatorServer {
+func NewRaftProxyResourceAllocatorServer(local ResourceAllocatorServer, connSelector raftselector.ConnProvider, ctxMod func(context.Context) (context.Context, error)) ResourceAllocatorServer {
 	redirectChecker := func(ctx context.Context) (context.Context, error) {
 		s, ok := transport.StreamFromContext(ctx)
 		if !ok {
@@ -476,7 +478,6 @@ func NewRaftProxyResourceAllocatorServer(local ResourceAllocatorServer, connSele
 
 	return &raftProxyResourceAllocatorServer{
 		local:        local,
-		cluster:      cluster,
 		connSelector: connSelector,
 		ctxMods:      mods,
 	}
@@ -491,63 +492,90 @@ func (p *raftProxyResourceAllocatorServer) runCtxMods(ctx context.Context) (cont
 	}
 	return ctx, nil
 }
+func (p *raftProxyResourceAllocatorServer) pollNewLeaderConn(ctx context.Context) (*grpc.ClientConn, error) {
+	ticker := time.NewTicker(500 * time.Millisecond)
+	defer ticker.Stop()
+	for {
+		select {
+		case <-ticker.C:
+			conn, err := p.connSelector.LeaderConn(ctx)
+			if err != nil {
+				return nil, err
+			}
 
-func (p *raftProxyResourceAllocatorServer) AttachNetwork(ctx context.Context, r *AttachNetworkRequest) (*AttachNetworkResponse, error) {
+			client := NewHealthClient(conn)
 
-	if p.cluster.IsLeader() {
-		return p.local.AttachNetwork(ctx, r)
+			resp, err := client.Check(ctx, &HealthCheckRequest{Service: "Raft"})
+			if err != nil || resp.Status != HealthCheckResponse_SERVING {
+				continue
+			}
+			return conn, nil
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		}
 	}
-	ctx, err := p.runCtxMods(ctx)
+}
+
+func (p *raftProxyResourceAllocatorServer) AttachNetwork(ctx context.Context, r *AttachNetworkRequest) (*AttachNetworkResponse, error) {
+
+	conn, err := p.connSelector.LeaderConn(ctx)
 	if err != nil {
+		if err == raftselector.ErrIsLeader {
+			return p.local.AttachNetwork(ctx, r)
+		}
 		return nil, err
 	}
-	conn, err := p.connSelector.Conn()
+	modCtx, err := p.runCtxMods(ctx)
 	if err != nil {
 		return nil, err
 	}
 
-	defer func() {
+	resp, err := NewResourceAllocatorClient(conn).AttachNetwork(modCtx, r)
+	if err != nil {
+		if !strings.Contains(err.Error(), "is closing") && !strings.Contains(err.Error(), "the connection is unavailable") && !strings.Contains(err.Error(), "connection error") {
+			return resp, err
+		}
+		conn, err := p.pollNewLeaderConn(ctx)
 		if err != nil {
-			errStr := err.Error()
-			if strings.Contains(errStr, grpc.ErrClientConnClosing.Error()) ||
-				strings.Contains(errStr, grpc.ErrClientConnTimeout.Error()) ||
-				strings.Contains(errStr, "connection error") ||
-				grpc.Code(err) == codes.Internal {
-				p.connSelector.Reset()
+			if err == raftselector.ErrIsLeader {
+				return p.local.AttachNetwork(ctx, r)
 			}
+			return nil, err
 		}
-	}()
-
-	return NewResourceAllocatorClient(conn).AttachNetwork(ctx, r)
+		return NewResourceAllocatorClient(conn).AttachNetwork(modCtx, r)
+	}
+	return resp, err
 }
 
 func (p *raftProxyResourceAllocatorServer) DetachNetwork(ctx context.Context, r *DetachNetworkRequest) (*DetachNetworkResponse, error) {
 
-	if p.cluster.IsLeader() {
-		return p.local.DetachNetwork(ctx, r)
-	}
-	ctx, err := p.runCtxMods(ctx)
+	conn, err := p.connSelector.LeaderConn(ctx)
 	if err != nil {
+		if err == raftselector.ErrIsLeader {
+			return p.local.DetachNetwork(ctx, r)
+		}
 		return nil, err
 	}
-	conn, err := p.connSelector.Conn()
+	modCtx, err := p.runCtxMods(ctx)
 	if err != nil {
 		return nil, err
 	}
 
-	defer func() {
+	resp, err := NewResourceAllocatorClient(conn).DetachNetwork(modCtx, r)
+	if err != nil {
+		if !strings.Contains(err.Error(), "is closing") && !strings.Contains(err.Error(), "the connection is unavailable") && !strings.Contains(err.Error(), "connection error") {
+			return resp, err
+		}
+		conn, err := p.pollNewLeaderConn(ctx)
 		if err != nil {
-			errStr := err.Error()
-			if strings.Contains(errStr, grpc.ErrClientConnClosing.Error()) ||
-				strings.Contains(errStr, grpc.ErrClientConnTimeout.Error()) ||
-				strings.Contains(errStr, "connection error") ||
-				grpc.Code(err) == codes.Internal {
-				p.connSelector.Reset()
+			if err == raftselector.ErrIsLeader {
+				return p.local.DetachNetwork(ctx, r)
 			}
+			return nil, err
 		}
-	}()
-
-	return NewResourceAllocatorClient(conn).DetachNetwork(ctx, r)
+		return NewResourceAllocatorClient(conn).DetachNetwork(modCtx, r)
+	}
+	return resp, err
 }
 
 func (m *AttachNetworkRequest) Size() (n int) {
@@ -1076,6 +1104,8 @@ var (
 	ErrIntOverflowResource   = fmt.Errorf("proto: integer overflow")
 )
 
+func init() { proto.RegisterFile("resource.proto", fileDescriptorResource) }
+
 var fileDescriptorResource = []byte{
 	// 373 bytes of a gzipped FileDescriptorProto
 	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0xe2, 0xe2, 0x2b, 0x4a, 0x2d, 0xce,

+ 6 - 3
vendor/src/github.com/docker/swarmkit/api/snapshot.pb.go

@@ -222,11 +222,12 @@ func valueToGoStringSnapshot(v interface{}, typ string) string {
 	pv := reflect.Indirect(rv).Interface()
 	return fmt.Sprintf("func(v %v) *%v { return &v } ( %#v )", typ, typ, pv)
 }
-func extensionToGoStringSnapshot(e map[int32]github_com_gogo_protobuf_proto.Extension) string {
+func extensionToGoStringSnapshot(m github_com_gogo_protobuf_proto.Message) string {
+	e := github_com_gogo_protobuf_proto.GetUnsafeExtensionsMap(m)
 	if e == nil {
 		return "nil"
 	}
-	s := "map[int32]proto.Extension{"
+	s := "proto.NewUnsafeXXX_InternalExtensions(map[int32]proto.Extension{"
 	keys := make([]int, 0, len(e))
 	for k := range e {
 		keys = append(keys, int(k))
@@ -236,7 +237,7 @@ func extensionToGoStringSnapshot(e map[int32]github_com_gogo_protobuf_proto.Exte
 	for _, k := range keys {
 		ss = append(ss, strconv.Itoa(k)+": "+e[int32(k)].GoString())
 	}
-	s += strings.Join(ss, ",") + "}"
+	s += strings.Join(ss, ",") + "})"
 	return s
 }
 func (m *StoreSnapshot) Marshal() (data []byte, err error) {
@@ -1085,6 +1086,8 @@ var (
 	ErrIntOverflowSnapshot   = fmt.Errorf("proto: integer overflow")
 )
 
+func init() { proto.RegisterFile("snapshot.proto", fileDescriptorSnapshot) }
+
 var fileDescriptorSnapshot = []byte{
 	// 396 bytes of a gzipped FileDescriptorProto
 	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0x7c, 0x92, 0xbd, 0x6e, 0xdb, 0x30,

+ 49 - 41
vendor/src/github.com/docker/swarmkit/api/specs.pb.go

@@ -1047,11 +1047,12 @@ func valueToGoStringSpecs(v interface{}, typ string) string {
 	pv := reflect.Indirect(rv).Interface()
 	return fmt.Sprintf("func(v %v) *%v { return &v } ( %#v )", typ, typ, pv)
 }
-func extensionToGoStringSpecs(e map[int32]github_com_gogo_protobuf_proto.Extension) string {
+func extensionToGoStringSpecs(m github_com_gogo_protobuf_proto.Message) string {
+	e := github_com_gogo_protobuf_proto.GetUnsafeExtensionsMap(m)
 	if e == nil {
 		return "nil"
 	}
-	s := "map[int32]proto.Extension{"
+	s := "proto.NewUnsafeXXX_InternalExtensions(map[int32]proto.Extension{"
 	keys := make([]int, 0, len(e))
 	for k := range e {
 		keys = append(keys, int(k))
@@ -1061,7 +1062,7 @@ func extensionToGoStringSpecs(e map[int32]github_com_gogo_protobuf_proto.Extensi
 	for _, k := range keys {
 		ss = append(ss, strconv.Itoa(k)+": "+e[int32(k)].GoString())
 	}
-	s += strings.Join(ss, ",") + "}"
+	s += strings.Join(ss, ",") + "})"
 	return s
 }
 func (m *NodeSpec) Marshal() (data []byte, err error) {
@@ -3252,50 +3253,55 @@ func (m *ContainerSpec) Unmarshal(data []byte) error {
 			}
 			mapkey := string(data[iNdEx:postStringIndexmapkey])
 			iNdEx = postStringIndexmapkey
-			var valuekey uint64
-			for shift := uint(0); ; shift += 7 {
-				if shift >= 64 {
-					return ErrIntOverflowSpecs
-				}
-				if iNdEx >= l {
-					return io.ErrUnexpectedEOF
+			if m.Labels == nil {
+				m.Labels = make(map[string]string)
+			}
+			if iNdEx < postIndex {
+				var valuekey uint64
+				for shift := uint(0); ; shift += 7 {
+					if shift >= 64 {
+						return ErrIntOverflowSpecs
+					}
+					if iNdEx >= l {
+						return io.ErrUnexpectedEOF
+					}
+					b := data[iNdEx]
+					iNdEx++
+					valuekey |= (uint64(b) & 0x7F) << shift
+					if b < 0x80 {
+						break
+					}
 				}
-				b := data[iNdEx]
-				iNdEx++
-				valuekey |= (uint64(b) & 0x7F) << shift
-				if b < 0x80 {
-					break
+				var stringLenmapvalue uint64
+				for shift := uint(0); ; shift += 7 {
+					if shift >= 64 {
+						return ErrIntOverflowSpecs
+					}
+					if iNdEx >= l {
+						return io.ErrUnexpectedEOF
+					}
+					b := data[iNdEx]
+					iNdEx++
+					stringLenmapvalue |= (uint64(b) & 0x7F) << shift
+					if b < 0x80 {
+						break
+					}
 				}
-			}
-			var stringLenmapvalue uint64
-			for shift := uint(0); ; shift += 7 {
-				if shift >= 64 {
-					return ErrIntOverflowSpecs
+				intStringLenmapvalue := int(stringLenmapvalue)
+				if intStringLenmapvalue < 0 {
+					return ErrInvalidLengthSpecs
 				}
-				if iNdEx >= l {
+				postStringIndexmapvalue := iNdEx + intStringLenmapvalue
+				if postStringIndexmapvalue > l {
 					return io.ErrUnexpectedEOF
 				}
-				b := data[iNdEx]
-				iNdEx++
-				stringLenmapvalue |= (uint64(b) & 0x7F) << shift
-				if b < 0x80 {
-					break
-				}
-			}
-			intStringLenmapvalue := int(stringLenmapvalue)
-			if intStringLenmapvalue < 0 {
-				return ErrInvalidLengthSpecs
+				mapvalue := string(data[iNdEx:postStringIndexmapvalue])
+				iNdEx = postStringIndexmapvalue
+				m.Labels[mapkey] = mapvalue
+			} else {
+				var mapvalue string
+				m.Labels[mapkey] = mapvalue
 			}
-			postStringIndexmapvalue := iNdEx + intStringLenmapvalue
-			if postStringIndexmapvalue > l {
-				return io.ErrUnexpectedEOF
-			}
-			mapvalue := string(data[iNdEx:postStringIndexmapvalue])
-			iNdEx = postStringIndexmapvalue
-			if m.Labels == nil {
-				m.Labels = make(map[string]string)
-			}
-			m.Labels[mapkey] = mapvalue
 			iNdEx = postIndex
 		case 3:
 			if wireType != 2 {
@@ -4339,6 +4345,8 @@ var (
 	ErrIntOverflowSpecs   = fmt.Errorf("proto: integer overflow")
 )
 
+func init() { proto.RegisterFile("specs.proto", fileDescriptorSpecs) }
+
 var fileDescriptorSpecs = []byte{
 	// 1397 bytes of a gzipped FileDescriptorProto
 	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0xac, 0x57, 0x4f, 0x6f, 0xdb, 0xc6,

+ 9 - 4
vendor/src/github.com/docker/swarmkit/api/timestamp/timestamp.pb.go

@@ -32,7 +32,9 @@ var _ = math.Inf
 
 // This is a compile-time assertion to ensure that this generated file
 // is compatible with the proto package it is being compiled against.
-const _ = proto.GoGoProtoPackageIsVersion1
+// A compilation error at this line likely means your copy of the
+// proto package needs to be updated.
+const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package
 
 // A Timestamp represents a point in time independent of any time zone
 // or calendar, represented as seconds and fractions of seconds at
@@ -141,11 +143,12 @@ func valueToGoStringTimestamp(v interface{}, typ string) string {
 	pv := reflect.Indirect(rv).Interface()
 	return fmt.Sprintf("func(v %v) *%v { return &v } ( %#v )", typ, typ, pv)
 }
-func extensionToGoStringTimestamp(e map[int32]github_com_gogo_protobuf_proto.Extension) string {
+func extensionToGoStringTimestamp(m github_com_gogo_protobuf_proto.Message) string {
+	e := github_com_gogo_protobuf_proto.GetUnsafeExtensionsMap(m)
 	if e == nil {
 		return "nil"
 	}
-	s := "map[int32]proto.Extension{"
+	s := "proto.NewUnsafeXXX_InternalExtensions(map[int32]proto.Extension{"
 	keys := make([]int, 0, len(e))
 	for k := range e {
 		keys = append(keys, int(k))
@@ -155,7 +158,7 @@ func extensionToGoStringTimestamp(e map[int32]github_com_gogo_protobuf_proto.Ext
 	for _, k := range keys {
 		ss = append(ss, strconv.Itoa(k)+": "+e[int32(k)].GoString())
 	}
-	s += strings.Join(ss, ",") + "}"
+	s += strings.Join(ss, ",") + "})"
 	return s
 }
 func (m *Timestamp) Marshal() (data []byte, err error) {
@@ -451,6 +454,8 @@ var (
 	ErrIntOverflowTimestamp   = fmt.Errorf("proto: integer overflow")
 )
 
+func init() { proto.RegisterFile("timestamp.proto", fileDescriptorTimestamp) }
+
 var fileDescriptorTimestamp = []byte{
 	// 205 bytes of a gzipped FileDescriptorProto
 	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0xe2, 0xe2, 0x2f, 0xc9, 0xcc, 0x4d,

文件差异内容过多而无法显示
+ 431 - 275
vendor/src/github.com/docker/swarmkit/api/types.pb.go


+ 43 - 9
vendor/src/github.com/docker/swarmkit/api/types.proto

@@ -281,15 +281,46 @@ message UpdateConfig {
 	enum FailureAction {
 		PAUSE = 0;
 		CONTINUE = 1;
-		// TODO(aaronl): Add ROLLBACK as a supported failure mode.
-		// (#486)
+		// NOTE: Automated rollback triggered as a failure action is an
+		// experimental feature that is not yet exposed to the end
+		// user. Currently, rollbacks must be initiated manually
+		// through the API by setting Spec to PreviousSpec. We may
+		// decide to expose automatic rollback in the future based on
+		// user feedback, or remove this feature otherwise.
+		ROLLBACK = 2;
 	}
 
 	// FailureAction is the action to take when an update fails.
-	// Currently, a failure is defined as a single updated task failing to
-	// reach the RUNNING state. In the future, there will be configuration
-	// to define what is treated as a failure (see #486 for a proposal).
 	FailureAction failure_action = 3;
+
+	// Monitor indicates how long to monitor a task for failure after it is
+	// created. If the task fails by ending up in one of the states
+	// REJECTED, COMPLETED, or FAILED, within Monitor from its creation,
+	// this counts as a failure. If it fails after Monitor, it does not
+	// count as a failure. If Monitor is unspecified, a default value will
+	// be used.
+	Duration monitor = 4;
+
+	// AllowedFailureFraction is the fraction of tasks that may fail during
+	// an update before the failure action is invoked. Any task created by
+	// the current update which ends up in one of the states REJECTED,
+	// COMPLETED or FAILED within Monitor from its creation counts as a
+	// failure. The number of failures is divided by the number of tasks
+	// being updated, and if this fraction is greater than
+	// AllowedFailureFraction, the failure action is invoked.
+	//
+	// If the failure action is CONTINUE, there is no effect.
+	// If the failure action is PAUSE, no more tasks will be updated until
+	// another update is started.
+	// If the failure action is ROLLBACK, the orchestrator will attempt to
+	// roll back to the previous service spec. If the AllowedFailureFraction
+	// threshold is hit during the rollback, the rollback will pause.
+	//
+	// TODO(aaronl): Should there be a separate failure threshold for
+	// rollbacks? Should there be a failure action for rollbacks (to allow
+	// them to do something other than pause when the rollback encounters
+	// errors)?
+	float allowed_failure_fraction = 5;
 }
 
 // UpdateStatus is the status of an update in progress.
@@ -299,18 +330,21 @@ message UpdateStatus {
 		UPDATING = 1;
 		PAUSED = 2;
 		COMPLETED = 3;
-		// TODO(aaronl): add ROLLING_BACK, ROLLED_BACK as part of
-		// rollback support.
+		ROLLBACK_STARTED = 4;
+		ROLLBACK_PAUSED = 5; // if a rollback fails
+		ROLLBACK_COMPLETED = 6;
 	}
 
 	// State is the state of this update. It indicates whether the
-	// update is in progress, completed, or is paused.
+	// update is in progress, completed, paused, rolling back, or
+	// finished rolling back.
 	UpdateState state = 1;
 
 	// StartedAt is the time at which the update was started.
 	Timestamp started_at = 2;
 
-	// CompletedAt is the time at which the update completed.
+	// CompletedAt is the time at which the update completed successfully,
+	// paused, or finished rolling back.
 	Timestamp completed_at = 3;
 
 	// TODO(aaronl): Consider adding a timestamp showing when the most

+ 9 - 30
vendor/src/github.com/docker/swarmkit/ca/certificates.go

@@ -16,7 +16,6 @@ import (
 	"path/filepath"
 	"time"
 
-	log "github.com/Sirupsen/logrus"
 	cfcsr "github.com/cloudflare/cfssl/csr"
 	"github.com/cloudflare/cfssl/helpers"
 	"github.com/cloudflare/cfssl/initca"
@@ -117,8 +116,7 @@ func (rca *RootCA) CanSign() bool {
 func (rca *RootCA) IssueAndSaveNewCertificates(paths CertPaths, cn, ou, org string) (*tls.Certificate, error) {
 	csr, key, err := GenerateAndWriteNewKey(paths)
 	if err != nil {
-		log.Debugf("error when generating new node certs: %v", err)
-		return nil, err
+		return nil, fmt.Errorf("error when generating new node certs: %v", err)
 	}
 
 	if !rca.CanSign() {
@@ -128,8 +126,7 @@ func (rca *RootCA) IssueAndSaveNewCertificates(paths CertPaths, cn, ou, org stri
 	// Obtain a signed Certificate
 	certChain, err := rca.ParseValidateAndSignCSR(csr, cn, ou, org)
 	if err != nil {
-		log.Debugf("failed to sign node certificate: %v", err)
-		return nil, err
+		return nil, fmt.Errorf("failed to sign node certificate: %v", err)
 	}
 
 	// Ensure directory exists
@@ -149,20 +146,18 @@ func (rca *RootCA) IssueAndSaveNewCertificates(paths CertPaths, cn, ou, org stri
 		return nil, err
 	}
 
-	log.Debugf("locally issued new TLS certificate for node ID: %s and role: %s", cn, ou)
 	return &tlsKeyPair, nil
 }
 
 // RequestAndSaveNewCertificates gets new certificates issued, either by signing them locally if a signer is
 // available, or by requesting them from the remote server at remoteAddr.
-func (rca *RootCA) RequestAndSaveNewCertificates(ctx context.Context, paths CertPaths, token string, remotes remotes.Remotes, transport credentials.TransportAuthenticator, nodeInfo chan<- api.IssueNodeCertificateResponse) (*tls.Certificate, error) {
+func (rca *RootCA) RequestAndSaveNewCertificates(ctx context.Context, paths CertPaths, token string, remotes remotes.Remotes, transport credentials.TransportCredentials, nodeInfo chan<- api.IssueNodeCertificateResponse) (*tls.Certificate, error) {
 	// Create a new key/pair and CSR for the new manager
 	// Write the new CSR and the new key to a temporary location so we can survive crashes on rotation
 	tempPaths := genTempPaths(paths)
 	csr, key, err := GenerateAndWriteNewKey(tempPaths)
 	if err != nil {
-		log.Debugf("error when generating new node certs: %v", err)
-		return nil, err
+		return nil, fmt.Errorf("error when generating new node certs: %v", err)
 	}
 
 	// Get the remote manager to issue a CA signed certificate for this node
@@ -174,7 +169,6 @@ func (rca *RootCA) RequestAndSaveNewCertificates(ctx context.Context, paths Cert
 		if err == nil {
 			break
 		}
-		log.Warningf("error fetching signed node certificate: %v", err)
 	}
 	if err != nil {
 		return nil, err
@@ -206,10 +200,6 @@ func (rca *RootCA) RequestAndSaveNewCertificates(ctx context.Context, paths Cert
 		return nil, err
 	}
 
-	if len(X509Cert.Subject.OrganizationalUnit) != 0 {
-		log.Infof("Downloaded new TLS credentials with role: %s.", X509Cert.Subject.OrganizationalUnit[0])
-	}
-
 	// Ensure directory exists
 	err = os.MkdirAll(filepath.Dir(paths.Cert), 0755)
 	if err != nil {
@@ -259,8 +249,7 @@ func (rca *RootCA) ParseValidateAndSignCSR(csrBytes []byte, cn, ou, org string)
 
 	cert, err := rca.Signer.Sign(signRequest)
 	if err != nil {
-		log.Debugf("failed to sign node certificate: %v", err)
-		return nil, err
+		return nil, fmt.Errorf("failed to sign node certificate: %v", err)
 	}
 
 	return rca.AppendFirstRootPEM(cert)
@@ -342,8 +331,7 @@ func NewRootCA(certBytes, keyBytes []byte, certExpiry time.Duration) (RootCA, er
 	if err != nil {
 		priv, err = helpers.ParsePrivateKeyPEMWithPassword(keyBytes, passphrasePrev)
 		if err != nil {
-			log.Debug("Malformed private key %v", err)
-			return RootCA{}, err
+			return RootCA{}, fmt.Errorf("Malformed private key: %v", err)
 		}
 	}
 
@@ -414,12 +402,7 @@ func GetLocalRootCA(baseDir string) (RootCA, error) {
 		key = nil
 	}
 
-	rootCA, err := NewRootCA(cert, key, DefaultNodeCertExpiration)
-	if err == nil {
-		log.Debugf("successfully loaded the Root CA: %s", paths.RootCA.Cert)
-	}
-
-	return rootCA, err
+	return NewRootCA(cert, key, DefaultNodeCertExpiration)
 }
 
 // GetRemoteCA returns the remote endpoint's CA certificate
@@ -552,8 +535,7 @@ func GenerateAndSignNewTLSCert(rootCA RootCA, cn, ou, org string, paths CertPath
 	// Obtain a signed Certificate
 	certChain, err := rootCA.ParseValidateAndSignCSR(csr, cn, ou, org)
 	if err != nil {
-		log.Debugf("failed to sign node certificate: %v", err)
-		return nil, err
+		return nil, fmt.Errorf("failed to sign node certificate: %v", err)
 	}
 
 	// Ensure directory exists
@@ -603,7 +585,7 @@ func GenerateAndWriteNewKey(paths CertPaths) (csr, key []byte, err error) {
 
 // GetRemoteSignedCertificate submits a CSR to a remote CA server address,
 // and that is part of a CA identified by a specific certificate pool.
-func GetRemoteSignedCertificate(ctx context.Context, csr []byte, token string, rootCAPool *x509.CertPool, r remotes.Remotes, creds credentials.TransportAuthenticator, nodeInfo chan<- api.IssueNodeCertificateResponse) ([]byte, error) {
+func GetRemoteSignedCertificate(ctx context.Context, csr []byte, token string, rootCAPool *x509.CertPool, r remotes.Remotes, creds credentials.TransportCredentials, nodeInfo chan<- api.IssueNodeCertificateResponse) ([]byte, error) {
 	if rootCAPool == nil {
 		return nil, fmt.Errorf("valid root CA pool required")
 	}
@@ -653,7 +635,6 @@ func GetRemoteSignedCertificate(ctx context.Context, csr []byte, token string, r
 		Max:    30 * time.Second,
 	})
 
-	log.Infof("Waiting for TLS certificate to be issued...")
 	// Exponential backoff with Max of 30 seconds to wait for a new retry
 	for {
 		// Send the Request and retrieve the certificate
@@ -694,7 +675,6 @@ func readCertExpiration(paths CertPaths) (time.Duration, error) {
 	// Read the Cert
 	cert, err := ioutil.ReadFile(paths.Cert)
 	if err != nil {
-		log.Debugf("failed to read certificate file: %s", paths.Cert)
 		return time.Hour, err
 	}
 
@@ -730,7 +710,6 @@ func generateNewCSR() (csr, key []byte, err error) {
 
 	csr, key, err = cfcsr.ParseRequest(req)
 	if err != nil {
-		log.Debugf(`failed to generate CSR`)
 		return
 	}
 

+ 51 - 22
vendor/src/github.com/docker/swarmkit/ca/config.go

@@ -15,11 +15,12 @@ import (
 	"sync"
 	"time"
 
-	log "github.com/Sirupsen/logrus"
+	"github.com/Sirupsen/logrus"
 	cfconfig "github.com/cloudflare/cfssl/config"
 	"github.com/docker/distribution/digest"
 	"github.com/docker/swarmkit/api"
 	"github.com/docker/swarmkit/identity"
+	"github.com/docker/swarmkit/log"
 	"github.com/docker/swarmkit/remotes"
 
 	"golang.org/x/net/context"
@@ -35,8 +36,8 @@ const (
 	rootCN = "swarm-ca"
 	// ManagerRole represents the Manager node type, and is used for authorization to endpoints
 	ManagerRole = "swarm-manager"
-	// AgentRole represents the Agent node type, and is used for authorization to endpoints
-	AgentRole = "swarm-worker"
+	// WorkerRole represents the Worker node type, and is used for authorization to endpoints
+	WorkerRole = "swarm-worker"
 	// CARole represents the CA node type, and is used for clients attempting to get new certificates issued
 	CARole = "swarm-ca"
 
@@ -184,6 +185,7 @@ func getCAHashFromToken(token string) (digest.Digest, error) {
 // Every node requires at least a set of TLS certificates with which to join the cluster.
 // In the case of a manager, these certificates will be used both for client and server credentials.
 func LoadOrCreateSecurityConfig(ctx context.Context, baseCertDir, token, proposedRole string, remotes remotes.Remotes, nodeInfo chan<- api.IssueNodeCertificateResponse) (*SecurityConfig, error) {
+	ctx = log.WithModule(ctx, "tls")
 	paths := NewConfigPaths(baseCertDir)
 
 	var (
@@ -196,9 +198,9 @@ func LoadOrCreateSecurityConfig(ctx context.Context, baseCertDir, token, propose
 	rootCA, err = GetLocalRootCA(baseCertDir)
 	switch err {
 	case nil:
-		log.Debugf("loaded local CA certificate: %s.", paths.RootCA.Cert)
+		log.G(ctx).Debug("loaded CA certificate")
 	case ErrNoLocalRootCA:
-		log.Debugf("no valid local CA certificate found: %v", err)
+		log.G(ctx).WithError(err).Debugf("failed to load local CA certificate")
 
 		// Get a digest for the optional CA hash string that we've been provided
 		// If we were provided a non-empty string, and it is an invalid hash, return
@@ -221,7 +223,7 @@ func LoadOrCreateSecurityConfig(ctx context.Context, baseCertDir, token, propose
 			if err == nil {
 				break
 			}
-			log.Warningf("failed to retrieve remote root CA certificate: %v", err)
+			log.G(ctx).WithError(err).Errorf("failed to retrieve remote root CA certificate")
 		}
 		if err != nil {
 			return nil, err
@@ -232,7 +234,7 @@ func LoadOrCreateSecurityConfig(ctx context.Context, baseCertDir, token, propose
 			return nil, err
 		}
 
-		log.Debugf("downloaded remote CA certificate.")
+		log.G(ctx).Debugf("retrieved remote CA certificate: %s", paths.RootCA.Cert)
 	default:
 		return nil, err
 	}
@@ -242,7 +244,7 @@ func LoadOrCreateSecurityConfig(ctx context.Context, baseCertDir, token, propose
 	// load our certificates.
 	clientTLSCreds, serverTLSCreds, err = LoadTLSCreds(rootCA, paths.Node)
 	if err != nil {
-		log.Debugf("no valid local TLS credentials found: %v", err)
+		log.G(ctx).WithError(err).Debugf("no node credentials found in: %s", paths.Node.Cert)
 
 		var (
 			tlsKeyPair *tls.Certificate
@@ -262,17 +264,27 @@ func LoadOrCreateSecurityConfig(ctx context.Context, baseCertDir, token, propose
 			}
 			tlsKeyPair, err = rootCA.IssueAndSaveNewCertificates(paths.Node, cn, proposedRole, org)
 			if err != nil {
+				log.G(ctx).WithFields(logrus.Fields{
+					"node.id":   cn,
+					"node.role": proposedRole,
+				}).WithError(err).Errorf("failed to issue and save new certificate")
 				return nil, err
 			}
+
+			log.G(ctx).WithFields(logrus.Fields{
+				"node.id":   cn,
+				"node.role": proposedRole,
+			}).Debug("issued new TLS certificate")
 		} else {
 			// There was an error loading our Credentials, let's get a new certificate issued
 			// Last argument is nil because at this point we don't have any valid TLS creds
 			tlsKeyPair, err = rootCA.RequestAndSaveNewCertificates(ctx, paths.Node, token, remotes, nil, nodeInfo)
 			if err != nil {
+				log.G(ctx).WithError(err).Error("failed to request save new certificate")
 				return nil, err
 			}
 		}
-		// Create the Server TLS Credentials for this node. These will not be used by agents.
+		// Create the Server TLS Credentials for this node. These will not be used by workers.
 		serverTLSCreds, err = rootCA.NewServerTLSCredentials(tlsKeyPair)
 		if err != nil {
 			return nil, err
@@ -284,7 +296,10 @@ func LoadOrCreateSecurityConfig(ctx context.Context, baseCertDir, token, propose
 		if err != nil {
 			return nil, err
 		}
-		log.Debugf("new TLS credentials generated: %s.", paths.Node.Cert)
+		log.G(ctx).WithFields(logrus.Fields{
+			"node.id":   clientTLSCreds.NodeID(),
+			"node.role": clientTLSCreds.Role(),
+		}).Debugf("new node credentials generated: %s", paths.Node.Cert)
 	} else {
 		if nodeInfo != nil {
 			nodeInfo <- api.IssueNodeCertificateResponse{
@@ -292,7 +307,10 @@ func LoadOrCreateSecurityConfig(ctx context.Context, baseCertDir, token, propose
 				NodeMembership: api.NodeMembershipAccepted,
 			}
 		}
-		log.Debugf("loaded local TLS credentials: %s.", paths.Node.Cert)
+		log.G(ctx).WithFields(logrus.Fields{
+			"node.id":   clientTLSCreds.NodeID(),
+			"node.role": clientTLSCreds.Role(),
+		}).Debug("loaded node credentials")
 	}
 
 	return NewSecurityConfig(&rootCA, clientTLSCreds, serverTLSCreds), nil
@@ -308,6 +326,11 @@ func RenewTLSConfig(ctx context.Context, s *SecurityConfig, baseCertDir string,
 		var retry time.Duration
 		defer close(updates)
 		for {
+			ctx = log.WithModule(ctx, "tls")
+			log := log.G(ctx).WithFields(logrus.Fields{
+				"node.id":   s.ClientTLSCreds.NodeID(),
+				"node.role": s.ClientTLSCreds.Role(),
+			})
 			// Our starting default will be 5 minutes
 			retry = 5 * time.Minute
 
@@ -323,21 +346,27 @@ func RenewTLSConfig(ctx context.Context, s *SecurityConfig, baseCertDir string,
 				// If we have an expired certificate, let's stick with the starting default in
 				// the hope that this is a temporary clock skew.
 				if expiresIn.Minutes() < 0 {
-					log.Debugf("failed to create a new client TLS config: %v", err)
-					updates <- CertificateUpdate{Err: fmt.Errorf("TLS Certificate is expired")}
+					log.WithError(err).Errorf("failed to create a new client TLS config")
+					updates <- CertificateUpdate{Err: fmt.Errorf("TLS certificate is expired")}
 				} else {
 					// Random retry time between 50% and 80% of the total time to expiration
 					retry = calculateRandomExpiry(expiresIn)
 				}
 			}
 
+			log.WithFields(logrus.Fields{
+				"time": time.Now().Add(retry),
+			}).Debugf("next certificate renewal scheduled")
+
 			select {
 			case <-time.After(retry):
+				log.Infof("renewing certificate")
 			case <-renew:
+				log.Infof("forced certificate renewal")
 			case <-ctx.Done():
+				log.Infof("shuting down certificate renewal routine")
 				return
 			}
-			log.Infof("Renewing TLS Certificate.")
 
 			// Let's request new certs. Renewals don't require a token.
 			rootCA := s.RootCA()
@@ -348,25 +377,25 @@ func RenewTLSConfig(ctx context.Context, s *SecurityConfig, baseCertDir string,
 				s.ClientTLSCreds,
 				nil)
 			if err != nil {
-				log.Debugf("failed to renew the TLS Certificate: %v", err)
+				log.WithError(err).Errorf("failed to renew the certificate")
 				updates <- CertificateUpdate{Err: err}
 				continue
 			}
 
 			clientTLSConfig, err := NewClientTLSConfig(tlsKeyPair, rootCA.Pool, CARole)
 			if err != nil {
-				log.Debugf("failed to create a new client TLS config: %v", err)
+				log.WithError(err).Errorf("failed to create a new client config")
 				updates <- CertificateUpdate{Err: err}
 			}
 			serverTLSConfig, err := NewServerTLSConfig(tlsKeyPair, rootCA.Pool)
 			if err != nil {
-				log.Debugf("failed to create a new server TLS config: %v", err)
+				log.WithError(err).Errorf("failed to create a new server config")
 				updates <- CertificateUpdate{Err: err}
 			}
 
 			err = s.ClientTLSCreds.LoadNewTLSConfig(clientTLSConfig)
 			if err != nil {
-				log.Debugf("failed to update the client TLS credentials: %v", err)
+				log.WithError(err).Errorf("failed to update the client credentials")
 				updates <- CertificateUpdate{Err: err}
 			}
 
@@ -380,7 +409,7 @@ func RenewTLSConfig(ctx context.Context, s *SecurityConfig, baseCertDir string,
 
 			err = s.ServerTLSCreds.LoadNewTLSConfig(serverTLSConfig)
 			if err != nil {
-				log.Debugf("failed to update the server TLS credentials: %v", err)
+				log.WithError(err).Errorf("failed to update the server TLS credentials")
 				updates <- CertificateUpdate{Err: err}
 			}
 
@@ -478,7 +507,7 @@ func LoadTLSCreds(rootCA RootCA, paths CertPaths) (*MutableTLSCreds, *MutableTLS
 	}
 
 	// Load the Certificates also as client credentials.
-	// Both Agents and Managers always connect to remote Managers,
+	// Both workers and managers always connect to remote managers,
 	// so ServerName is always set to ManagerRole here.
 	clientTLSCreds, err := rootCA.NewClientTLSCredentials(&keyPair, ManagerRole)
 	if err != nil {
@@ -561,7 +590,7 @@ func ParseRole(apiRole api.NodeRole) (string, error) {
 	case api.NodeRoleManager:
 		return ManagerRole, nil
 	case api.NodeRoleWorker:
-		return AgentRole, nil
+		return WorkerRole, nil
 	default:
 		return "", fmt.Errorf("failed to parse api role: %v", apiRole)
 	}
@@ -572,7 +601,7 @@ func FormatRole(role string) (api.NodeRole, error) {
 	switch strings.ToLower(role) {
 	case strings.ToLower(ManagerRole):
 		return api.NodeRoleManager, nil
-	case strings.ToLower(AgentRole):
+	case strings.ToLower(WorkerRole):
 		return api.NodeRoleWorker, nil
 	default:
 		return 0, fmt.Errorf("failed to parse role: %s", role)

+ 4 - 4
vendor/src/github.com/docker/swarmkit/ca/server.go

@@ -149,14 +149,14 @@ func (s *Server) IssueNodeCertificate(ctx context.Context, request *api.IssueNod
 	}
 	defer s.doneTask()
 
-	// If the remote node is an Agent (either forwarded by a manager, or calling directly),
-	// issue a renew agent certificate entry with the correct ID
-	nodeID, err := AuthorizeForwardedRoleAndOrg(ctx, []string{AgentRole}, []string{ManagerRole}, s.securityConfig.ClientTLSCreds.Organization())
+	// If the remote node is a worker (either forwarded by a manager, or calling directly),
+	// issue a renew worker certificate entry with the correct ID
+	nodeID, err := AuthorizeForwardedRoleAndOrg(ctx, []string{WorkerRole}, []string{ManagerRole}, s.securityConfig.ClientTLSCreds.Organization())
 	if err == nil {
 		return s.issueRenewCertificate(ctx, nodeID, request.CSR)
 	}
 
-	// If the remote node is a Manager (either forwarded by another manager, or calling directly),
+	// If the remote node is a manager (either forwarded by another manager, or calling directly),
 	// issue a renew certificate entry with the correct ID
 	nodeID, err = AuthorizeForwardedRoleAndOrg(ctx, []string{ManagerRole}, []string{ManagerRole}, s.securityConfig.ClientTLSCreds.Organization())
 	if err == nil {

+ 19 - 27
vendor/src/github.com/docker/swarmkit/ca/transport.go

@@ -8,7 +8,6 @@ import (
 	"net"
 	"strings"
 	"sync"
-	"time"
 
 	"google.golang.org/grpc/credentials"
 
@@ -33,12 +32,12 @@ type MutableTLSCreds struct {
 	// TLS configuration
 	config *tls.Config
 	// TLS Credentials
-	tlsCreds credentials.TransportAuthenticator
+	tlsCreds credentials.TransportCredentials
 	// store the subject for easy access
 	subject pkix.Name
 }
 
-// Info implements the credentials.TransportAuthenticator interface
+// Info implements the credentials.TransportCredentials interface
 func (c *MutableTLSCreds) Info() credentials.ProtocolInfo {
 	return credentials.ProtocolInfo{
 		SecurityProtocol: "tls",
@@ -46,26 +45,19 @@ func (c *MutableTLSCreds) Info() credentials.ProtocolInfo {
 	}
 }
 
-// GetRequestMetadata implements the credentials.TransportAuthenticator interface
+// GetRequestMetadata implements the credentials.TransportCredentials interface
 func (c *MutableTLSCreds) GetRequestMetadata(ctx context.Context, uri ...string) (map[string]string, error) {
 	return nil, nil
 }
 
-// RequireTransportSecurity implements the credentials.TransportAuthenticator interface
+// RequireTransportSecurity implements the credentials.TransportCredentials interface
 func (c *MutableTLSCreds) RequireTransportSecurity() bool {
 	return true
 }
 
-// ClientHandshake implements the credentials.TransportAuthenticator interface
-func (c *MutableTLSCreds) ClientHandshake(addr string, rawConn net.Conn, timeout time.Duration) (net.Conn, credentials.AuthInfo, error) {
+// ClientHandshake implements the credentials.TransportCredentials interface
+func (c *MutableTLSCreds) ClientHandshake(ctx context.Context, addr string, rawConn net.Conn) (net.Conn, credentials.AuthInfo, error) {
 	// borrow all the code from the original TLS credentials
-	var errChannel chan error
-	if timeout != 0 {
-		errChannel = make(chan error, 2)
-		time.AfterFunc(timeout, func() {
-			errChannel <- timeoutError{}
-		})
-	}
 	c.Lock()
 	if c.config.ServerName == "" {
 		colonPos := strings.LastIndex(addr, ":")
@@ -80,23 +72,23 @@ func (c *MutableTLSCreds) ClientHandshake(addr string, rawConn net.Conn, timeout
 	// would create a deadlock otherwise
 	c.Unlock()
 	var err error
-	if timeout == 0 {
-		err = conn.Handshake()
-	} else {
-		go func() {
-			errChannel <- conn.Handshake()
-		}()
-		err = <-errChannel
+	errChannel := make(chan error, 1)
+	go func() {
+		errChannel <- conn.Handshake()
+	}()
+	select {
+	case err = <-errChannel:
+	case <-ctx.Done():
+		err = ctx.Err()
 	}
 	if err != nil {
 		rawConn.Close()
 		return nil, nil, err
 	}
-
 	return conn, nil, nil
 }
 
-// ServerHandshake implements the credentials.TransportAuthenticator interface
+// ServerHandshake implements the credentials.TransportCredentials interface
 func (c *MutableTLSCreds) ServerHandshake(rawConn net.Conn) (net.Conn, credentials.AuthInfo, error) {
 	c.Lock()
 	conn := tls.Server(rawConn, c.config)
@@ -132,7 +124,7 @@ func (c *MutableTLSCreds) Config() *tls.Config {
 	return c.config
 }
 
-// Role returns the OU for the certificate encapsulated in this TransportAuthenticator
+// Role returns the OU for the certificate encapsulated in this TransportCredentials
 func (c *MutableTLSCreds) Role() string {
 	c.Lock()
 	defer c.Unlock()
@@ -140,7 +132,7 @@ func (c *MutableTLSCreds) Role() string {
 	return c.subject.OrganizationalUnit[0]
 }
 
-// Organization returns the O for the certificate encapsulated in this TransportAuthenticator
+// Organization returns the O for the certificate encapsulated in this TransportCredentials
 func (c *MutableTLSCreds) Organization() string {
 	c.Lock()
 	defer c.Unlock()
@@ -148,7 +140,7 @@ func (c *MutableTLSCreds) Organization() string {
 	return c.subject.Organization[0]
 }
 
-// NodeID returns the CN for the certificate encapsulated in this TransportAuthenticator
+// NodeID returns the CN for the certificate encapsulated in this TransportCredentials
 func (c *MutableTLSCreds) NodeID() string {
 	c.Lock()
 	defer c.Unlock()
@@ -156,7 +148,7 @@ func (c *MutableTLSCreds) NodeID() string {
 	return c.subject.CommonName
 }
 
-// NewMutableTLS uses c to construct a mutable TransportAuthenticator based on TLS.
+// NewMutableTLS uses c to construct a mutable TransportCredentials based on TLS.
 func NewMutableTLS(c *tls.Config) (*MutableTLSCreds, error) {
 	originalTC := credentials.NewTLS(c)
 

+ 8 - 0
vendor/src/github.com/docker/swarmkit/manager/allocator/network.go

@@ -564,7 +564,9 @@ func (a *Allocator) allocateNode(ctx context.Context, nc *networkContext, node *
 
 func (a *Allocator) allocateService(ctx context.Context, nc *networkContext, s *api.Service) error {
 	if s.Spec.Endpoint != nil {
+		// service has user-defined endpoint
 		if s.Endpoint == nil {
+			// service currently has no allocated endpoint; one needs to be allocated.
 			s.Endpoint = &api.Endpoint{
 				Spec: s.Spec.Endpoint.Copy(),
 			}
@@ -587,6 +589,12 @@ func (a *Allocator) allocateService(ctx context.Context, nc *networkContext, s *
 					&api.Endpoint_VirtualIP{NetworkID: nc.ingressNetwork.ID})
 			}
 		}
+	} else if s.Endpoint != nil {
+		// service has no user-defined endpoint but already has allocated network resources,
+		// which need to be deallocated.
+		if err := nc.nwkAllocator.ServiceDeallocate(s); err != nil {
+			return err
+		}
 	}
 
 	if err := nc.nwkAllocator.ServiceAllocate(s); err != nil {

+ 12 - 1
vendor/src/github.com/docker/swarmkit/manager/allocator/networkallocator/portallocator.go

@@ -155,7 +155,18 @@ func (pa *portAllocator) serviceDeallocatePorts(s *api.Service) {
 }
 
 func (pa *portAllocator) isPortsAllocated(s *api.Service) bool {
-	if s.Endpoint == nil {
+	// If the service has neither a user-defined endpoint nor an allocated endpoint,
+	// we assume it is allocated and return true.
+	if s.Endpoint == nil && s.Spec.Endpoint == nil {
+		return true
+	}
+
+	// If the service has an allocated endpoint but no user-defined endpoint,
+	// we assume the allocated endpoints are redundant and need to be deallocated.
+	// If the service has no allocated endpoint but has a user-defined endpoint,
+	// we assume it is not allocated.
+	if (s.Endpoint != nil && s.Spec.Endpoint == nil) ||
+		(s.Endpoint == nil && s.Spec.Endpoint != nil) {
 		return false
 	}
 

+ 0 - 12
vendor/src/github.com/docker/swarmkit/manager/controlapi/hackpicker/cluster.go

@@ -1,12 +0,0 @@
-package hackpicker
-
-// AddrSelector is interface which should track cluster for its leader address.
-type AddrSelector interface {
-	LeaderAddr() (string, error)
-}
-
-// RaftCluster is interface which combines useful methods for clustering.
-type RaftCluster interface {
-	AddrSelector
-	IsLeader() bool
-}

+ 0 - 141
vendor/src/github.com/docker/swarmkit/manager/controlapi/hackpicker/raftpicker.go

@@ -1,141 +0,0 @@
-// Package hackpicker is temporary solution to provide more seamless experience
-// for controlapi. It has drawback of slow reaction to leader change, but it
-// tracks leader automatically without erroring out to client.
-package hackpicker
-
-import (
-	"sync"
-
-	"golang.org/x/net/context"
-	"google.golang.org/grpc"
-	"google.golang.org/grpc/transport"
-)
-
-// picker always picks address of cluster leader.
-type picker struct {
-	mu   sync.Mutex
-	addr string
-	raft AddrSelector
-	conn *grpc.Conn
-	cc   *grpc.ClientConn
-}
-
-// Init does initial processing for the Picker, e.g., initiate some connections.
-func (p *picker) Init(cc *grpc.ClientConn) error {
-	p.cc = cc
-	return nil
-}
-
-func (p *picker) initConn() error {
-	if p.conn == nil {
-		conn, err := grpc.NewConn(p.cc)
-		if err != nil {
-			return err
-		}
-		p.conn = conn
-	}
-	return nil
-}
-
-// Pick blocks until either a transport.ClientTransport is ready for the upcoming RPC
-// or some error happens.
-func (p *picker) Pick(ctx context.Context) (transport.ClientTransport, error) {
-	p.mu.Lock()
-	if err := p.initConn(); err != nil {
-		p.mu.Unlock()
-		return nil, err
-	}
-	p.mu.Unlock()
-
-	addr, err := p.raft.LeaderAddr()
-	if err != nil {
-		return nil, err
-	}
-	p.mu.Lock()
-	if p.addr != addr {
-		p.addr = addr
-		p.conn.NotifyReset()
-	}
-	p.mu.Unlock()
-	return p.conn.Wait(ctx)
-}
-
-// PickAddr picks a peer address for connecting. This will be called repeated for
-// connecting/reconnecting.
-func (p *picker) PickAddr() (string, error) {
-	addr, err := p.raft.LeaderAddr()
-	if err != nil {
-		return "", err
-	}
-	p.mu.Lock()
-	p.addr = addr
-	p.mu.Unlock()
-	return addr, nil
-}
-
-// State returns the connectivity state of the underlying connections.
-func (p *picker) State() (grpc.ConnectivityState, error) {
-	return p.conn.State(), nil
-}
-
-// WaitForStateChange blocks until the state changes to something other than
-// the sourceState. It returns the new state or error.
-func (p *picker) WaitForStateChange(ctx context.Context, sourceState grpc.ConnectivityState) (grpc.ConnectivityState, error) {
-	return p.conn.WaitForStateChange(ctx, sourceState)
-}
-
-// Reset the current connection and force a reconnect to another address.
-func (p *picker) Reset() error {
-	p.conn.NotifyReset()
-	return nil
-}
-
-// Close closes all the Conn's owned by this Picker.
-func (p *picker) Close() error {
-	return p.conn.Close()
-}
-
-// ConnSelector is struct for obtaining connection with raftpicker.
-type ConnSelector struct {
-	mu      sync.Mutex
-	cc      *grpc.ClientConn
-	cluster RaftCluster
-	opts    []grpc.DialOption
-}
-
-// NewConnSelector returns new ConnSelector with cluster and grpc.DialOpts which
-// will be used for Dial on first call of Conn.
-func NewConnSelector(cluster RaftCluster, opts ...grpc.DialOption) *ConnSelector {
-	return &ConnSelector{
-		cluster: cluster,
-		opts:    opts,
-	}
-}
-
-// Conn returns *grpc.ClientConn with picker which picks raft cluster leader.
-// Internal connection estabilished lazily on this call.
-// It can return error if cluster wasn't ready at the moment of initial call.
-func (c *ConnSelector) Conn() (*grpc.ClientConn, error) {
-	c.mu.Lock()
-	defer c.mu.Unlock()
-	if c.cc != nil {
-		return c.cc, nil
-	}
-	addr, err := c.cluster.LeaderAddr()
-	if err != nil {
-		return nil, err
-	}
-	picker := &picker{raft: c.cluster, addr: addr}
-	opts := append(c.opts, grpc.WithPicker(picker))
-	cc, err := grpc.Dial(addr, opts...)
-	if err != nil {
-		return nil, err
-	}
-	c.cc = cc
-	return c.cc, nil
-}
-
-// Reset does nothing for hackpicker.
-func (c *ConnSelector) Reset() error {
-	return nil
-}

+ 7 - 6
vendor/src/github.com/docker/swarmkit/manager/controlapi/service.go

@@ -5,7 +5,7 @@ import (
 	"reflect"
 	"strconv"
 
-	"github.com/docker/engine-api/types/reference"
+	"github.com/docker/distribution/reference"
 	"github.com/docker/swarmkit/api"
 	"github.com/docker/swarmkit/identity"
 	"github.com/docker/swarmkit/manager/scheduler"
@@ -133,7 +133,7 @@ func validateTask(taskSpec api.TaskSpec) error {
 		return grpc.Errorf(codes.InvalidArgument, "ContainerSpec: image reference must be provided")
 	}
 
-	if _, _, err := reference.Parse(container.Image); err != nil {
+	if _, err := reference.ParseNamed(container.Image); err != nil {
 		return grpc.Errorf(codes.InvalidArgument, "ContainerSpec: %q is not a valid repository/tag", container.Image)
 	}
 	return nil
@@ -149,13 +149,13 @@ func validateEndpointSpec(epSpec *api.EndpointSpec) error {
 		return grpc.Errorf(codes.InvalidArgument, "EndpointSpec: ports can't be used with dnsrr mode")
 	}
 
-	portSet := make(map[api.PortConfig]struct{})
+	portSet := make(map[uint32]struct{})
 	for _, port := range epSpec.Ports {
-		if _, ok := portSet[*port]; ok {
-			return grpc.Errorf(codes.InvalidArgument, "EndpointSpec: duplicate ports provided")
+		if _, ok := portSet[port.PublishedPort]; ok {
+			return grpc.Errorf(codes.InvalidArgument, "EndpointSpec: duplicate published ports provided")
 		}
 
-		portSet[*port] = struct{}{}
+		portSet[port.PublishedPort] = struct{}{}
 	}
 
 	return nil
@@ -350,6 +350,7 @@ func (s *Server) UpdateService(ctx context.Context, request *api.UpdateServiceRe
 			return errModeChangeNotAllowed
 		}
 		service.Meta.Version = *request.ServiceVersion
+		service.PreviousSpec = service.Spec.Copy()
 		service.Spec = *request.Spec.Copy()
 
 		// Reset update status

+ 244 - 38
vendor/src/github.com/docker/swarmkit/manager/dispatcher/dispatcher.go

@@ -3,6 +3,7 @@ package dispatcher
 import (
 	"errors"
 	"fmt"
+	"strconv"
 	"sync"
 	"time"
 
@@ -41,6 +42,9 @@ const (
 	// into a single transaction. A fraction of a second feels about
 	// right.
 	maxBatchInterval = 100 * time.Millisecond
+
+	modificationBatchLimit = 100
+	batchingWaitTime       = 100 * time.Millisecond
 )
 
 var (
@@ -127,8 +131,6 @@ func New(cluster Cluster, c *Config) *Dispatcher {
 		nodes:                 newNodeStore(c.HeartbeatPeriod, c.HeartbeatEpsilon, c.GracePeriodMultiplier, c.RateLimitPeriod),
 		store:                 cluster.MemoryStore(),
 		cluster:               cluster,
-		mgrQueue:              watch.NewQueue(),
-		keyMgrQueue:           watch.NewQueue(),
 		taskUpdates:           make(map[string]*api.TaskStatus),
 		nodeUpdates:           make(map[string]nodeUpdate),
 		processUpdatesTrigger: make(chan struct{}, 1),
@@ -195,6 +197,9 @@ func (d *Dispatcher) Run(ctx context.Context) error {
 		d.mu.Unlock()
 		return err
 	}
+	// set queues here to guarantee that Close will close them
+	d.mgrQueue = watch.NewQueue()
+	d.keyMgrQueue = watch.NewQueue()
 
 	peerWatcher, peerCancel := d.cluster.SubscribePeers()
 	defer peerCancel()
@@ -351,26 +356,10 @@ func (d *Dispatcher) isRunning() bool {
 	return true
 }
 
-// register is used for registration of node with particular dispatcher.
-func (d *Dispatcher) register(ctx context.Context, nodeID string, description *api.NodeDescription) (string, error) {
-	// prevent register until we're ready to accept it
-	if err := d.isRunningLocked(); err != nil {
-		return "", err
-	}
-
-	if err := d.nodes.CheckRateLimit(nodeID); err != nil {
-		return "", err
-	}
-
-	// TODO(stevvooe): Validate node specification.
-	var node *api.Node
-	d.store.View(func(tx store.ReadTx) {
-		node = store.GetNode(tx, nodeID)
-	})
-	if node == nil {
-		return "", ErrNodeNotFound
-	}
-
+// updateNode updates the description of a node and sets status to READY
+// this is used during registration when a new node description is provided
+// and during node updates when the node description changes
+func (d *Dispatcher) updateNode(nodeID string, description *api.NodeDescription) error {
 	d.nodeUpdatesLock.Lock()
 	d.nodeUpdates[nodeID] = nodeUpdate{status: &api.NodeStatus{State: api.NodeStatus_READY}, description: description}
 	numUpdates := len(d.nodeUpdates)
@@ -380,7 +369,7 @@ func (d *Dispatcher) register(ctx context.Context, nodeID string, description *a
 		select {
 		case d.processUpdatesTrigger <- struct{}{}:
 		case <-d.ctx.Done():
-			return "", d.ctx.Err()
+			return d.ctx.Err()
 		}
 
 	}
@@ -389,12 +378,39 @@ func (d *Dispatcher) register(ctx context.Context, nodeID string, description *a
 	d.processUpdatesLock.Lock()
 	select {
 	case <-d.ctx.Done():
-		return "", d.ctx.Err()
+		return d.ctx.Err()
 	default:
 	}
 	d.processUpdatesCond.Wait()
 	d.processUpdatesLock.Unlock()
 
+	return nil
+}
+
+// register is used for registration of node with particular dispatcher.
+func (d *Dispatcher) register(ctx context.Context, nodeID string, description *api.NodeDescription) (string, error) {
+	// prevent register until we're ready to accept it
+	if err := d.isRunningLocked(); err != nil {
+		return "", err
+	}
+
+	if err := d.nodes.CheckRateLimit(nodeID); err != nil {
+		return "", err
+	}
+
+	// TODO(stevvooe): Validate node specification.
+	var node *api.Node
+	d.store.View(func(tx store.ReadTx) {
+		node = store.GetNode(tx, nodeID)
+	})
+	if node == nil {
+		return "", ErrNodeNotFound
+	}
+
+	if err := d.updateNode(nodeID, description); err != nil {
+		return "", err
+	}
+
 	expireFunc := func() {
 		nodeStatus := api.NodeStatus{State: api.NodeStatus_DOWN, Message: "heartbeat failure"}
 		log.G(ctx).Debugf("heartbeat expiration")
@@ -657,14 +673,10 @@ func (d *Dispatcher) Tasks(r *api.TasksRequest, stream api.Dispatcher_TasksServe
 		}
 
 		// bursty events should be processed in batches and sent out snapshot
-		const (
-			modificationBatchLimit = 200
-			eventPausedGap         = 50 * time.Millisecond
-		)
 		var (
-			modificationCnt    int
-			eventPausedTimer   *time.Timer
-			eventPausedTimeout <-chan time.Time
+			modificationCnt int
+			batchingTimer   *time.Timer
+			batchingTimeout <-chan time.Time
 		)
 
 	batchingLoop:
@@ -692,13 +704,189 @@ func (d *Dispatcher) Tasks(r *api.TasksRequest, stream api.Dispatcher_TasksServe
 					delete(tasksMap, v.Task.ID)
 					modificationCnt++
 				}
-				if eventPausedTimer != nil {
-					eventPausedTimer.Reset(eventPausedGap)
+				if batchingTimer != nil {
+					batchingTimer.Reset(batchingWaitTime)
 				} else {
-					eventPausedTimer = time.NewTimer(eventPausedGap)
-					eventPausedTimeout = eventPausedTimer.C
+					batchingTimer = time.NewTimer(batchingWaitTime)
+					batchingTimeout = batchingTimer.C
+				}
+			case <-batchingTimeout:
+				break batchingLoop
+			case <-stream.Context().Done():
+				return stream.Context().Err()
+			case <-d.ctx.Done():
+				return d.ctx.Err()
+			}
+		}
+
+		if batchingTimer != nil {
+			batchingTimer.Stop()
+		}
+	}
+}
+
+// Assignments is a stream of assignments for a node. Each message contains
+// either full list of tasks and secrets for the node, or an incremental update.
+func (d *Dispatcher) Assignments(r *api.AssignmentsRequest, stream api.Dispatcher_AssignmentsServer) error {
+	nodeInfo, err := ca.RemoteNode(stream.Context())
+	if err != nil {
+		return err
+	}
+	nodeID := nodeInfo.NodeID
+
+	if err := d.isRunningLocked(); err != nil {
+		return err
+	}
+
+	fields := logrus.Fields{
+		"node.id":      nodeID,
+		"node.session": r.SessionID,
+		"method":       "(*Dispatcher).Assignments",
+	}
+	if nodeInfo.ForwardedBy != nil {
+		fields["forwarder.id"] = nodeInfo.ForwardedBy.NodeID
+	}
+	log := log.G(stream.Context()).WithFields(fields)
+	log.Debugf("")
+
+	if _, err = d.nodes.GetWithSession(nodeID, r.SessionID); err != nil {
+		return err
+	}
+
+	var (
+		sequence  int64
+		appliesTo string
+		initial   api.AssignmentsMessage
+	)
+	tasksMap := make(map[string]*api.Task)
+
+	sendMessage := func(msg api.AssignmentsMessage, assignmentType api.AssignmentsMessage_Type) error {
+		sequence++
+		msg.AppliesTo = appliesTo
+		msg.ResultsIn = strconv.FormatInt(sequence, 10)
+		appliesTo = msg.ResultsIn
+		msg.Type = assignmentType
+
+		if err := stream.Send(&msg); err != nil {
+			return err
+		}
+		return nil
+	}
+
+	// TODO(aaronl): Also send node secrets that should be exposed to
+	// this node.
+	nodeTasks, cancel, err := store.ViewAndWatch(
+		d.store,
+		func(readTx store.ReadTx) error {
+			tasks, err := store.FindTasks(readTx, store.ByNodeID(nodeID))
+			if err != nil {
+				return err
+			}
+
+			for _, t := range tasks {
+				// We only care about tasks that are ASSIGNED or
+				// higher. If the state is below ASSIGNED, the
+				// task may not meet the constraints for this
+				// node, so we have to be careful about sending
+				// secrets associated with it.
+				if t.Status.State < api.TaskStateAssigned {
+					continue
+				}
+
+				tasksMap[t.ID] = t
+				initial.UpdateTasks = append(initial.UpdateTasks, t)
+			}
+			return nil
+		},
+		state.EventUpdateTask{Task: &api.Task{NodeID: nodeID},
+			Checks: []state.TaskCheckFunc{state.TaskCheckNodeID}},
+		state.EventDeleteTask{Task: &api.Task{NodeID: nodeID},
+			Checks: []state.TaskCheckFunc{state.TaskCheckNodeID}},
+	)
+	if err != nil {
+		return err
+	}
+	defer cancel()
+
+	if err := sendMessage(initial, api.AssignmentsMessage_COMPLETE); err != nil {
+		return err
+	}
+
+	for {
+		// Check for session expiration
+		if _, err := d.nodes.GetWithSession(nodeID, r.SessionID); err != nil {
+			return err
+		}
+
+		// bursty events should be processed in batches and sent out together
+		var (
+			update          api.AssignmentsMessage
+			modificationCnt int
+			batchingTimer   *time.Timer
+			batchingTimeout <-chan time.Time
+			updateTasks     = make(map[string]*api.Task)
+			removeTasks     = make(map[string]struct{})
+		)
+
+		oneModification := func() {
+			modificationCnt++
+
+			if batchingTimer != nil {
+				batchingTimer.Reset(batchingWaitTime)
+			} else {
+				batchingTimer = time.NewTimer(batchingWaitTime)
+				batchingTimeout = batchingTimer.C
+			}
+		}
+
+		// The batching loop waits for batchingWaitTime (100 ms) after the most recent
+		// change, or until modificationBatchLimit is reached. The
+		// worst case latency is modificationBatchLimit * batchingWaitTime,
+		// which is 10 seconds.
+	batchingLoop:
+		for modificationCnt < modificationBatchLimit {
+			select {
+			case event := <-nodeTasks:
+				switch v := event.(type) {
+				// We don't monitor EventCreateTask because tasks are
+				// never created in the ASSIGNED state. First tasks are
+				// created by the orchestrator, then the scheduler moves
+				// them to ASSIGNED. If this ever changes, we will need
+				// to monitor task creations as well.
+				case state.EventUpdateTask:
+					// We only care about tasks that are ASSIGNED or
+					// higher.
+					if v.Task.Status.State < api.TaskStateAssigned {
+						continue
+					}
+
+					if oldTask, exists := tasksMap[v.Task.ID]; exists {
+						// States ASSIGNED and below are set by the orchestrator/scheduler,
+						// not the agent, so tasks in these states need to be sent to the
+						// agent even if nothing else has changed.
+						if equality.TasksEqualStable(oldTask, v.Task) && v.Task.Status.State > api.TaskStateAssigned {
+							// this update should not trigger a task change for the agent
+							tasksMap[v.Task.ID] = v.Task
+							continue
+						}
+					}
+					tasksMap[v.Task.ID] = v.Task
+					updateTasks[v.Task.ID] = v.Task
+
+					oneModification()
+				case state.EventDeleteTask:
+
+					if _, exists := tasksMap[v.Task.ID]; !exists {
+						continue
+					}
+
+					removeTasks[v.Task.ID] = struct{}{}
+
+					delete(tasksMap, v.Task.ID)
+
+					oneModification()
 				}
-			case <-eventPausedTimeout:
+			case <-batchingTimeout:
 				break batchingLoop
 			case <-stream.Context().Done():
 				return stream.Context().Err()
@@ -707,8 +895,22 @@ func (d *Dispatcher) Tasks(r *api.TasksRequest, stream api.Dispatcher_TasksServe
 			}
 		}
 
-		if eventPausedTimer != nil {
-			eventPausedTimer.Stop()
+		if batchingTimer != nil {
+			batchingTimer.Stop()
+		}
+
+		if modificationCnt > 0 {
+			for id, task := range updateTasks {
+				if _, ok := removeTasks[id]; !ok {
+					update.UpdateTasks = append(update.UpdateTasks, task)
+				}
+			}
+			for id := range removeTasks {
+				update.RemoveTasks = append(update.RemoveTasks, id)
+			}
+			if err := sendMessage(update, api.AssignmentsMessage_INCREMENTAL); err != nil {
+				return err
+			}
 		}
 	}
 }
@@ -787,6 +989,10 @@ func (d *Dispatcher) Session(r *api.SessionRequest, stream api.Dispatcher_Sessio
 		}
 	} else {
 		sessionID = r.SessionID
+		// update the node description
+		if err := d.updateNode(nodeID, r.Description); err != nil {
+			return err
+		}
 	}
 
 	fields := logrus.Fields{

+ 7 - 33
vendor/src/github.com/docker/swarmkit/manager/manager.go

@@ -9,7 +9,6 @@ import (
 	"path/filepath"
 	"sync"
 	"syscall"
-	"time"
 
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/go-events"
@@ -18,12 +17,10 @@ import (
 	"github.com/docker/swarmkit/log"
 	"github.com/docker/swarmkit/manager/allocator"
 	"github.com/docker/swarmkit/manager/controlapi"
-	"github.com/docker/swarmkit/manager/controlapi/hackpicker"
 	"github.com/docker/swarmkit/manager/dispatcher"
 	"github.com/docker/swarmkit/manager/health"
 	"github.com/docker/swarmkit/manager/keymanager"
 	"github.com/docker/swarmkit/manager/orchestrator"
-	"github.com/docker/swarmkit/manager/raftpicker"
 	"github.com/docker/swarmkit/manager/resourceapi"
 	"github.com/docker/swarmkit/manager/scheduler"
 	"github.com/docker/swarmkit/manager/state/raft"
@@ -92,7 +89,6 @@ type Manager struct {
 	server                 *grpc.Server
 	localserver            *grpc.Server
 	RaftNode               *raft.Node
-	connSelector           *raftpicker.ConnSelector
 
 	mu sync.Mutex
 
@@ -250,25 +246,6 @@ func (m *Manager) Run(parent context.Context) error {
 
 	go m.handleLeadershipEvents(ctx, leadershipCh)
 
-	proxyOpts := []grpc.DialOption{
-		grpc.WithTimeout(5 * time.Second),
-		grpc.WithTransportCredentials(m.config.SecurityConfig.ClientTLSCreds),
-	}
-
-	cs := raftpicker.NewConnSelector(m.RaftNode, proxyOpts...)
-	m.connSelector = cs
-
-	// We need special connSelector for controlapi because it provides automatic
-	// leader tracking.
-	// Other APIs are using connSelector which errors out on leader change, but
-	// allows to react quickly to reelections.
-	controlAPIProxyOpts := []grpc.DialOption{
-		grpc.WithBackoffMaxDelay(time.Second),
-		grpc.WithTransportCredentials(m.config.SecurityConfig.ClientTLSCreds),
-	}
-
-	controlAPIConnSelector := hackpicker.NewConnSelector(m.RaftNode, controlAPIProxyOpts...)
-
 	authorize := func(ctx context.Context, roles []string) error {
 		// Authorize the remote roles, ensure they can only be forwarded by managers
 		_, err := ca.AuthorizeForwardedRoleAndOrg(ctx, roles, []string{ca.ManagerRole}, m.config.SecurityConfig.ClientTLSCreds.Organization())
@@ -289,11 +266,11 @@ func (m *Manager) Run(parent context.Context) error {
 	authenticatedHealthAPI := api.NewAuthenticatedWrapperHealthServer(healthServer, authorize)
 	authenticatedRaftMembershipAPI := api.NewAuthenticatedWrapperRaftMembershipServer(m.RaftNode, authorize)
 
-	proxyDispatcherAPI := api.NewRaftProxyDispatcherServer(authenticatedDispatcherAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
-	proxyCAAPI := api.NewRaftProxyCAServer(authenticatedCAAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
-	proxyNodeCAAPI := api.NewRaftProxyNodeCAServer(authenticatedNodeCAAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
-	proxyRaftMembershipAPI := api.NewRaftProxyRaftMembershipServer(authenticatedRaftMembershipAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
-	proxyResourceAPI := api.NewRaftProxyResourceAllocatorServer(authenticatedResourceAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
+	proxyDispatcherAPI := api.NewRaftProxyDispatcherServer(authenticatedDispatcherAPI, m.RaftNode, ca.WithMetadataForwardTLSInfo)
+	proxyCAAPI := api.NewRaftProxyCAServer(authenticatedCAAPI, m.RaftNode, ca.WithMetadataForwardTLSInfo)
+	proxyNodeCAAPI := api.NewRaftProxyNodeCAServer(authenticatedNodeCAAPI, m.RaftNode, ca.WithMetadataForwardTLSInfo)
+	proxyRaftMembershipAPI := api.NewRaftProxyRaftMembershipServer(authenticatedRaftMembershipAPI, m.RaftNode, ca.WithMetadataForwardTLSInfo)
+	proxyResourceAPI := api.NewRaftProxyResourceAllocatorServer(authenticatedResourceAPI, m.RaftNode, ca.WithMetadataForwardTLSInfo)
 
 	// localProxyControlAPI is a special kind of proxy. It is only wired up
 	// to receive requests from a trusted local socket, and these requests
@@ -302,7 +279,7 @@ func (m *Manager) Run(parent context.Context) error {
 	// this manager rather than forwarded requests (it has no TLS
 	// information to put in the metadata map).
 	forwardAsOwnRequest := func(ctx context.Context) (context.Context, error) { return ctx, nil }
-	localProxyControlAPI := api.NewRaftProxyControlServer(baseControlAPI, controlAPIConnSelector, m.RaftNode, forwardAsOwnRequest)
+	localProxyControlAPI := api.NewRaftProxyControlServer(baseControlAPI, m.RaftNode, forwardAsOwnRequest)
 
 	// Everything registered on m.server should be an authenticated
 	// wrapper, or a proxy wrapping an authenticated wrapper!
@@ -318,7 +295,7 @@ func (m *Manager) Run(parent context.Context) error {
 	api.RegisterControlServer(m.localserver, localProxyControlAPI)
 	api.RegisterHealthServer(m.localserver, localHealthServer)
 
-	errServe := make(chan error, 2)
+	errServe := make(chan error, len(m.listeners))
 	for proto, l := range m.listeners {
 		go m.serveListener(ctx, errServe, proto, l)
 	}
@@ -433,9 +410,6 @@ func (m *Manager) Stop(ctx context.Context) {
 		m.keyManager.Stop()
 	}
 
-	if m.connSelector != nil {
-		m.connSelector.Stop()
-	}
 	m.RaftNode.Shutdown()
 	// some time after this point, Run will receive an error from one of these
 	m.server.Stop()

+ 4 - 3
vendor/src/github.com/docker/swarmkit/manager/orchestrator/restart.go

@@ -346,7 +346,8 @@ func (r *RestartSupervisor) DelayStart(ctx context.Context, _ store.Tx, oldTask
 			close(doneCh)
 		}()
 
-		oldTaskTimeout := time.After(r.taskTimeout)
+		oldTaskTimer := time.NewTimer(r.taskTimeout)
+		defer oldTaskTimer.Stop()
 
 		// Wait for the delay to elapse, if one is specified.
 		if delay != 0 {
@@ -357,10 +358,10 @@ func (r *RestartSupervisor) DelayStart(ctx context.Context, _ store.Tx, oldTask
 			}
 		}
 
-		if waitStop {
+		if waitStop && oldTask != nil {
 			select {
 			case <-watch:
-			case <-oldTaskTimeout:
+			case <-oldTaskTimer.C:
 			case <-ctx.Done():
 				return
 			}

+ 195 - 41
vendor/src/github.com/docker/swarmkit/manager/orchestrator/updater.go

@@ -1,6 +1,7 @@
 package orchestrator
 
 import (
+	"errors"
 	"fmt"
 	"reflect"
 	"sync"
@@ -17,6 +18,8 @@ import (
 	"github.com/docker/swarmkit/protobuf/ptypes"
 )
 
+const defaultMonitor = 30 * time.Second
+
 // UpdateSupervisor supervises a set of updates. It's responsible for keeping track of updates,
 // shutting them down and replacing them.
 type UpdateSupervisor struct {
@@ -49,7 +52,7 @@ func (u *UpdateSupervisor) Update(ctx context.Context, cluster *api.Cluster, ser
 	id := service.ID
 
 	if update, ok := u.updates[id]; ok {
-		if !update.isServiceDirty(service) {
+		if reflect.DeepEqual(service.Spec, update.newService.Spec) {
 			// There's already an update working towards this goal.
 			return
 		}
@@ -87,6 +90,9 @@ type Updater struct {
 	cluster    *api.Cluster
 	newService *api.Service
 
+	updatedTasks   map[string]time.Time // task ID to creation time
+	updatedTasksMu sync.Mutex
+
 	// stopChan signals to the state machine to stop running.
 	stopChan chan struct{}
 	// doneChan is closed when the state machine terminates.
@@ -96,13 +102,14 @@ type Updater struct {
 // NewUpdater creates a new Updater.
 func NewUpdater(store *store.MemoryStore, restartSupervisor *RestartSupervisor, cluster *api.Cluster, newService *api.Service) *Updater {
 	return &Updater{
-		store:      store,
-		watchQueue: store.WatchQueue(),
-		restarts:   restartSupervisor,
-		cluster:    cluster.Copy(),
-		newService: newService.Copy(),
-		stopChan:   make(chan struct{}),
-		doneChan:   make(chan struct{}),
+		store:        store,
+		watchQueue:   store.WatchQueue(),
+		restarts:     restartSupervisor,
+		cluster:      cluster.Copy(),
+		newService:   newService.Copy(),
+		updatedTasks: make(map[string]time.Time),
+		stopChan:     make(chan struct{}),
+		doneChan:     make(chan struct{}),
 	}
 }
 
@@ -119,7 +126,9 @@ func (u *Updater) Run(ctx context.Context, slots []slot) {
 	service := u.newService
 
 	// If the update is in a PAUSED state, we should not do anything.
-	if service.UpdateStatus != nil && service.UpdateStatus.State == api.UpdateStatus_PAUSED {
+	if service.UpdateStatus != nil &&
+		(service.UpdateStatus.State == api.UpdateStatus_PAUSED ||
+			service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_PAUSED) {
 		return
 	}
 
@@ -131,7 +140,9 @@ func (u *Updater) Run(ctx context.Context, slots []slot) {
 	}
 	// Abort immediately if all tasks are clean.
 	if len(dirtySlots) == 0 {
-		if service.UpdateStatus != nil && service.UpdateStatus.State == api.UpdateStatus_UPDATING {
+		if service.UpdateStatus != nil &&
+			(service.UpdateStatus.State == api.UpdateStatus_UPDATING ||
+				service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED) {
 			u.completeUpdate(ctx, service.ID)
 		}
 		return
@@ -163,9 +174,26 @@ func (u *Updater) Run(ctx context.Context, slots []slot) {
 		}()
 	}
 
+	failureAction := api.UpdateConfig_PAUSE
+	allowedFailureFraction := float32(0)
+	monitoringPeriod := defaultMonitor
+
+	if service.Spec.Update != nil {
+		failureAction = service.Spec.Update.FailureAction
+		allowedFailureFraction = service.Spec.Update.AllowedFailureFraction
+
+		if service.Spec.Update.Monitor != nil {
+			var err error
+			monitoringPeriod, err = ptypes.Duration(service.Spec.Update.Monitor)
+			if err != nil {
+				monitoringPeriod = defaultMonitor
+			}
+		}
+	}
+
 	var failedTaskWatch chan events.Event
 
-	if service.Spec.Update == nil || service.Spec.Update.FailureAction == api.UpdateConfig_PAUSE {
+	if failureAction != api.UpdateConfig_CONTINUE {
 		var cancelWatch func()
 		failedTaskWatch, cancelWatch = state.Watch(
 			u.store.WatchQueue(),
@@ -178,6 +206,49 @@ func (u *Updater) Run(ctx context.Context, slots []slot) {
 	}
 
 	stopped := false
+	failedTasks := make(map[string]struct{})
+	totalFailures := 0
+
+	failureTriggersAction := func(failedTask *api.Task) bool {
+		// Ignore tasks we have already seen as failures.
+		if _, found := failedTasks[failedTask.ID]; found {
+			return false
+		}
+
+		// If this failed/completed task is one that we
+		// created as part of this update, we should
+		// follow the failure action.
+		u.updatedTasksMu.Lock()
+		startedAt, found := u.updatedTasks[failedTask.ID]
+		u.updatedTasksMu.Unlock()
+
+		if found && (startedAt.IsZero() || time.Since(startedAt) <= monitoringPeriod) {
+			failedTasks[failedTask.ID] = struct{}{}
+			totalFailures++
+			if float32(totalFailures)/float32(len(dirtySlots)) > allowedFailureFraction {
+				switch failureAction {
+				case api.UpdateConfig_PAUSE:
+					stopped = true
+					message := fmt.Sprintf("update paused due to failure or early termination of task %s", failedTask.ID)
+					u.pauseUpdate(ctx, service.ID, message)
+					return true
+				case api.UpdateConfig_ROLLBACK:
+					// Never roll back a rollback
+					if service.UpdateStatus != nil && service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED {
+						message := fmt.Sprintf("rollback paused due to failure or early termination of task %s", failedTask.ID)
+						u.pauseUpdate(ctx, service.ID, message)
+						return true
+					}
+					stopped = true
+					message := fmt.Sprintf("update rolled back due to failure or early termination of task %s", failedTask.ID)
+					u.rollbackUpdate(ctx, service.ID, message)
+					return true
+				}
+			}
+		}
+
+		return false
+	}
 
 slotsLoop:
 	for _, slot := range dirtySlots {
@@ -189,15 +260,7 @@ slotsLoop:
 				stopped = true
 				break slotsLoop
 			case ev := <-failedTaskWatch:
-				failedTask := ev.(state.EventUpdateTask).Task
-
-				// If this failed/completed task has a spec matching
-				// the one we're updating to, we should pause the
-				// update.
-				if !u.isTaskDirty(failedTask) {
-					stopped = true
-					message := fmt.Sprintf("update paused due to failure or early termination of task %s", failedTask.ID)
-					u.pauseUpdate(ctx, service.ID, message)
+				if failureTriggersAction(ev.(state.EventUpdateTask).Task) {
 					break slotsLoop
 				}
 			case slotQueue <- slot:
@@ -209,6 +272,29 @@ slotsLoop:
 	close(slotQueue)
 	wg.Wait()
 
+	if !stopped {
+		// Keep watching for task failures for one more monitoringPeriod,
+		// before declaring the update complete.
+		doneMonitoring := time.After(monitoringPeriod)
+	monitorLoop:
+		for {
+			select {
+			case <-u.stopChan:
+				stopped = true
+				break monitorLoop
+			case <-doneMonitoring:
+				break monitorLoop
+			case ev := <-failedTaskWatch:
+				if failureTriggersAction(ev.(state.EventUpdateTask).Task) {
+					break monitorLoop
+				}
+			}
+		}
+	}
+
+	// TODO(aaronl): Potentially roll back the service if not enough tasks
+	// have reached RUNNING by this point.
+
 	if !stopped {
 		u.completeUpdate(ctx, service.ID)
 	}
@@ -237,9 +323,13 @@ func (u *Updater) worker(ctx context.Context, queue <-chan slot) {
 			}
 		}
 		if runningTask != nil {
-			u.useExistingTask(ctx, slot, runningTask)
+			if err := u.useExistingTask(ctx, slot, runningTask); err != nil {
+				log.G(ctx).WithError(err).Error("update failed")
+			}
 		} else if cleanTask != nil {
-			u.useExistingTask(ctx, slot, cleanTask)
+			if err := u.useExistingTask(ctx, slot, cleanTask); err != nil {
+				log.G(ctx).WithError(err).Error("update failed")
+			}
 		} else {
 			updated := newTask(u.cluster, u.newService, slot[0].Slot)
 			updated.DesiredState = api.TaskStateReady
@@ -275,10 +365,22 @@ func (u *Updater) updateTask(ctx context.Context, slot slot, updated *api.Task)
 	})
 	defer cancel()
 
+	// Create an empty entry for this task, so the updater knows a failure
+	// should count towards the failure count. The timestamp is added
+	// if/when the task reaches RUNNING.
+	u.updatedTasksMu.Lock()
+	u.updatedTasks[updated.ID] = time.Time{}
+	u.updatedTasksMu.Unlock()
+
 	var delayStartCh <-chan struct{}
 	// Atomically create the updated task and bring down the old one.
 	_, err := u.store.Batch(func(batch *store.Batch) error {
-		err := batch.Update(func(tx store.Tx) error {
+		oldTask, err := u.removeOldTasks(ctx, batch, slot)
+		if err != nil {
+			return err
+		}
+
+		err = batch.Update(func(tx store.Tx) error {
 			if err := store.CreateTask(tx, updated); err != nil {
 				return err
 			}
@@ -288,7 +390,6 @@ func (u *Updater) updateTask(ctx context.Context, slot slot, updated *api.Task)
 			return err
 		}
 
-		oldTask := u.removeOldTasks(ctx, batch, slot)
 		delayStartCh = u.restarts.DelayStart(ctx, nil, oldTask, updated.ID, 0, true)
 
 		return nil
@@ -309,6 +410,9 @@ func (u *Updater) updateTask(ctx context.Context, slot slot, updated *api.Task)
 		case e := <-taskUpdates:
 			updated = e.(state.EventUpdateTask).Task
 			if updated.Status.State >= api.TaskStateRunning {
+				u.updatedTasksMu.Lock()
+				u.updatedTasks[updated.ID] = time.Now()
+				u.updatedTasksMu.Unlock()
 				return nil
 			}
 		case <-u.stopChan:
@@ -317,7 +421,7 @@ func (u *Updater) updateTask(ctx context.Context, slot slot, updated *api.Task)
 	}
 }
 
-func (u *Updater) useExistingTask(ctx context.Context, slot slot, existing *api.Task) {
+func (u *Updater) useExistingTask(ctx context.Context, slot slot, existing *api.Task) error {
 	var removeTasks []*api.Task
 	for _, t := range slot {
 		if t != existing {
@@ -327,7 +431,14 @@ func (u *Updater) useExistingTask(ctx context.Context, slot slot, existing *api.
 	if len(removeTasks) != 0 || existing.DesiredState != api.TaskStateRunning {
 		var delayStartCh <-chan struct{}
 		_, err := u.store.Batch(func(batch *store.Batch) error {
-			oldTask := u.removeOldTasks(ctx, batch, removeTasks)
+			var oldTask *api.Task
+			if len(removeTasks) != 0 {
+				var err error
+				oldTask, err = u.removeOldTasks(ctx, batch, removeTasks)
+				if err != nil {
+					return err
+				}
+			}
 
 			if existing.DesiredState != api.TaskStateRunning {
 				delayStartCh = u.restarts.DelayStart(ctx, nil, oldTask, existing.ID, 0, true)
@@ -335,19 +446,24 @@ func (u *Updater) useExistingTask(ctx context.Context, slot slot, existing *api.
 			return nil
 		})
 		if err != nil {
-			log.G(ctx).WithError(err).Error("updater batch transaction failed")
+			return err
 		}
 
 		if delayStartCh != nil {
 			<-delayStartCh
 		}
 	}
+
+	return nil
 }
 
 // removeOldTasks shuts down the given tasks and returns one of the tasks that
-// was shut down, or nil.
-func (u *Updater) removeOldTasks(ctx context.Context, batch *store.Batch, removeTasks []*api.Task) *api.Task {
-	var removedTask *api.Task
+// was shut down, or an error.
+func (u *Updater) removeOldTasks(ctx context.Context, batch *store.Batch, removeTasks []*api.Task) (*api.Task, error) {
+	var (
+		lastErr     error
+		removedTask *api.Task
+	)
 	for _, original := range removeTasks {
 		err := batch.Update(func(tx store.Tx) error {
 			t := store.GetTask(tx, original.ID)
@@ -361,13 +477,16 @@ func (u *Updater) removeOldTasks(ctx context.Context, batch *store.Batch, remove
 			return store.UpdateTask(tx, t)
 		})
 		if err != nil {
-			log.G(ctx).WithError(err).Errorf("shutting down stale task %s failed", original.ID)
+			lastErr = err
 		} else {
 			removedTask = original
 		}
 	}
 
-	return removedTask
+	if removedTask == nil {
+		return nil, lastErr
+	}
+	return removedTask, nil
 }
 
 func (u *Updater) isTaskDirty(t *api.Task) bool {
@@ -375,11 +494,6 @@ func (u *Updater) isTaskDirty(t *api.Task) bool {
 		(t.Endpoint != nil && !reflect.DeepEqual(u.newService.Spec.Endpoint, t.Endpoint.Spec))
 }
 
-func (u *Updater) isServiceDirty(service *api.Service) bool {
-	return !reflect.DeepEqual(u.newService.Spec.Task, service.Spec.Task) ||
-		!reflect.DeepEqual(u.newService.Spec.Endpoint, service.Spec.Endpoint)
-}
-
 func (u *Updater) isSlotDirty(slot slot) bool {
 	return len(slot) > 1 || (len(slot) == 1 && u.isTaskDirty(slot[0]))
 }
@@ -421,7 +535,11 @@ func (u *Updater) pauseUpdate(ctx context.Context, serviceID, message string) {
 			return nil
 		}
 
-		service.UpdateStatus.State = api.UpdateStatus_PAUSED
+		if service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED {
+			service.UpdateStatus.State = api.UpdateStatus_ROLLBACK_PAUSED
+		} else {
+			service.UpdateStatus.State = api.UpdateStatus_PAUSED
+		}
 		service.UpdateStatus.Message = message
 
 		return store.UpdateService(tx, service)
@@ -432,6 +550,38 @@ func (u *Updater) pauseUpdate(ctx context.Context, serviceID, message string) {
 	}
 }
 
+func (u *Updater) rollbackUpdate(ctx context.Context, serviceID, message string) {
+	log.G(ctx).Debugf("starting rollback of service %s", serviceID)
+
+	var service *api.Service
+	err := u.store.Update(func(tx store.Tx) error {
+		service = store.GetService(tx, serviceID)
+		if service == nil {
+			return nil
+		}
+		if service.UpdateStatus == nil {
+			// The service was updated since we started this update
+			return nil
+		}
+
+		service.UpdateStatus.State = api.UpdateStatus_ROLLBACK_STARTED
+		service.UpdateStatus.Message = message
+
+		if service.PreviousSpec == nil {
+			return errors.New("cannot roll back service because no previous spec is available")
+		}
+		service.Spec = *service.PreviousSpec
+		service.PreviousSpec = nil
+
+		return store.UpdateService(tx, service)
+	})
+
+	if err != nil {
+		log.G(ctx).WithError(err).Errorf("failed to start rollback of service %s", serviceID)
+		return
+	}
+}
+
 func (u *Updater) completeUpdate(ctx context.Context, serviceID string) {
 	log.G(ctx).Debugf("update of service %s complete", serviceID)
 
@@ -444,9 +594,13 @@ func (u *Updater) completeUpdate(ctx context.Context, serviceID string) {
 			// The service was changed since we started this update
 			return nil
 		}
-
-		service.UpdateStatus.State = api.UpdateStatus_COMPLETED
-		service.UpdateStatus.Message = "update completed"
+		if service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED {
+			service.UpdateStatus.State = api.UpdateStatus_ROLLBACK_COMPLETED
+			service.UpdateStatus.Message = "rollback completed"
+		} else {
+			service.UpdateStatus.State = api.UpdateStatus_COMPLETED
+			service.UpdateStatus.Message = "update completed"
+		}
 		service.UpdateStatus.CompletedAt = ptypes.MustTimestampProto(time.Now())
 
 		return store.UpdateService(tx, service)

+ 0 - 12
vendor/src/github.com/docker/swarmkit/manager/raftpicker/cluster.go

@@ -1,12 +0,0 @@
-package raftpicker
-
-// AddrSelector is interface which should track cluster for its leader address.
-type AddrSelector interface {
-	LeaderAddr() (string, error)
-}
-
-// RaftCluster is interface which combines useful methods for clustering.
-type RaftCluster interface {
-	AddrSelector
-	IsLeader() bool
-}

+ 0 - 127
vendor/src/github.com/docker/swarmkit/manager/raftpicker/raftpicker.go

@@ -1,127 +0,0 @@
-package raftpicker
-
-import (
-	"sync"
-	"time"
-
-	"github.com/Sirupsen/logrus"
-
-	"google.golang.org/grpc"
-)
-
-// Interface is interface to replace implementation with controlapi/hackpicker.
-// TODO: it should be done cooler.
-type Interface interface {
-	Conn() (*grpc.ClientConn, error)
-	Reset() error
-}
-
-// ConnSelector is struct for obtaining connection connected to cluster leader.
-type ConnSelector struct {
-	mu      sync.Mutex
-	cluster RaftCluster
-	opts    []grpc.DialOption
-
-	cc   *grpc.ClientConn
-	addr string
-
-	stop chan struct{}
-}
-
-// NewConnSelector returns new ConnSelector with cluster and grpc.DialOpts which
-// will be used for connection create.
-func NewConnSelector(cluster RaftCluster, opts ...grpc.DialOption) *ConnSelector {
-	cs := &ConnSelector{
-		cluster: cluster,
-		opts:    opts,
-		stop:    make(chan struct{}),
-	}
-	go cs.updateLoop()
-	return cs
-}
-
-// Conn returns *grpc.ClientConn which connected to cluster leader.
-// It can return error if cluster wasn't ready at the moment of initial call.
-func (c *ConnSelector) Conn() (*grpc.ClientConn, error) {
-	c.mu.Lock()
-	defer c.mu.Unlock()
-	if c.cc != nil {
-		return c.cc, nil
-	}
-	addr, err := c.cluster.LeaderAddr()
-	if err != nil {
-		return nil, err
-	}
-	cc, err := grpc.Dial(addr, c.opts...)
-	if err != nil {
-		return nil, err
-	}
-	c.cc = cc
-	c.addr = addr
-	return cc, nil
-}
-
-// Reset recreates underlying connection.
-func (c *ConnSelector) Reset() error {
-	c.mu.Lock()
-	defer c.mu.Unlock()
-	if c.cc != nil {
-		c.cc.Close()
-		c.cc = nil
-	}
-	addr, err := c.cluster.LeaderAddr()
-	if err != nil {
-		logrus.WithError(err).Errorf("error obtaining leader address")
-		return err
-	}
-	cc, err := grpc.Dial(addr, c.opts...)
-	if err != nil {
-		logrus.WithError(err).Errorf("error reestabilishing connection to leader")
-		return err
-	}
-	c.cc = cc
-	c.addr = addr
-	return nil
-}
-
-// Stop cancels updating connection loop.
-func (c *ConnSelector) Stop() {
-	close(c.stop)
-}
-
-func (c *ConnSelector) updateConn() error {
-	addr, err := c.cluster.LeaderAddr()
-	if err != nil {
-		return err
-	}
-	c.mu.Lock()
-	defer c.mu.Unlock()
-	if c.addr != addr {
-		if c.cc != nil {
-			c.cc.Close()
-			c.cc = nil
-		}
-		conn, err := grpc.Dial(addr, c.opts...)
-		if err != nil {
-			return err
-		}
-		c.cc = conn
-		c.addr = addr
-	}
-	return nil
-}
-
-func (c *ConnSelector) updateLoop() {
-	ticker := time.NewTicker(1 * time.Second)
-	defer ticker.Stop()
-	for {
-		select {
-		case <-ticker.C:
-			if err := c.updateConn(); err != nil {
-				logrus.WithError(err).Errorf("error reestabilishing connection to leader")
-			}
-		case <-c.stop:
-			return
-		}
-	}
-}

+ 20 - 0
vendor/src/github.com/docker/swarmkit/manager/raftselector/raftselector.go

@@ -0,0 +1,20 @@
+package raftselector
+
+import (
+	"errors"
+
+	"golang.org/x/net/context"
+
+	"google.golang.org/grpc"
+)
+
+// ConnProvider is a basic interface for connecting the API package (raft proxy
+// in particular) to the manager/state/raft package without import cycles. It
+// provides only one method, for obtaining a connection to the leader.
+type ConnProvider interface {
+	LeaderConn(ctx context.Context) (*grpc.ClientConn, error)
+}
+
+// ErrIsLeader is returned from the LeaderConn method when the current machine
+// is the leader. It's just a shim between packages to avoid import cycles.
+var ErrIsLeader = errors.New("current node is leader")

+ 0 - 153
vendor/src/github.com/docker/swarmkit/manager/scheduler/indexed_node_heap.go

@@ -1,153 +0,0 @@
-package scheduler
-
-import (
-	"container/heap"
-	"errors"
-
-	"github.com/docker/swarmkit/api"
-)
-
-var errNodeNotFound = errors.New("node not found in scheduler heap")
-
-// A nodeHeap implements heap.Interface for nodes. It also includes an index
-// by node id.
-type nodeHeap struct {
-	heap  []NodeInfo
-	index map[string]int // map from node id to heap index
-}
-
-func (nh nodeHeap) Len() int {
-	return len(nh.heap)
-}
-
-func (nh nodeHeap) Less(i, j int) bool {
-	return len(nh.heap[i].Tasks) < len(nh.heap[j].Tasks)
-}
-
-func (nh nodeHeap) Swap(i, j int) {
-	nh.heap[i], nh.heap[j] = nh.heap[j], nh.heap[i]
-	nh.index[nh.heap[i].ID] = i
-	nh.index[nh.heap[j].ID] = j
-}
-
-func (nh *nodeHeap) Push(x interface{}) {
-	n := len(nh.heap)
-	item := x.(NodeInfo)
-	nh.index[item.ID] = n
-	nh.heap = append(nh.heap, item)
-}
-
-func (nh *nodeHeap) Pop() interface{} {
-	old := nh.heap
-	n := len(old)
-	item := old[n-1]
-	delete(nh.index, item.ID)
-	nh.heap = old[0 : n-1]
-	return item
-}
-
-func (nh *nodeHeap) alloc(n int) {
-	nh.heap = make([]NodeInfo, 0, n)
-	nh.index = make(map[string]int, n)
-}
-
-// nodeInfo returns the NodeInfo struct for a given node identified by its ID.
-func (nh *nodeHeap) nodeInfo(nodeID string) (NodeInfo, error) {
-	index, ok := nh.index[nodeID]
-	if ok {
-		return nh.heap[index], nil
-	}
-	return NodeInfo{}, errNodeNotFound
-}
-
-// addOrUpdateNode sets the number of tasks for a given node. It adds the node
-// to the heap if it wasn't already tracked.
-func (nh *nodeHeap) addOrUpdateNode(n NodeInfo) {
-	index, ok := nh.index[n.ID]
-	if ok {
-		nh.heap[index] = n
-		heap.Fix(nh, index)
-	} else {
-		heap.Push(nh, n)
-	}
-}
-
-// updateNode sets the number of tasks for a given node. It ignores the update
-// if the node isn't already tracked in the heap.
-func (nh *nodeHeap) updateNode(n NodeInfo) {
-	index, ok := nh.index[n.ID]
-	if ok {
-		nh.heap[index] = n
-		heap.Fix(nh, index)
-	}
-}
-
-func (nh *nodeHeap) remove(nodeID string) {
-	index, ok := nh.index[nodeID]
-	if ok {
-		heap.Remove(nh, index)
-	}
-}
-
-func (nh *nodeHeap) findMin(meetsConstraints func(*NodeInfo) bool, scanAllNodes bool) (*api.Node, int) {
-	if scanAllNodes {
-		return nh.scanAllToFindMin(meetsConstraints)
-	}
-	return nh.searchHeapToFindMin(meetsConstraints)
-}
-
-// Scan All nodes to find the best node which meets the constraints && has lightest workloads
-func (nh *nodeHeap) scanAllToFindMin(meetsConstraints func(*NodeInfo) bool) (*api.Node, int) {
-	var bestNode *api.Node
-	minTasks := int(^uint(0) >> 1) // max int
-
-	for i := 0; i < len(nh.heap); i++ {
-		heapEntry := &nh.heap[i]
-		if meetsConstraints(heapEntry) && len(heapEntry.Tasks) < minTasks {
-			bestNode = heapEntry.Node
-			minTasks = len(heapEntry.Tasks)
-		}
-	}
-
-	return bestNode, minTasks
-}
-
-// Search in heap to find the best node which meets the constraints && has lightest workloads
-func (nh *nodeHeap) searchHeapToFindMin(meetsConstraints func(*NodeInfo) bool) (*api.Node, int) {
-	var bestNode *api.Node
-	minTasks := int(^uint(0) >> 1) // max int
-
-	if nh == nil || len(nh.heap) == 0 {
-		return bestNode, minTasks
-	}
-
-	// push root to stack for search
-	stack := []int{0}
-
-	for len(stack) != 0 {
-		// pop an element
-		idx := stack[len(stack)-1]
-		stack = stack[0 : len(stack)-1]
-
-		heapEntry := &nh.heap[idx]
-
-		if len(heapEntry.Tasks) >= minTasks {
-			continue
-		}
-
-		if meetsConstraints(heapEntry) {
-			// meet constraints, update results
-			bestNode = heapEntry.Node
-			minTasks = len(heapEntry.Tasks)
-		} else {
-			// otherwise, push 2 children to stack for further search
-			if 2*idx+1 < len(nh.heap) {
-				stack = append(stack, 2*idx+1)
-			}
-			if 2*idx+2 < len(nh.heap) {
-				stack = append(stack, 2*idx+2)
-			}
-		}
-	}
-	return bestNode, minTasks
-}

+ 48 - 13
vendor/src/github.com/docker/swarmkit/manager/scheduler/nodeinfo.go

@@ -5,15 +5,18 @@ import "github.com/docker/swarmkit/api"
 // NodeInfo contains a node and some additional metadata.
 type NodeInfo struct {
 	*api.Node
-	Tasks              map[string]*api.Task
-	AvailableResources api.Resources
+	Tasks                             map[string]*api.Task
+	DesiredRunningTasksCount          int
+	DesiredRunningTasksCountByService map[string]int
+	AvailableResources                api.Resources
 }
 
 func newNodeInfo(n *api.Node, tasks map[string]*api.Task, availableResources api.Resources) NodeInfo {
 	nodeInfo := NodeInfo{
-		Node:               n,
-		Tasks:              make(map[string]*api.Task),
-		AvailableResources: availableResources,
+		Node:  n,
+		Tasks: make(map[string]*api.Task),
+		DesiredRunningTasksCountByService: make(map[string]int),
+		AvailableResources:                availableResources,
 	}
 
 	for _, t := range tasks {
@@ -22,15 +25,23 @@ func newNodeInfo(n *api.Node, tasks map[string]*api.Task, availableResources api
 	return nodeInfo
 }
 
+// removeTask removes a task from nodeInfo if it's tracked there, and returns true
+// if nodeInfo was modified.
 func (nodeInfo *NodeInfo) removeTask(t *api.Task) bool {
 	if nodeInfo.Tasks == nil {
 		return false
 	}
-	if _, ok := nodeInfo.Tasks[t.ID]; !ok {
+	oldTask, ok := nodeInfo.Tasks[t.ID]
+	if !ok {
 		return false
 	}
 
 	delete(nodeInfo.Tasks, t.ID)
+	if oldTask.DesiredState == api.TaskStateRunning {
+		nodeInfo.DesiredRunningTasksCount--
+		nodeInfo.DesiredRunningTasksCountByService[t.ServiceID]--
+	}
+
 	reservations := taskReservations(t.Spec)
 	nodeInfo.AvailableResources.MemoryBytes += reservations.MemoryBytes
 	nodeInfo.AvailableResources.NanoCPUs += reservations.NanoCPUs
@@ -38,19 +49,43 @@ func (nodeInfo *NodeInfo) removeTask(t *api.Task) bool {
 	return true
 }
 
+// addTask adds or updates a task on nodeInfo, and returns true if nodeInfo was
+// modified.
 func (nodeInfo *NodeInfo) addTask(t *api.Task) bool {
 	if nodeInfo.Tasks == nil {
 		nodeInfo.Tasks = make(map[string]*api.Task)
 	}
-	if _, ok := nodeInfo.Tasks[t.ID]; !ok {
-		nodeInfo.Tasks[t.ID] = t
-		reservations := taskReservations(t.Spec)
-		nodeInfo.AvailableResources.MemoryBytes -= reservations.MemoryBytes
-		nodeInfo.AvailableResources.NanoCPUs -= reservations.NanoCPUs
-		return true
+	if nodeInfo.DesiredRunningTasksCountByService == nil {
+		nodeInfo.DesiredRunningTasksCountByService = make(map[string]int)
+	}
+
+	oldTask, ok := nodeInfo.Tasks[t.ID]
+	if ok {
+		if t.DesiredState == api.TaskStateRunning && oldTask.DesiredState != api.TaskStateRunning {
+			nodeInfo.Tasks[t.ID] = t
+			nodeInfo.DesiredRunningTasksCount++
+			nodeInfo.DesiredRunningTasksCountByService[t.ServiceID]++
+			return true
+		} else if t.DesiredState != api.TaskStateRunning && oldTask.DesiredState == api.TaskStateRunning {
+			nodeInfo.Tasks[t.ID] = t
+			nodeInfo.DesiredRunningTasksCount--
+			nodeInfo.DesiredRunningTasksCountByService[t.ServiceID]--
+			return true
+		}
+		return false
+	}
+
+	nodeInfo.Tasks[t.ID] = t
+	reservations := taskReservations(t.Spec)
+	nodeInfo.AvailableResources.MemoryBytes -= reservations.MemoryBytes
+	nodeInfo.AvailableResources.NanoCPUs -= reservations.NanoCPUs
+
+	if t.DesiredState == api.TaskStateRunning {
+		nodeInfo.DesiredRunningTasksCount++
+		nodeInfo.DesiredRunningTasksCountByService[t.ServiceID]++
 	}
 
-	return false
+	return true
 }
 
 func taskReservations(spec api.TaskSpec) (reservations api.Resources) {

+ 115 - 0
vendor/src/github.com/docker/swarmkit/manager/scheduler/nodeset.go

@@ -0,0 +1,115 @@
+package scheduler
+
+import (
+	"container/heap"
+	"errors"
+)
+
+var errNodeNotFound = errors.New("node not found in scheduler dataset")
+
+type nodeSet struct {
+	nodes map[string]NodeInfo // map from node id to node info
+}
+
+func (ns *nodeSet) alloc(n int) {
+	ns.nodes = make(map[string]NodeInfo, n)
+}
+
+// nodeInfo returns the NodeInfo struct for a given node identified by its ID.
+func (ns *nodeSet) nodeInfo(nodeID string) (NodeInfo, error) {
+	node, ok := ns.nodes[nodeID]
+	if ok {
+		return node, nil
+	}
+	return NodeInfo{}, errNodeNotFound
+}
+
+// addOrUpdateNode sets the number of tasks for a given node. It adds the node
+// to the set if it wasn't already tracked.
+func (ns *nodeSet) addOrUpdateNode(n NodeInfo) {
+	ns.nodes[n.ID] = n
+}
+
+// updateNode sets the number of tasks for a given node. It ignores the update
+// if the node isn't already tracked in the set.
+func (ns *nodeSet) updateNode(n NodeInfo) {
+	_, ok := ns.nodes[n.ID]
+	if ok {
+		ns.nodes[n.ID] = n
+	}
+}
+
+func (ns *nodeSet) remove(nodeID string) {
+	delete(ns.nodes, nodeID)
+}
+
+type nodeMaxHeap struct {
+	nodes    []NodeInfo
+	lessFunc func(*NodeInfo, *NodeInfo) bool
+	length   int
+}
+
+func (h nodeMaxHeap) Len() int {
+	return h.length
+}
+
+func (h nodeMaxHeap) Swap(i, j int) {
+	h.nodes[i], h.nodes[j] = h.nodes[j], h.nodes[i]
+}
+
+func (h nodeMaxHeap) Less(i, j int) bool {
+	// reversed to make a max-heap
+	return h.lessFunc(&h.nodes[j], &h.nodes[i])
+}
+
+func (h *nodeMaxHeap) Push(x interface{}) {
+	h.nodes = append(h.nodes, x.(NodeInfo))
+	h.length++
+}
+
+func (h *nodeMaxHeap) Pop() interface{} {
+	h.length--
+	// return value is never used
+	return nil
+}
+
+// findBestNodes returns n nodes (or < n if fewer nodes are available) that
+// rank best (lowest) according to the sorting function.
+func (ns *nodeSet) findBestNodes(n int, meetsConstraints func(*NodeInfo) bool, nodeLess func(*NodeInfo, *NodeInfo) bool) []NodeInfo {
+	if n == 0 {
+		return []NodeInfo{}
+	}
+
+	nodeHeap := nodeMaxHeap{lessFunc: nodeLess}
+
+	// TODO(aaronl): Is it possible to avoid checking constraints on every
+	// node? Perhaps we should try to schedule with n*2 nodes that weren't
+	// prescreened, and repeat the selection if there weren't enough nodes
+	// meeting the constraints.
+	for _, node := range ns.nodes {
+		// If there are fewer than n nodes in the heap, we add this
+		// node if it meets the constraints. Otherwise, the heap has
+		// n nodes, and if this node is better than the worst node in
+		// the heap, we replace the worst node and then fix the heap.
+		if nodeHeap.Len() < n {
+			if meetsConstraints(&node) {
+				heap.Push(&nodeHeap, node)
+			}
+		} else if nodeLess(&node, &nodeHeap.nodes[0]) {
+			if meetsConstraints(&node) {
+				nodeHeap.nodes[0] = node
+				heap.Fix(&nodeHeap, 0)
+			}
+		}
+	}
+
+	// Popping every element orders the nodes from best to worst. The
+	// first pop gets the worst node (since this is a max-heap), and puts it
+	// at position n-1. Then the next pop puts the next-worst at n-2, and
+	// so on.
+	for nodeHeap.Len() > 0 {
+		heap.Pop(&nodeHeap)
+	}
+
+	return nodeHeap.nodes
+}

+ 130 - 59
vendor/src/github.com/docker/swarmkit/manager/scheduler/scheduler.go

@@ -1,7 +1,6 @@
 package scheduler
 
 import (
-	"container/heap"
 	"container/list"
 	"time"
 
@@ -24,7 +23,7 @@ type Scheduler struct {
 	unassignedTasks *list.List
 	// preassignedTasks already have NodeID, need resource validation
 	preassignedTasks map[string]*api.Task
-	nodeHeap         nodeHeap
+	nodeSet          nodeSet
 	allTasks         map[string]*api.Task
 	pipeline         *Pipeline
 
@@ -32,11 +31,6 @@ type Scheduler struct {
 	stopChan chan struct{}
 	// doneChan is closed when the state machine terminates
 	doneChan chan struct{}
-
-	// This currently exists only for benchmarking. It tells the scheduler
-	// scan the whole heap instead of taking the minimum-valued node
-	// blindly.
-	scanAllNodes bool
 }
 
 // New creates a new scheduler.
@@ -83,7 +77,7 @@ func (s *Scheduler) setupTasksList(tx store.ReadTx) error {
 		tasksByNode[t.NodeID][t.ID] = t
 	}
 
-	if err := s.buildNodeHeap(tx, tasksByNode); err != nil {
+	if err := s.buildNodeSet(tx, tasksByNode); err != nil {
 		return err
 	}
 
@@ -152,7 +146,7 @@ func (s *Scheduler) Run(ctx context.Context) error {
 				s.createOrUpdateNode(v.Node)
 				pendingChanges++
 			case state.EventDeleteNode:
-				s.nodeHeap.remove(v.Node.ID)
+				s.nodeSet.remove(v.Node.ID)
 			case state.EventCommit:
 				if commitDebounceTimer != nil {
 					if time.Since(debouncingStarted) > maxLatency {
@@ -210,9 +204,9 @@ func (s *Scheduler) createTask(ctx context.Context, t *api.Task) int {
 		return 0
 	}
 
-	nodeInfo, err := s.nodeHeap.nodeInfo(t.NodeID)
+	nodeInfo, err := s.nodeSet.nodeInfo(t.NodeID)
 	if err == nil && nodeInfo.addTask(t) {
-		s.nodeHeap.updateNode(nodeInfo)
+		s.nodeSet.updateNode(nodeInfo)
 	}
 
 	return 0
@@ -257,9 +251,9 @@ func (s *Scheduler) updateTask(ctx context.Context, t *api.Task) int {
 	}
 
 	s.allTasks[t.ID] = t
-	nodeInfo, err := s.nodeHeap.nodeInfo(t.NodeID)
+	nodeInfo, err := s.nodeSet.nodeInfo(t.NodeID)
 	if err == nil && nodeInfo.addTask(t) {
-		s.nodeHeap.updateNode(nodeInfo)
+		s.nodeSet.updateNode(nodeInfo)
 	}
 
 	return 0
@@ -268,14 +262,14 @@ func (s *Scheduler) updateTask(ctx context.Context, t *api.Task) int {
 func (s *Scheduler) deleteTask(ctx context.Context, t *api.Task) {
 	delete(s.allTasks, t.ID)
 	delete(s.preassignedTasks, t.ID)
-	nodeInfo, err := s.nodeHeap.nodeInfo(t.NodeID)
+	nodeInfo, err := s.nodeSet.nodeInfo(t.NodeID)
 	if err == nil && nodeInfo.removeTask(t) {
-		s.nodeHeap.updateNode(nodeInfo)
+		s.nodeSet.updateNode(nodeInfo)
 	}
 }
 
 func (s *Scheduler) createOrUpdateNode(n *api.Node) {
-	nodeInfo, _ := s.nodeHeap.nodeInfo(n.ID)
+	nodeInfo, _ := s.nodeSet.nodeInfo(n.ID)
 	var resources api.Resources
 	if n.Description != nil && n.Description.Resources != nil {
 		resources = *n.Description.Resources
@@ -288,7 +282,7 @@ func (s *Scheduler) createOrUpdateNode(n *api.Node) {
 	}
 	nodeInfo.Node = n
 	nodeInfo.AvailableResources = resources
-	s.nodeHeap.addOrUpdateNode(nodeInfo)
+	s.nodeSet.addOrUpdateNode(nodeInfo)
 }
 
 func (s *Scheduler) processPreassignedTasks(ctx context.Context) {
@@ -308,44 +302,60 @@ func (s *Scheduler) processPreassignedTasks(ctx context.Context) {
 	}
 	for _, decision := range failed {
 		s.allTasks[decision.old.ID] = decision.old
-		nodeInfo, err := s.nodeHeap.nodeInfo(decision.new.NodeID)
+		nodeInfo, err := s.nodeSet.nodeInfo(decision.new.NodeID)
 		if err == nil && nodeInfo.removeTask(decision.new) {
-			s.nodeHeap.updateNode(nodeInfo)
+			s.nodeSet.updateNode(nodeInfo)
 		}
 	}
 }
 
 // tick attempts to schedule the queue.
 func (s *Scheduler) tick(ctx context.Context) {
+	tasksByCommonSpec := make(map[string]map[string]*api.Task)
 	schedulingDecisions := make(map[string]schedulingDecision, s.unassignedTasks.Len())
 
 	var next *list.Element
 	for e := s.unassignedTasks.Front(); e != nil; e = next {
 		next = e.Next()
-		id := e.Value.(*api.Task).ID
-		if _, ok := schedulingDecisions[id]; ok {
-			s.unassignedTasks.Remove(e)
-			continue
-		}
 		t := s.allTasks[e.Value.(*api.Task).ID]
 		if t == nil || t.NodeID != "" {
 			// task deleted or already assigned
 			s.unassignedTasks.Remove(e)
 			continue
 		}
-		if newT := s.scheduleTask(ctx, t); newT != nil {
-			schedulingDecisions[id] = schedulingDecision{old: t, new: newT}
-			s.unassignedTasks.Remove(e)
+
+		// Group tasks with common specs by marshalling the spec
+		// into taskGroupKey and using it as a map key.
+		// TODO(aaronl): Once specs are versioned, this will allow a
+		// much more efficient fast path.
+		fieldsToMarshal := api.Task{
+			ServiceID: t.ServiceID,
+			Spec:      t.Spec,
 		}
+		marshalled, err := fieldsToMarshal.Marshal()
+		if err != nil {
+			panic(err)
+		}
+		taskGroupKey := string(marshalled)
+
+		if tasksByCommonSpec[taskGroupKey] == nil {
+			tasksByCommonSpec[taskGroupKey] = make(map[string]*api.Task)
+		}
+		tasksByCommonSpec[taskGroupKey][t.ID] = t
+		s.unassignedTasks.Remove(e)
+	}
+
+	for _, taskGroup := range tasksByCommonSpec {
+		s.scheduleTaskGroup(ctx, taskGroup, schedulingDecisions)
 	}
 
 	_, failed := s.applySchedulingDecisions(ctx, schedulingDecisions)
 	for _, decision := range failed {
 		s.allTasks[decision.old.ID] = decision.old
 
-		nodeInfo, err := s.nodeHeap.nodeInfo(decision.new.NodeID)
+		nodeInfo, err := s.nodeSet.nodeInfo(decision.new.NodeID)
 		if err == nil && nodeInfo.removeTask(decision.new) {
-			s.nodeHeap.updateNode(nodeInfo)
+			s.nodeSet.updateNode(nodeInfo)
 		}
 
 		// enqueue task for next scheduling attempt
@@ -401,11 +411,11 @@ func (s *Scheduler) applySchedulingDecisions(ctx context.Context, schedulingDeci
 	return
 }
 
-// taskFitNode checks if a node has enough resource to accommodate a task
+// taskFitNode checks if a node has enough resources to accommodate a task.
 func (s *Scheduler) taskFitNode(ctx context.Context, t *api.Task, nodeID string) *api.Task {
-	nodeInfo, err := s.nodeHeap.nodeInfo(nodeID)
+	nodeInfo, err := s.nodeSet.nodeInfo(nodeID)
 	if err != nil {
-		// node does not exist in heap (it may have been deleted)
+		// node does not exist in set (it may have been deleted)
 		return nil
 	}
 	s.pipeline.SetTask(t)
@@ -422,57 +432,118 @@ func (s *Scheduler) taskFitNode(ctx context.Context, t *api.Task, nodeID string)
 	s.allTasks[t.ID] = &newT
 
 	if nodeInfo.addTask(&newT) {
-		s.nodeHeap.updateNode(nodeInfo)
+		s.nodeSet.updateNode(nodeInfo)
 	}
 	return &newT
 }
 
-// scheduleTask schedules a single task.
-func (s *Scheduler) scheduleTask(ctx context.Context, t *api.Task) *api.Task {
+// scheduleTaskGroup schedules a batch of tasks that are part of the same
+// service and share the same version of the spec.
+func (s *Scheduler) scheduleTaskGroup(ctx context.Context, taskGroup map[string]*api.Task, schedulingDecisions map[string]schedulingDecision) {
+	// Pick a task at random from taskGroup to use for constraint
+	// evaluation. It doesn't matter which one we pick because all the
+	// tasks in the group are equal in terms of the fields the constraint
+	// filters consider.
+	var t *api.Task
+	for _, t = range taskGroup {
+		break
+	}
+
 	s.pipeline.SetTask(t)
-	n, _ := s.nodeHeap.findMin(s.pipeline.Process, s.scanAllNodes)
-	if n == nil {
-		log.G(ctx).WithField("task.id", t.ID).Debug("No suitable node available for task")
-		return nil
+
+	nodeLess := func(a *NodeInfo, b *NodeInfo) bool {
+		tasksByServiceA := a.DesiredRunningTasksCountByService[t.ServiceID]
+		tasksByServiceB := b.DesiredRunningTasksCountByService[t.ServiceID]
+
+		if tasksByServiceA < tasksByServiceB {
+			return true
+		}
+		if tasksByServiceA > tasksByServiceB {
+			return false
+		}
+
+		// Total number of tasks breaks ties.
+		return a.DesiredRunningTasksCount < b.DesiredRunningTasksCount
 	}
 
-	log.G(ctx).WithField("task.id", t.ID).Debugf("Assigning to node %s", n.ID)
-	newT := *t
-	newT.NodeID = n.ID
-	newT.Status = api.TaskStatus{
-		State:     api.TaskStateAssigned,
-		Timestamp: ptypes.MustTimestampProto(time.Now()),
-		Message:   "scheduler assigned task to node",
+	nodes := s.nodeSet.findBestNodes(len(taskGroup), s.pipeline.Process, nodeLess)
+	if len(nodes) == 0 {
+		for _, t := range taskGroup {
+			log.G(ctx).WithField("task.id", t.ID).Debug("no suitable node available for task")
+			s.enqueue(t)
+		}
+		return
 	}
-	s.allTasks[t.ID] = &newT
 
-	nodeInfo, err := s.nodeHeap.nodeInfo(n.ID)
-	if err == nil && nodeInfo.addTask(&newT) {
-		s.nodeHeap.updateNode(nodeInfo)
+	failedConstraints := make(map[int]bool) // key is index in nodes slice
+	nodeIter := 0
+	for taskID, t := range taskGroup {
+		n := &nodes[nodeIter%len(nodes)]
+
+		log.G(ctx).WithField("task.id", t.ID).Debugf("assigning to node %s", n.ID)
+		newT := *t
+		newT.NodeID = n.ID
+		newT.Status = api.TaskStatus{
+			State:     api.TaskStateAssigned,
+			Timestamp: ptypes.MustTimestampProto(time.Now()),
+			Message:   "scheduler assigned task to node",
+		}
+		s.allTasks[t.ID] = &newT
+
+		nodeInfo, err := s.nodeSet.nodeInfo(n.ID)
+		if err == nil && nodeInfo.addTask(&newT) {
+			s.nodeSet.updateNode(nodeInfo)
+			nodes[nodeIter%len(nodes)] = nodeInfo
+		}
+
+		schedulingDecisions[taskID] = schedulingDecision{old: t, new: &newT}
+		delete(taskGroup, taskID)
+
+		if nodeIter+1 < len(nodes) {
+			// First pass fills the nodes until they have the same
+			// number of tasks from this service.
+			nextNode := nodes[(nodeIter+1)%len(nodes)]
+			if nodeLess(&nextNode, &nodeInfo) {
+				nodeIter++
+				continue
+			}
+		} else {
+			// In later passes, we just assign one task at a time
+			// to each node that still meets the constraints.
+			nodeIter++
+		}
+
+		origNodeIter := nodeIter
+		for failedConstraints[nodeIter%len(nodes)] || !s.pipeline.Process(&nodes[nodeIter%len(nodes)]) {
+			failedConstraints[nodeIter%len(nodes)] = true
+			nodeIter++
+			if nodeIter-origNodeIter == len(nodes) {
+				// None of the nodes meet the constraints anymore.
+				for _, t := range taskGroup {
+					log.G(ctx).WithField("task.id", t.ID).Debug("no suitable node available for task")
+					s.enqueue(t)
+				}
+				return
+			}
+		}
 	}
-	return &newT
 }
 
-func (s *Scheduler) buildNodeHeap(tx store.ReadTx, tasksByNode map[string]map[string]*api.Task) error {
+func (s *Scheduler) buildNodeSet(tx store.ReadTx, tasksByNode map[string]map[string]*api.Task) error {
 	nodes, err := store.FindNodes(tx, store.All)
 	if err != nil {
 		return err
 	}
 
-	s.nodeHeap.alloc(len(nodes))
+	s.nodeSet.alloc(len(nodes))
 
-	i := 0
 	for _, n := range nodes {
 		var resources api.Resources
 		if n.Description != nil && n.Description.Resources != nil {
 			resources = *n.Description.Resources
 		}
-		s.nodeHeap.heap = append(s.nodeHeap.heap, newNodeInfo(n, tasksByNode[n.ID], resources))
-		s.nodeHeap.index[n.ID] = i
-		i++
+		s.nodeSet.addOrUpdateNode(newNodeInfo(n, tasksByNode[n.ID], resources))
 	}
 
-	heap.Init(&s.nodeHeap)
-
 	return nil
 }

+ 3 - 0
vendor/src/github.com/docker/swarmkit/manager/state/raft/membership/cluster.go

@@ -74,6 +74,9 @@ func (c *Cluster) Tick() {
 		m.tick++
 		if m.tick > c.heartbeatTicks {
 			m.active = false
+			if m.Conn != nil {
+				m.Conn.Close()
+			}
 		}
 	}
 }

+ 114 - 56
vendor/src/github.com/docker/swarmkit/manager/state/raft/raft.go

@@ -26,6 +26,7 @@ import (
 	"github.com/docker/swarmkit/api"
 	"github.com/docker/swarmkit/ca"
 	"github.com/docker/swarmkit/log"
+	"github.com/docker/swarmkit/manager/raftselector"
 	"github.com/docker/swarmkit/manager/state/raft/membership"
 	"github.com/docker/swarmkit/manager/state/store"
 	"github.com/docker/swarmkit/manager/state/watch"
@@ -82,7 +83,7 @@ type Node struct {
 	Server         *grpc.Server
 	Ctx            context.Context
 	cancel         func()
-	tlsCredentials credentials.TransportAuthenticator
+	tlsCredentials credentials.TransportCredentials
 
 	Address  string
 	StateDir string
@@ -152,7 +153,7 @@ type NewNodeOptions struct {
 	// SendTimeout is the timeout on the sending messages to other raft
 	// nodes. Leave this as 0 to get the default value.
 	SendTimeout    time.Duration
-	TLSCredentials credentials.TransportAuthenticator
+	TLSCredentials credentials.TransportCredentials
 }
 
 func init() {
@@ -176,7 +177,7 @@ func NewNode(ctx context.Context, opts NewNodeOptions) *Node {
 	n := &Node{
 		Ctx:            ctx,
 		cancel:         cancel,
-		cluster:        membership.NewCluster(cfg.ElectionTick),
+		cluster:        membership.NewCluster(2 * cfg.ElectionTick),
 		tlsCredentials: opts.TLSCredentials,
 		raftStore:      raftStore,
 		Address:        opts.Addr,
@@ -395,39 +396,55 @@ func (n *Node) Run(ctx context.Context) error {
 				n.confState = rd.Snapshot.Metadata.ConfState
 			}
 
-			// Process committed entries
-			for _, entry := range rd.CommittedEntries {
-				if err := n.processCommitted(entry); err != nil {
-					n.Config.Logger.Error(err)
-				}
-			}
+			// If we cease to be the leader, we must cancel any
+			// proposals that are currently waiting for a quorum to
+			// acknowledge them. It is still possible for these to
+			// become committed, but if that happens we will apply
+			// them as any follower would.
 
-			// Trigger a snapshot every once in awhile
-			if n.snapshotInProgress == nil &&
-				raftConfig.SnapshotInterval > 0 &&
-				n.appliedIndex-n.snapshotIndex >= raftConfig.SnapshotInterval {
-				n.doSnapshot(&raftConfig)
-			}
+			// It is important that we cancel these proposals before
+			// calling processCommitted, so processCommitted does
+			// not deadlock.
 
-			// If we cease to be the leader, we must cancel
-			// any proposals that are currently waiting for
-			// a quorum to acknowledge them. It is still
-			// possible for these to become committed, but
-			// if that happens we will apply them as any
-			// follower would.
 			if rd.SoftState != nil {
 				if wasLeader && rd.SoftState.RaftState != raft.StateLeader {
 					wasLeader = false
-					n.wait.cancelAll()
 					if atomic.LoadUint32(&n.signalledLeadership) == 1 {
 						atomic.StoreUint32(&n.signalledLeadership, 0)
 						n.leadershipBroadcast.Publish(IsFollower)
 					}
+
+					// It is important that we set n.signalledLeadership to 0
+					// before calling n.wait.cancelAll. When a new raft
+					// request is registered, it checks n.signalledLeadership
+					// afterwards, and cancels the registration if it is 0.
+					// If cancelAll was called first, this call might run
+					// before the new request registers, but
+					// signalledLeadership would be set after the check.
+					// Setting signalledLeadership before calling cancelAll
+					// ensures that if a new request is registered during
+					// this transition, it will either be cancelled by
+					// cancelAll, or by its own check of signalledLeadership.
+					n.wait.cancelAll()
 				} else if !wasLeader && rd.SoftState.RaftState == raft.StateLeader {
 					wasLeader = true
 				}
 			}
 
+			// Process committed entries
+			for _, entry := range rd.CommittedEntries {
+				if err := n.processCommitted(entry); err != nil {
+					n.Config.Logger.Error(err)
+				}
+			}
+
+			// Trigger a snapshot every once in a while
+			if n.snapshotInProgress == nil &&
+				raftConfig.SnapshotInterval > 0 &&
+				n.appliedIndex-n.snapshotIndex >= raftConfig.SnapshotInterval {
+				n.doSnapshot(&raftConfig)
+			}
+
 			if wasLeader && atomic.LoadUint32(&n.signalledLeadership) != 1 {
 				// If all the entries in the log have become
 				// committed, broadcast our leadership status.
@@ -539,11 +556,11 @@ func (n *Node) Leader() (uint64, error) {
 	defer n.stopMu.RUnlock()
 
 	if !n.IsMember() {
-		return 0, ErrNoRaftMember
+		return raft.None, ErrNoRaftMember
 	}
 	leader := n.leader()
-	if leader == 0 {
-		return 0, ErrNoClusterLeader
+	if leader == raft.None {
+		return raft.None, ErrNoClusterLeader
 	}
 
 	return leader, nil
@@ -658,6 +675,12 @@ func (n *Node) checkHealth(ctx context.Context, addr string, timeout time.Durati
 		return err
 	}
 
+	if timeout != 0 {
+		tctx, cancel := context.WithTimeout(ctx, timeout)
+		defer cancel()
+		ctx = tctx
+	}
+
 	client := api.NewHealthClient(conn)
 	defer conn.Close()
 
@@ -828,25 +851,54 @@ func (n *Node) ResolveAddress(ctx context.Context, msg *api.ResolveAddressReques
 	return &api.ResolveAddressResponse{Addr: member.Addr}, nil
 }
 
-// LeaderAddr returns address of current cluster leader.
-// With this method Node satisfies raftpicker.AddrSelector interface.
-func (n *Node) LeaderAddr() (string, error) {
-	ctx, cancel := context.WithTimeout(n.Ctx, 10*time.Second)
-	defer cancel()
-	if err := WaitForLeader(ctx, n); err != nil {
-		return "", ErrNoClusterLeader
+func (n *Node) getLeaderConn() (*grpc.ClientConn, error) {
+	leader, err := n.Leader()
+	if err != nil {
+		return nil, err
 	}
-	n.stopMu.RLock()
-	defer n.stopMu.RUnlock()
-	if !n.IsMember() {
-		return "", ErrNoRaftMember
+
+	if leader == n.Config.ID {
+		return nil, raftselector.ErrIsLeader
 	}
-	ms := n.cluster.Members()
-	l := ms[n.leader()]
+	l := n.cluster.GetMember(leader)
 	if l == nil {
-		return "", ErrNoClusterLeader
+		return nil, fmt.Errorf("no leader found")
+	}
+	if !n.cluster.Active(leader) {
+		return nil, fmt.Errorf("leader marked as inactive")
+	}
+	if l.Conn == nil {
+		return nil, fmt.Errorf("no connection to leader in member list")
+	}
+	return l.Conn, nil
+}
+
+// LeaderConn returns current connection to cluster leader or raftselector.ErrIsLeader
+// if current machine is leader.
+func (n *Node) LeaderConn(ctx context.Context) (*grpc.ClientConn, error) {
+	cc, err := n.getLeaderConn()
+	if err == nil {
+		return cc, nil
+	}
+	if err == raftselector.ErrIsLeader {
+		return nil, err
+	}
+	ticker := time.NewTicker(1 * time.Second)
+	defer ticker.Stop()
+	for {
+		select {
+		case <-ticker.C:
+			cc, err := n.getLeaderConn()
+			if err == nil {
+				return cc, nil
+			}
+			if err == raftselector.ErrIsLeader {
+				return nil, err
+			}
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		}
 	}
-	return l.Addr, nil
 }
 
 // registerNode registers a new node on the cluster memberlist
@@ -943,7 +995,7 @@ func (n *Node) GetMemberlist() map[uint64]*api.RaftMember {
 	members := n.cluster.Members()
 	leaderID, err := n.Leader()
 	if err != nil {
-		leaderID = 0
+		leaderID = raft.None
 	}
 
 	for id, member := range members {
@@ -1163,7 +1215,11 @@ func (n *Node) processInternalRaftRequest(ctx context.Context, r *api.InternalRa
 
 	r.ID = n.reqIDGen.Next()
 
-	ch := n.wait.register(r.ID, cb)
+	// This must be derived from the context which is cancelled by stop()
+	// to avoid a deadlock on shutdown.
+	waitCtx, cancel := context.WithCancel(n.Ctx)
+
+	ch := n.wait.register(r.ID, cb, cancel)
 
 	// Do this check after calling register to avoid a race.
 	if atomic.LoadUint32(&n.signalledLeadership) != 1 {
@@ -1182,24 +1238,19 @@ func (n *Node) processInternalRaftRequest(ctx context.Context, r *api.InternalRa
 		return nil, ErrRequestTooLarge
 	}
 
-	// This must use the context which is cancelled by stop() to avoid a
-	// deadlock on shutdown.
-	err = n.Propose(n.Ctx, data)
+	err = n.Propose(waitCtx, data)
 	if err != nil {
 		n.wait.cancel(r.ID)
 		return nil, err
 	}
 
 	select {
-	case x, ok := <-ch:
-		if ok {
-			res := x.(*applyResult)
-			return res.resp, res.err
-		}
-		return nil, ErrLostLeadership
-	case <-n.Ctx.Done():
+	case x := <-ch:
+		res := x.(*applyResult)
+		return res.resp, res.err
+	case <-waitCtx.Done():
 		n.wait.cancel(r.ID)
-		return nil, ErrStopped
+		return nil, ErrLostLeadership
 	case <-ctx.Done():
 		n.wait.cancel(r.ID)
 		return nil, ctx.Err()
@@ -1211,10 +1262,12 @@ func (n *Node) processInternalRaftRequest(ctx context.Context, r *api.InternalRa
 // until the change is performed or there is an error.
 func (n *Node) configure(ctx context.Context, cc raftpb.ConfChange) error {
 	cc.ID = n.reqIDGen.Next()
-	ch := n.wait.register(cc.ID, nil)
+
+	ctx, cancel := context.WithCancel(ctx)
+	ch := n.wait.register(cc.ID, nil, cancel)
 
 	if err := n.ProposeConfChange(ctx, cc); err != nil {
-		n.wait.trigger(cc.ID, nil)
+		n.wait.cancel(cc.ID)
 		return err
 	}
 
@@ -1228,7 +1281,7 @@ func (n *Node) configure(ctx context.Context, cc raftpb.ConfChange) error {
 		}
 		return nil
 	case <-ctx.Done():
-		n.wait.trigger(cc.ID, nil)
+		n.wait.cancel(cc.ID)
 		return ctx.Err()
 	case <-n.Ctx.Done():
 		return ErrStopped
@@ -1271,6 +1324,11 @@ func (n *Node) processEntry(entry raftpb.Entry) error {
 		// position and cancelling the transaction. Create a new
 		// transaction to commit the data.
 
+		// It should not be possible for processInternalRaftRequest
+		// to be running in this situation, but out of caution we
+		// cancel any current invocations to avoid a deadlock.
+		n.wait.cancelAll()
+
 		err := n.memoryStore.ApplyStoreActions(r.Action)
 		if err != nil {
 			log.G(context.Background()).Errorf("error applying actions from raft: %v", err)

+ 1 - 1
vendor/src/github.com/docker/swarmkit/manager/state/raft/util.go

@@ -13,7 +13,7 @@ import (
 )
 
 // dial returns a grpc client connection
-func dial(addr string, protocol string, creds credentials.TransportAuthenticator, timeout time.Duration) (*grpc.ClientConn, error) {
+func dial(addr string, protocol string, creds credentials.TransportCredentials, timeout time.Duration) (*grpc.ClientConn, error) {
 	grpcOptions := []grpc.DialOption{
 		grpc.WithBackoffMaxDelay(2 * time.Second),
 		grpc.WithTransportCredentials(creds),

+ 9 - 6
vendor/src/github.com/docker/swarmkit/manager/state/raft/wait.go

@@ -10,6 +10,8 @@ type waitItem struct {
 	ch chan interface{}
 	// callback which is called synchronously when the wait is triggered
 	cb func()
+	// callback which is called to cancel a waiter
+	cancel func()
 }
 
 type wait struct {
@@ -21,13 +23,13 @@ func newWait() *wait {
 	return &wait{m: make(map[uint64]waitItem)}
 }
 
-func (w *wait) register(id uint64, cb func()) <-chan interface{} {
+func (w *wait) register(id uint64, cb func(), cancel func()) <-chan interface{} {
 	w.l.Lock()
 	defer w.l.Unlock()
 	_, ok := w.m[id]
 	if !ok {
 		ch := make(chan interface{}, 1)
-		w.m[id] = waitItem{ch: ch, cb: cb}
+		w.m[id] = waitItem{ch: ch, cb: cb, cancel: cancel}
 		return ch
 	}
 	panic(fmt.Sprintf("duplicate id %x", id))
@@ -43,7 +45,6 @@ func (w *wait) trigger(id uint64, x interface{}) bool {
 			waitItem.cb()
 		}
 		waitItem.ch <- x
-		close(waitItem.ch)
 		return true
 	}
 	return false
@@ -54,8 +55,8 @@ func (w *wait) cancel(id uint64) {
 	waitItem, ok := w.m[id]
 	delete(w.m, id)
 	w.l.Unlock()
-	if ok {
-		close(waitItem.ch)
+	if ok && waitItem.cancel != nil {
+		waitItem.cancel()
 	}
 }
 
@@ -65,6 +66,8 @@ func (w *wait) cancelAll() {
 
 	for id, waitItem := range w.m {
 		delete(w.m, id)
-		close(waitItem.ch)
+		if waitItem.cancel != nil {
+			waitItem.cancel()
+		}
 	}
 }

+ 24 - 1
vendor/src/github.com/docker/swarmkit/manager/state/watch/watch.go

@@ -6,6 +6,29 @@ import (
 	"github.com/docker/go-events"
 )
 
+// dropErrClosed is a sink that suppresses ErrSinkClosed from Write, to avoid
+// debug log messages that may be confusing. It is possible that the queue
+// will try to write an event to its destination channel while the queue is
+// being removed from the broadcaster. Since the channel is closed before the
+// queue, there is a narrow window when this is possible. In some event-based
+// systems, dropping events when a sink is removed from a broadcaster is a problem, but
+// for the usage in this watch package that's the expected behavior.
+type dropErrClosed struct {
+	sink events.Sink
+}
+
+func (s dropErrClosed) Write(event events.Event) error {
+	err := s.sink.Write(event)
+	if err == events.ErrSinkClosed {
+		return nil
+	}
+	return err
+}
+
+func (s dropErrClosed) Close() error {
+	return s.sink.Close()
+}
+
 // Queue is the structure used to publish events and watch for them.
 type Queue struct {
 	mu          sync.Mutex
@@ -35,7 +58,7 @@ func (q *Queue) Watch() (eventq chan events.Event, cancel func()) {
 // close the channel.
 func (q *Queue) CallbackWatch(matcher events.Matcher) (eventq chan events.Event, cancel func()) {
 	ch := events.NewChannel(0)
-	sink := events.Sink(events.NewQueue(ch))
+	sink := events.Sink(events.NewQueue(dropErrClosed{sink: ch}))
 
 	if matcher != nil {
 		sink = events.NewFilter(sink, matcher)

+ 9 - 4
vendor/src/github.com/docker/swarmkit/protobuf/plugin/plugin.pb.go

@@ -33,7 +33,9 @@ var _ = math.Inf
 
 // This is a compile-time assertion to ensure that this generated file
 // is compatible with the proto package it is being compiled against.
-const _ = proto.GoGoProtoPackageIsVersion1
+// A compilation error at this line likely means your copy of the
+// proto package needs to be updated.
+const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package
 
 type TLSAuthorization struct {
 	// Roles contains the acceptable TLS OU roles for the handler.
@@ -96,11 +98,12 @@ func valueToGoStringPlugin(v interface{}, typ string) string {
 	pv := reflect.Indirect(rv).Interface()
 	return fmt.Sprintf("func(v %v) *%v { return &v } ( %#v )", typ, typ, pv)
 }
-func extensionToGoStringPlugin(e map[int32]github_com_gogo_protobuf_proto.Extension) string {
+func extensionToGoStringPlugin(m github_com_gogo_protobuf_proto.Message) string {
+	e := github_com_gogo_protobuf_proto.GetUnsafeExtensionsMap(m)
 	if e == nil {
 		return "nil"
 	}
-	s := "map[int32]proto.Extension{"
+	s := "proto.NewUnsafeXXX_InternalExtensions(map[int32]proto.Extension{"
 	keys := make([]int, 0, len(e))
 	for k := range e {
 		keys = append(keys, int(k))
@@ -110,7 +113,7 @@ func extensionToGoStringPlugin(e map[int32]github_com_gogo_protobuf_proto.Extens
 	for _, k := range keys {
 		ss = append(ss, strconv.Itoa(k)+": "+e[int32(k)].GoString())
 	}
-	s += strings.Join(ss, ",") + "}"
+	s += strings.Join(ss, ",") + "})"
 	return s
 }
 func (m *TLSAuthorization) Marshal() (data []byte, err error) {
@@ -443,6 +446,8 @@ var (
 	ErrIntOverflowPlugin   = fmt.Errorf("proto: integer overflow")
 )
 
+func init() { proto.RegisterFile("plugin.proto", fileDescriptorPlugin) }
+
 var fileDescriptorPlugin = []byte{
 	// 259 bytes of a gzipped FileDescriptorProto
 	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0xe2, 0xe2, 0x29, 0xc8, 0x29, 0x4d,

部分文件因为文件数量过多而无法显示