diff --git a/vendor.conf b/vendor.conf index e749a00f88..09a6715458 100644 --- a/vendor.conf +++ b/vendor.conf @@ -100,7 +100,7 @@ github.com/docker/containerd 03e5862ec0d8d3b3f750e19fca3ee367e13c090e github.com/tonistiigi/fifo 1405643975692217d6720f8b54aeee1bf2cd5cf4 # cluster -github.com/docker/swarmkit 99adeb1c6b33cebc81c31dd05b163080033062f2 +github.com/docker/swarmkit 7f910df8587ad86b62be7a023a2236183e68d879 github.com/golang/mock bd3c8e81be01eef76d4b503f5e687d2d1354d2d9 github.com/gogo/protobuf v0.3 github.com/cloudflare/cfssl 7fb22c8cba7ecaf98e4082d22d65800cf45e042a diff --git a/vendor/github.com/docker/swarmkit/manager/state/raft/raft.go b/vendor/github.com/docker/swarmkit/manager/state/raft/raft.go index 721437b18e..db8e56be4a 100644 --- a/vendor/github.com/docker/swarmkit/manager/state/raft/raft.go +++ b/vendor/github.com/docker/swarmkit/manager/state/raft/raft.go @@ -409,7 +409,7 @@ func (n *Node) Run(ctx context.Context) error { // Save entries to storage if err := n.saveToStorage(ctx, &raftConfig, rd.HardState, rd.Entries, rd.Snapshot); err != nil { - log.G(ctx).WithError(err).Error("failed to save entries to storage") + return errors.Wrap(err, "failed to save entries to storage") } if len(rd.Messages) != 0 { @@ -714,11 +714,20 @@ func (n *Node) Join(ctx context.Context, req *api.JoinRequest) (*api.JoinRespons defer n.membershipLock.Unlock() if !n.IsMember() { - return nil, ErrNoRaftMember + return nil, grpc.Errorf(codes.FailedPrecondition, "%s", ErrNoRaftMember.Error()) } if !n.isLeader() { - return nil, ErrLostLeadership + return nil, grpc.Errorf(codes.FailedPrecondition, "%s", ErrLostLeadership.Error()) + } + + // A single manager must not be able to join the raft cluster twice. If + // it did, that would cause the quorum to be computed incorrectly. This + // could happen if the WAL was deleted from an active manager. + for _, m := range n.cluster.Members() { + if m.NodeID == nodeInfo.NodeID { + return nil, grpc.Errorf(codes.AlreadyExists, "%s", "a raft member with this node ID already exists") + } } // Find a unique ID for the joining member. @@ -738,7 +747,7 @@ func (n *Node) Join(ctx context.Context, req *api.JoinRequest) (*api.JoinRespons requestHost, requestPort, err := net.SplitHostPort(remoteAddr) if err != nil { - return nil, fmt.Errorf("invalid address %s in raft join request", remoteAddr) + return nil, grpc.Errorf(codes.InvalidArgument, "invalid address %s in raft join request", remoteAddr) } requestIP := net.ParseIP(requestHost) @@ -994,6 +1003,11 @@ func (n *Node) ProcessRaftMessage(ctx context.Context, msg *api.ProcessRaftMessa defer n.stopMu.RUnlock() if n.IsMember() { + if msg.Message.To != n.Config.ID { + n.processRaftMessageLogger(ctx, msg).Errorf("received message intended for raft_id %x", msg.Message.To) + return &api.ProcessRaftMessageResponse{}, nil + } + if err := n.raftNode.Step(ctx, *msg.Message); err != nil { n.processRaftMessageLogger(ctx, msg).WithError(err).Debug("raft Step failed") } diff --git a/vendor/github.com/docker/swarmkit/node/node.go b/vendor/github.com/docker/swarmkit/node/node.go index 380a98b063..0fe4d2fe45 100644 --- a/vendor/github.com/docker/swarmkit/node/node.go +++ b/vendor/github.com/docker/swarmkit/node/node.go @@ -297,10 +297,12 @@ func (n *Node) run(ctx context.Context) (err error) { go func() { managerErr = n.runManager(ctx, securityConfig, managerReady) // store err and loop wg.Done() + cancel() }() go func() { agentErr = n.runAgent(ctx, db, securityConfig.ClientTLSCreds, agentReady) wg.Done() + cancel() }() go func() {