瀏覽代碼

Merge pull request #345 from thaJeztah/19.03_backport_swarm_flaky

[19.03 backport] integration-cli: fix swarm tests flakiness
Sebastiaan van Stijn 5 年之前
父節點
當前提交
b262d40daf
共有 3 個文件被更改,包括 52 次插入和 9 次刪除
  1. 14 2
      integration-cli/docker_api_swarm_test.go
  2. 30 6
      integration-cli/docker_cli_swarm_test.go
  3. 8 1
      internal/test/daemon/node.go

+ 14 - 2
integration-cli/docker_api_swarm_test.go

@@ -27,6 +27,7 @@ import (
 	"github.com/docker/docker/internal/test/request"
 	"github.com/docker/docker/internal/test/request"
 	"github.com/docker/swarmkit/ca"
 	"github.com/docker/swarmkit/ca"
 	"github.com/go-check/check"
 	"github.com/go-check/check"
+	"github.com/pkg/errors"
 	"gotest.tools/assert"
 	"gotest.tools/assert"
 	is "gotest.tools/assert/cmp"
 	is "gotest.tools/assert/cmp"
 )
 )
@@ -313,13 +314,24 @@ func (s *DockerSwarmSuite) TestAPISwarmLeaderElection(c *check.C) {
 		leader    *daemon.Daemon   // keep track of leader
 		leader    *daemon.Daemon   // keep track of leader
 		followers []*daemon.Daemon // keep track of followers
 		followers []*daemon.Daemon // keep track of followers
 	)
 	)
+	var lastErr error
 	checkLeader := func(nodes ...*daemon.Daemon) checkF {
 	checkLeader := func(nodes ...*daemon.Daemon) checkF {
 		return func(c *check.C) (interface{}, check.CommentInterface) {
 		return func(c *check.C) (interface{}, check.CommentInterface) {
 			// clear these out before each run
 			// clear these out before each run
 			leader = nil
 			leader = nil
 			followers = nil
 			followers = nil
 			for _, d := range nodes {
 			for _, d := range nodes {
-				if d.GetNode(c, d.NodeID()).ManagerStatus.Leader {
+				n := d.GetNode(c, d.NodeID(), func(err error) bool {
+					if strings.Contains(errors.Cause(err).Error(), context.DeadlineExceeded.Error()) || strings.Contains(err.Error(), "swarm does not have a leader") {
+						lastErr = err
+						return true
+					}
+					return false
+				})
+				if n == nil {
+					return false, check.Commentf("failed to get node: %v", lastErr)
+				}
+				if n.ManagerStatus.Leader {
 					leader = d
 					leader = d
 				} else {
 				} else {
 					followers = append(followers, d)
 					followers = append(followers, d)
@@ -391,7 +403,7 @@ func (s *DockerSwarmSuite) TestAPISwarmRaftQuorum(c *check.C) {
 	defer cli.Close()
 	defer cli.Close()
 
 
 	// d1 will eventually step down from leader because there is no longer an active quorum, wait for that to happen
 	// d1 will eventually step down from leader because there is no longer an active quorum, wait for that to happen
-	waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
+	waitAndAssert(c, defaultReconciliationTimeout*2, func(c *check.C) (interface{}, check.CommentInterface) {
 		_, err := cli.ServiceCreate(context.Background(), service.Spec, types.ServiceCreateOptions{})
 		_, err := cli.ServiceCreate(context.Background(), service.Spec, types.ServiceCreateOptions{})
 		return err.Error(), nil
 		return err.Error(), nil
 	}, checker.Contains, "Make sure more than half of the managers are online.")
 	}, checker.Contains, "Make sure more than half of the managers are online.")

+ 30 - 6
integration-cli/docker_cli_swarm_test.go

@@ -1303,9 +1303,21 @@ func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *check.C) {
 
 
 		c.Assert(getNodeStatus(c, d), checker.Equals, swarm.LocalNodeStateActive)
 		c.Assert(getNodeStatus(c, d), checker.Equals, swarm.LocalNodeStateActive)
 
 
-		outs, err = d.Cmd("node", "ls")
-		assert.NilError(c, err)
-		c.Assert(outs, checker.Not(checker.Contains), "Swarm is encrypted and needs to be unlocked")
+		retry := 0
+		for {
+			// an issue sometimes prevents the leader from being available right away
+			outs, err = d.Cmd("node", "ls")
+			if err != nil && retry < 5 {
+				if strings.Contains(outs, "swarm does not have a leader") {
+					retry++
+					time.Sleep(3 * time.Second)
+					continue
+				}
+			}
+			assert.NilError(c, err)
+			c.Assert(outs, checker.Not(checker.Contains), "Swarm is encrypted and needs to be unlocked")
+			break
+		}
 
 
 		unlockKey = newUnlockKey
 		unlockKey = newUnlockKey
 	}
 	}
@@ -1383,9 +1395,21 @@ func (s *DockerSwarmSuite) TestSwarmClusterRotateUnlockKey(c *check.C) {
 
 
 			c.Assert(getNodeStatus(c, d), checker.Equals, swarm.LocalNodeStateActive)
 			c.Assert(getNodeStatus(c, d), checker.Equals, swarm.LocalNodeStateActive)
 
 
-			outs, err = d.Cmd("node", "ls")
-			c.Assert(err, checker.IsNil, check.Commentf("%s", outs))
-			c.Assert(outs, checker.Not(checker.Contains), "Swarm is encrypted and needs to be unlocked")
+			retry := 0
+			for {
+				// an issue sometimes prevents the leader from being available right away
+				outs, err = d.Cmd("node", "ls")
+				if err != nil && retry < 5 {
+					if strings.Contains(outs, "swarm does not have a leader") {
+						retry++
+						time.Sleep(3 * time.Second)
+						continue
+					}
+				}
+				c.Assert(err, checker.IsNil, check.Commentf("%s", outs))
+				c.Assert(outs, checker.Not(checker.Contains), "Swarm is encrypted and needs to be unlocked")
+				break
+			}
 		}
 		}
 
 
 		unlockKey = newUnlockKey
 		unlockKey = newUnlockKey

+ 8 - 1
internal/test/daemon/node.go

@@ -15,7 +15,7 @@ import (
 type NodeConstructor func(*swarm.Node)
 type NodeConstructor func(*swarm.Node)
 
 
 // GetNode returns a swarm node identified by the specified id
 // GetNode returns a swarm node identified by the specified id
-func (d *Daemon) GetNode(t assert.TestingT, id string) *swarm.Node {
+func (d *Daemon) GetNode(t assert.TestingT, id string, errCheck ...func(error) bool) *swarm.Node {
 	if ht, ok := t.(test.HelperT); ok {
 	if ht, ok := t.(test.HelperT); ok {
 		ht.Helper()
 		ht.Helper()
 	}
 	}
@@ -23,6 +23,13 @@ func (d *Daemon) GetNode(t assert.TestingT, id string) *swarm.Node {
 	defer cli.Close()
 	defer cli.Close()
 
 
 	node, _, err := cli.NodeInspectWithRaw(context.Background(), id)
 	node, _, err := cli.NodeInspectWithRaw(context.Background(), id)
+	if err != nil {
+		for _, f := range errCheck {
+			if f(err) {
+				return nil
+			}
+		}
+	}
 	assert.NilError(t, err, "[%s] (*Daemon).GetNode: NodeInspectWithRaw(%q) failed", d.id, id)
 	assert.NilError(t, err, "[%s] (*Daemon).GetNode: NodeInspectWithRaw(%q) failed", d.id, id)
 	assert.Check(t, node.ID == id)
 	assert.Check(t, node.ID == id)
 	return &node
 	return &node