De-flake TestSwarmClusterRotateUnlockKey... again... maybe?
This hopefully makes the test less flakey (or removes any flake that
would be caused by the test itself).
1. Adds tail of cluster daemon logs when there is a test failure so we
can more easily see what may be happening
2. Scans the daemon logs to check if the key is rotated before
restarting the daemon. This is a little hacky but a little better
than assuming it is done after a hard-coded 3 seconds.
3. Cleans up the `node ls` check such that it uses a poll function
Signed-off-by: Brian Goff <cpuguy83@gmail.com>
(cherry picked from commit fbdc02534a
)
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
This commit is contained in:
parent
71fa3ab079
commit
2cf1c762f8
3 changed files with 69 additions and 54 deletions
|
@ -657,6 +657,9 @@ func (s *DockerSwarmSuite) TearDownTest(ctx context.Context, c *testing.T) {
|
||||||
s.daemonsLock.Lock()
|
s.daemonsLock.Lock()
|
||||||
for _, d := range s.daemons {
|
for _, d := range s.daemons {
|
||||||
if d != nil {
|
if d != nil {
|
||||||
|
if c.Failed() {
|
||||||
|
d.TailLogsT(c, 100)
|
||||||
|
}
|
||||||
d.Stop(c)
|
d.Stop(c)
|
||||||
d.Cleanup(c)
|
d.Cleanup(c)
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,6 +27,7 @@ import (
|
||||||
remoteipam "github.com/docker/docker/libnetwork/ipams/remote/api"
|
remoteipam "github.com/docker/docker/libnetwork/ipams/remote/api"
|
||||||
"github.com/docker/docker/pkg/plugins"
|
"github.com/docker/docker/pkg/plugins"
|
||||||
"github.com/docker/docker/testutil"
|
"github.com/docker/docker/testutil"
|
||||||
|
testdaemon "github.com/docker/docker/testutil/daemon"
|
||||||
"github.com/moby/swarmkit/v2/ca/keyutils"
|
"github.com/moby/swarmkit/v2/ca/keyutils"
|
||||||
"github.com/vishvananda/netlink"
|
"github.com/vishvananda/netlink"
|
||||||
"gotest.tools/v3/assert"
|
"gotest.tools/v3/assert"
|
||||||
|
@ -1260,6 +1261,8 @@ func (s *DockerSwarmSuite) TestSwarmJoinPromoteLocked(c *testing.T) {
|
||||||
poll.WaitOn(c, pollCheck(c, d3.CheckLocalNodeState(ctx), checker.Equals(swarm.LocalNodeStateActive)), poll.WithTimeout(time.Second))
|
poll.WaitOn(c, pollCheck(c, d3.CheckLocalNodeState(ctx), checker.Equals(swarm.LocalNodeStateActive)), poll.WithTimeout(time.Second))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const swarmIsEncryptedMsg = "Swarm is encrypted and needs to be unlocked"
|
||||||
|
|
||||||
func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *testing.T) {
|
func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *testing.T) {
|
||||||
ctx := testutil.GetContext(c)
|
ctx := testutil.GetContext(c)
|
||||||
d := s.AddDaemon(ctx, c, true, true)
|
d := s.AddDaemon(ctx, c, true, true)
|
||||||
|
@ -1280,12 +1283,16 @@ func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *testing.T) {
|
||||||
d.RestartNode(c)
|
d.RestartNode(c)
|
||||||
assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateLocked)
|
assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateLocked)
|
||||||
|
|
||||||
outs, _ = d.Cmd("node", "ls")
|
unlock := func(d *daemon.Daemon, key string) *icmd.Result {
|
||||||
assert.Assert(c, strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
|
cmd := d.Command("swarm", "unlock")
|
||||||
cmd := d.Command("swarm", "unlock")
|
cmd.Stdin = strings.NewReader(key)
|
||||||
cmd.Stdin = bytes.NewBufferString(unlockKey)
|
return icmd.RunCmd(cmd)
|
||||||
result := icmd.RunCmd(cmd)
|
}
|
||||||
|
|
||||||
|
outs, _ = d.Cmd("node", "ls")
|
||||||
|
assert.Assert(c, strings.Contains(outs, swarmIsEncryptedMsg), outs)
|
||||||
|
|
||||||
|
result := unlock(d, unlockKey)
|
||||||
if result.Error == nil {
|
if result.Error == nil {
|
||||||
// On occasion, the daemon may not have finished
|
// On occasion, the daemon may not have finished
|
||||||
// rotating the KEK before restarting. The test is
|
// rotating the KEK before restarting. The test is
|
||||||
|
@ -1295,13 +1302,16 @@ func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *testing.T) {
|
||||||
// restart again, the new key should be required this
|
// restart again, the new key should be required this
|
||||||
// time.
|
// time.
|
||||||
|
|
||||||
time.Sleep(3 * time.Second)
|
// Wait for the rotation to happen
|
||||||
|
// Since there are multiple rotations, we need to wait until for the number of rotations we are currently on to be reflected in the logs
|
||||||
|
// This is a little janky... its depending on specific log messages AND these are debug logs... but it is the best we can do for now.
|
||||||
|
matcher := testdaemon.ScanLogsMatchCount(testdaemon.ScanLogsMatchString("successfully rotated KEK"), i+1)
|
||||||
|
poll.WaitOn(c, d.PollCheckLogs(ctx, matcher), poll.WithDelay(3*time.Second), poll.WithTimeout(time.Minute))
|
||||||
|
d.Restart(c)
|
||||||
|
|
||||||
d.RestartNode(c)
|
d.RestartNode(c)
|
||||||
|
|
||||||
cmd = d.Command("swarm", "unlock")
|
result = unlock(d, unlockKey)
|
||||||
cmd.Stdin = bytes.NewBufferString(unlockKey)
|
|
||||||
result = icmd.RunCmd(cmd)
|
|
||||||
}
|
}
|
||||||
result.Assert(c, icmd.Expected{
|
result.Assert(c, icmd.Expected{
|
||||||
ExitCode: 1,
|
ExitCode: 1,
|
||||||
|
@ -1309,28 +1319,20 @@ func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *testing.T) {
|
||||||
})
|
})
|
||||||
|
|
||||||
outs, _ = d.Cmd("node", "ls")
|
outs, _ = d.Cmd("node", "ls")
|
||||||
assert.Assert(c, strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
|
assert.Assert(c, strings.Contains(outs, swarmIsEncryptedMsg), outs)
|
||||||
cmd = d.Command("swarm", "unlock")
|
unlock(d, newUnlockKey).Assert(c, icmd.Success)
|
||||||
cmd.Stdin = bytes.NewBufferString(newUnlockKey)
|
|
||||||
icmd.RunCmd(cmd).Assert(c, icmd.Success)
|
|
||||||
|
|
||||||
assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateActive)
|
assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateActive)
|
||||||
|
|
||||||
retry := 0
|
checkNodeLs := func(t poll.LogT) poll.Result {
|
||||||
for {
|
|
||||||
// an issue sometimes prevents leader to be available right away
|
// an issue sometimes prevents leader to be available right away
|
||||||
outs, err = d.Cmd("node", "ls")
|
out, err := d.Cmd("node", "ls")
|
||||||
if err != nil && retry < 5 {
|
if err != nil {
|
||||||
if strings.Contains(outs, "swarm does not have a leader") {
|
return poll.Continue("error running node ls: %v: %s", err, out)
|
||||||
retry++
|
|
||||||
time.Sleep(3 * time.Second)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
assert.NilError(c, err)
|
return poll.Success()
|
||||||
assert.Assert(c, !strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
|
poll.WaitOn(c, checkNodeLs, poll.WithDelay(3*time.Second), poll.WithTimeout(time.Minute))
|
||||||
|
|
||||||
unlockKey = newUnlockKey
|
unlockKey = newUnlockKey
|
||||||
}
|
}
|
||||||
|
@ -1368,15 +1370,23 @@ func (s *DockerSwarmSuite) TestSwarmClusterRotateUnlockKey(c *testing.T) {
|
||||||
d2.RestartNode(c)
|
d2.RestartNode(c)
|
||||||
d3.RestartNode(c)
|
d3.RestartNode(c)
|
||||||
|
|
||||||
|
unlock := func(d *daemon.Daemon, key string) *icmd.Result {
|
||||||
|
cmd := d.Command("swarm", "unlock")
|
||||||
|
cmd.Stdin = strings.NewReader(key)
|
||||||
|
return icmd.RunCmd(cmd)
|
||||||
|
}
|
||||||
|
|
||||||
|
const swarmIsEncryptedMsg = "Swarm is encrypted and needs to be unlocked"
|
||||||
|
|
||||||
for _, d := range []*daemon.Daemon{d2, d3} {
|
for _, d := range []*daemon.Daemon{d2, d3} {
|
||||||
assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateLocked)
|
assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateLocked)
|
||||||
|
|
||||||
outs, _ := d.Cmd("node", "ls")
|
outs, _ := d.Cmd("node", "ls")
|
||||||
assert.Assert(c, strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
|
assert.Assert(c, strings.Contains(outs, swarmIsEncryptedMsg), outs)
|
||||||
cmd := d.Command("swarm", "unlock")
|
|
||||||
cmd.Stdin = bytes.NewBufferString(unlockKey)
|
|
||||||
result := icmd.RunCmd(cmd)
|
|
||||||
|
|
||||||
|
// unlock with the original key should fail
|
||||||
|
// Use poll here because the daemon may not have finished
|
||||||
|
result := unlock(d, unlockKey)
|
||||||
if result.Error == nil {
|
if result.Error == nil {
|
||||||
// On occasion, the daemon may not have finished
|
// On occasion, the daemon may not have finished
|
||||||
// rotating the KEK before restarting. The test is
|
// rotating the KEK before restarting. The test is
|
||||||
|
@ -1386,13 +1396,14 @@ func (s *DockerSwarmSuite) TestSwarmClusterRotateUnlockKey(c *testing.T) {
|
||||||
// restart again, the new key should be required this
|
// restart again, the new key should be required this
|
||||||
// time.
|
// time.
|
||||||
|
|
||||||
time.Sleep(3 * time.Second)
|
// Wait for the rotation to happen
|
||||||
|
// Since there are multiple rotations, we need to wait until for the number of rotations we are currently on to be reflected in the logs
|
||||||
|
// This is a little janky... its depending on specific log messages AND these are debug logs... but it is the best we can do for now.
|
||||||
|
matcher := testdaemon.ScanLogsMatchCount(testdaemon.ScanLogsMatchString("successfully rotated KEK"), i+1)
|
||||||
|
poll.WaitOn(c, d.PollCheckLogs(ctx, matcher), poll.WithDelay(3*time.Second), poll.WithTimeout(time.Minute))
|
||||||
|
d.Restart(c)
|
||||||
|
|
||||||
d.RestartNode(c)
|
result = unlock(d, unlockKey)
|
||||||
|
|
||||||
cmd = d.Command("swarm", "unlock")
|
|
||||||
cmd.Stdin = bytes.NewBufferString(unlockKey)
|
|
||||||
result = icmd.RunCmd(cmd)
|
|
||||||
}
|
}
|
||||||
result.Assert(c, icmd.Expected{
|
result.Assert(c, icmd.Expected{
|
||||||
ExitCode: 1,
|
ExitCode: 1,
|
||||||
|
@ -1400,31 +1411,21 @@ func (s *DockerSwarmSuite) TestSwarmClusterRotateUnlockKey(c *testing.T) {
|
||||||
})
|
})
|
||||||
|
|
||||||
outs, _ = d.Cmd("node", "ls")
|
outs, _ = d.Cmd("node", "ls")
|
||||||
assert.Assert(c, strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
|
assert.Assert(c, strings.Contains(outs, swarmIsEncryptedMsg), outs)
|
||||||
cmd = d.Command("swarm", "unlock")
|
|
||||||
cmd.Stdin = bytes.NewBufferString(newUnlockKey)
|
|
||||||
icmd.RunCmd(cmd).Assert(c, icmd.Success)
|
|
||||||
|
|
||||||
|
// now unlock with the rotated key, this should succeed
|
||||||
|
unlock(d, newUnlockKey).Assert(c, icmd.Success)
|
||||||
assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateActive)
|
assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateActive)
|
||||||
|
|
||||||
retry := 0
|
checkNodeLs := func(t poll.LogT) poll.Result {
|
||||||
for {
|
|
||||||
// an issue sometimes prevents leader to be available right away
|
// an issue sometimes prevents leader to be available right away
|
||||||
outs, err = d.Cmd("node", "ls")
|
out, err := d.Cmd("node", "ls")
|
||||||
if err != nil && retry < 5 {
|
if err != nil {
|
||||||
if strings.Contains(outs, "swarm does not have a leader") {
|
return poll.Continue("error running node ls: %v: %s", err, out)
|
||||||
retry++
|
|
||||||
c.Logf("[%s] got 'swarm does not have a leader'. retrying (attempt %d/5)", d.ID(), retry)
|
|
||||||
time.Sleep(3 * time.Second)
|
|
||||||
continue
|
|
||||||
} else {
|
|
||||||
c.Logf("[%s] gave error: '%v'. retrying (attempt %d/5): %s", d.ID(), err, retry, outs)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
assert.NilError(c, err, "[%s] failed after %d retries: %v (%s)", d.ID(), retry, err, outs)
|
return poll.Success()
|
||||||
assert.Assert(c, !strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
|
poll.WaitOn(c, checkNodeLs, poll.WithDelay(3*time.Second), poll.WithTimeout(time.Minute))
|
||||||
}
|
}
|
||||||
|
|
||||||
unlockKey = newUnlockKey
|
unlockKey = newUnlockKey
|
||||||
|
|
|
@ -338,6 +338,17 @@ func ScanLogsMatchString(contains string) func(string) bool {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ScanLogsMatchCount returns a function that can be used to scan the daemon logs until the passed in matcher function matches `count` times
|
||||||
|
func ScanLogsMatchCount(f func(string) bool, count int) func(string) bool {
|
||||||
|
matched := 0
|
||||||
|
return func(line string) bool {
|
||||||
|
if f(line) {
|
||||||
|
matched++
|
||||||
|
}
|
||||||
|
return matched == count
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ScanLogsMatchAll returns a function that can be used to scan the daemon logs until *all* the passed in strings are matched
|
// ScanLogsMatchAll returns a function that can be used to scan the daemon logs until *all* the passed in strings are matched
|
||||||
func ScanLogsMatchAll(contains ...string) func(string) bool {
|
func ScanLogsMatchAll(contains ...string) func(string) bool {
|
||||||
matched := make(map[string]bool)
|
matched := make(map[string]bool)
|
||||||
|
|
Loading…
Reference in a new issue