Merge pull request #47009 from cpuguy83/swarm_rotate_key_flake

De-flake TestSwarmClusterRotateUnlockKey... again... maybe?
This commit is contained in:
Sebastiaan van Stijn 2024-01-19 23:13:34 +01:00 committed by GitHub
commit 75fd873fe6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 69 additions and 54 deletions

View file

@ -657,6 +657,9 @@ func (s *DockerSwarmSuite) TearDownTest(ctx context.Context, c *testing.T) {
s.daemonsLock.Lock()
for _, d := range s.daemons {
if d != nil {
if c.Failed() {
d.TailLogsT(c, 100)
}
d.Stop(c)
d.Cleanup(c)
}

View file

@ -27,6 +27,7 @@ import (
remoteipam "github.com/docker/docker/libnetwork/ipams/remote/api"
"github.com/docker/docker/pkg/plugins"
"github.com/docker/docker/testutil"
testdaemon "github.com/docker/docker/testutil/daemon"
"github.com/moby/swarmkit/v2/ca/keyutils"
"github.com/vishvananda/netlink"
"gotest.tools/v3/assert"
@ -1260,6 +1261,8 @@ func (s *DockerSwarmSuite) TestSwarmJoinPromoteLocked(c *testing.T) {
poll.WaitOn(c, pollCheck(c, d3.CheckLocalNodeState(ctx), checker.Equals(swarm.LocalNodeStateActive)), poll.WithTimeout(time.Second))
}
const swarmIsEncryptedMsg = "Swarm is encrypted and needs to be unlocked"
func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *testing.T) {
ctx := testutil.GetContext(c)
d := s.AddDaemon(ctx, c, true, true)
@ -1280,12 +1283,16 @@ func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *testing.T) {
d.RestartNode(c)
assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateLocked)
outs, _ = d.Cmd("node", "ls")
assert.Assert(c, strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
unlock := func(d *daemon.Daemon, key string) *icmd.Result {
cmd := d.Command("swarm", "unlock")
cmd.Stdin = bytes.NewBufferString(unlockKey)
result := icmd.RunCmd(cmd)
cmd.Stdin = strings.NewReader(key)
return icmd.RunCmd(cmd)
}
outs, _ = d.Cmd("node", "ls")
assert.Assert(c, strings.Contains(outs, swarmIsEncryptedMsg), outs)
result := unlock(d, unlockKey)
if result.Error == nil {
// On occasion, the daemon may not have finished
// rotating the KEK before restarting. The test is
@ -1295,13 +1302,16 @@ func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *testing.T) {
// restart again, the new key should be required this
// time.
time.Sleep(3 * time.Second)
// Wait for the rotation to happen
// Since there are multiple rotations, we need to wait until for the number of rotations we are currently on to be reflected in the logs
// This is a little janky... its depending on specific log messages AND these are debug logs... but it is the best we can do for now.
matcher := testdaemon.ScanLogsMatchCount(testdaemon.ScanLogsMatchString("successfully rotated KEK"), i+1)
poll.WaitOn(c, d.PollCheckLogs(ctx, matcher), poll.WithDelay(3*time.Second), poll.WithTimeout(time.Minute))
d.Restart(c)
d.RestartNode(c)
cmd = d.Command("swarm", "unlock")
cmd.Stdin = bytes.NewBufferString(unlockKey)
result = icmd.RunCmd(cmd)
result = unlock(d, unlockKey)
}
result.Assert(c, icmd.Expected{
ExitCode: 1,
@ -1309,28 +1319,20 @@ func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *testing.T) {
})
outs, _ = d.Cmd("node", "ls")
assert.Assert(c, strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
cmd = d.Command("swarm", "unlock")
cmd.Stdin = bytes.NewBufferString(newUnlockKey)
icmd.RunCmd(cmd).Assert(c, icmd.Success)
assert.Assert(c, strings.Contains(outs, swarmIsEncryptedMsg), outs)
unlock(d, newUnlockKey).Assert(c, icmd.Success)
assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateActive)
retry := 0
for {
checkNodeLs := func(t poll.LogT) poll.Result {
// an issue sometimes prevents leader to be available right away
outs, err = d.Cmd("node", "ls")
if err != nil && retry < 5 {
if strings.Contains(outs, "swarm does not have a leader") {
retry++
time.Sleep(3 * time.Second)
continue
out, err := d.Cmd("node", "ls")
if err != nil {
return poll.Continue("error running node ls: %v: %s", err, out)
}
return poll.Success()
}
assert.NilError(c, err)
assert.Assert(c, !strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
break
}
poll.WaitOn(c, checkNodeLs, poll.WithDelay(3*time.Second), poll.WithTimeout(time.Minute))
unlockKey = newUnlockKey
}
@ -1368,15 +1370,23 @@ func (s *DockerSwarmSuite) TestSwarmClusterRotateUnlockKey(c *testing.T) {
d2.RestartNode(c)
d3.RestartNode(c)
unlock := func(d *daemon.Daemon, key string) *icmd.Result {
cmd := d.Command("swarm", "unlock")
cmd.Stdin = strings.NewReader(key)
return icmd.RunCmd(cmd)
}
const swarmIsEncryptedMsg = "Swarm is encrypted and needs to be unlocked"
for _, d := range []*daemon.Daemon{d2, d3} {
assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateLocked)
outs, _ := d.Cmd("node", "ls")
assert.Assert(c, strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
cmd := d.Command("swarm", "unlock")
cmd.Stdin = bytes.NewBufferString(unlockKey)
result := icmd.RunCmd(cmd)
assert.Assert(c, strings.Contains(outs, swarmIsEncryptedMsg), outs)
// unlock with the original key should fail
// Use poll here because the daemon may not have finished
result := unlock(d, unlockKey)
if result.Error == nil {
// On occasion, the daemon may not have finished
// rotating the KEK before restarting. The test is
@ -1386,13 +1396,14 @@ func (s *DockerSwarmSuite) TestSwarmClusterRotateUnlockKey(c *testing.T) {
// restart again, the new key should be required this
// time.
time.Sleep(3 * time.Second)
// Wait for the rotation to happen
// Since there are multiple rotations, we need to wait until for the number of rotations we are currently on to be reflected in the logs
// This is a little janky... its depending on specific log messages AND these are debug logs... but it is the best we can do for now.
matcher := testdaemon.ScanLogsMatchCount(testdaemon.ScanLogsMatchString("successfully rotated KEK"), i+1)
poll.WaitOn(c, d.PollCheckLogs(ctx, matcher), poll.WithDelay(3*time.Second), poll.WithTimeout(time.Minute))
d.Restart(c)
d.RestartNode(c)
cmd = d.Command("swarm", "unlock")
cmd.Stdin = bytes.NewBufferString(unlockKey)
result = icmd.RunCmd(cmd)
result = unlock(d, unlockKey)
}
result.Assert(c, icmd.Expected{
ExitCode: 1,
@ -1400,31 +1411,21 @@ func (s *DockerSwarmSuite) TestSwarmClusterRotateUnlockKey(c *testing.T) {
})
outs, _ = d.Cmd("node", "ls")
assert.Assert(c, strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
cmd = d.Command("swarm", "unlock")
cmd.Stdin = bytes.NewBufferString(newUnlockKey)
icmd.RunCmd(cmd).Assert(c, icmd.Success)
assert.Assert(c, strings.Contains(outs, swarmIsEncryptedMsg), outs)
// now unlock with the rotated key, this should succeed
unlock(d, newUnlockKey).Assert(c, icmd.Success)
assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateActive)
retry := 0
for {
checkNodeLs := func(t poll.LogT) poll.Result {
// an issue sometimes prevents leader to be available right away
outs, err = d.Cmd("node", "ls")
if err != nil && retry < 5 {
if strings.Contains(outs, "swarm does not have a leader") {
retry++
c.Logf("[%s] got 'swarm does not have a leader'. retrying (attempt %d/5)", d.ID(), retry)
time.Sleep(3 * time.Second)
continue
} else {
c.Logf("[%s] gave error: '%v'. retrying (attempt %d/5): %s", d.ID(), err, retry, outs)
out, err := d.Cmd("node", "ls")
if err != nil {
return poll.Continue("error running node ls: %v: %s", err, out)
}
return poll.Success()
}
assert.NilError(c, err, "[%s] failed after %d retries: %v (%s)", d.ID(), retry, err, outs)
assert.Assert(c, !strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
break
}
poll.WaitOn(c, checkNodeLs, poll.WithDelay(3*time.Second), poll.WithTimeout(time.Minute))
}
unlockKey = newUnlockKey

View file

@ -338,6 +338,17 @@ func ScanLogsMatchString(contains string) func(string) bool {
}
}
// ScanLogsMatchCount returns a function that can be used to scan the daemon logs until the passed in matcher function matches `count` times
func ScanLogsMatchCount(f func(string) bool, count int) func(string) bool {
matched := 0
return func(line string) bool {
if f(line) {
matched++
}
return matched == count
}
}
// ScanLogsMatchAll returns a function that can be used to scan the daemon logs until *all* the passed in strings are matched
func ScanLogsMatchAll(contains ...string) func(string) bool {
matched := make(map[string]bool)