Merge pull request #47009 from cpuguy83/swarm_rotate_key_flake

De-flake TestSwarmClusterRotateUnlockKey... again... maybe?
This commit is contained in:
Sebastiaan van Stijn 2024-01-19 23:13:34 +01:00 committed by GitHub
commit 75fd873fe6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 69 additions and 54 deletions

View file

@ -657,6 +657,9 @@ func (s *DockerSwarmSuite) TearDownTest(ctx context.Context, c *testing.T) {
s.daemonsLock.Lock() s.daemonsLock.Lock()
for _, d := range s.daemons { for _, d := range s.daemons {
if d != nil { if d != nil {
if c.Failed() {
d.TailLogsT(c, 100)
}
d.Stop(c) d.Stop(c)
d.Cleanup(c) d.Cleanup(c)
} }

View file

@ -27,6 +27,7 @@ import (
remoteipam "github.com/docker/docker/libnetwork/ipams/remote/api" remoteipam "github.com/docker/docker/libnetwork/ipams/remote/api"
"github.com/docker/docker/pkg/plugins" "github.com/docker/docker/pkg/plugins"
"github.com/docker/docker/testutil" "github.com/docker/docker/testutil"
testdaemon "github.com/docker/docker/testutil/daemon"
"github.com/moby/swarmkit/v2/ca/keyutils" "github.com/moby/swarmkit/v2/ca/keyutils"
"github.com/vishvananda/netlink" "github.com/vishvananda/netlink"
"gotest.tools/v3/assert" "gotest.tools/v3/assert"
@ -1260,6 +1261,8 @@ func (s *DockerSwarmSuite) TestSwarmJoinPromoteLocked(c *testing.T) {
poll.WaitOn(c, pollCheck(c, d3.CheckLocalNodeState(ctx), checker.Equals(swarm.LocalNodeStateActive)), poll.WithTimeout(time.Second)) poll.WaitOn(c, pollCheck(c, d3.CheckLocalNodeState(ctx), checker.Equals(swarm.LocalNodeStateActive)), poll.WithTimeout(time.Second))
} }
const swarmIsEncryptedMsg = "Swarm is encrypted and needs to be unlocked"
func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *testing.T) { func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *testing.T) {
ctx := testutil.GetContext(c) ctx := testutil.GetContext(c)
d := s.AddDaemon(ctx, c, true, true) d := s.AddDaemon(ctx, c, true, true)
@ -1280,12 +1283,16 @@ func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *testing.T) {
d.RestartNode(c) d.RestartNode(c)
assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateLocked) assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateLocked)
outs, _ = d.Cmd("node", "ls") unlock := func(d *daemon.Daemon, key string) *icmd.Result {
assert.Assert(c, strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs) cmd := d.Command("swarm", "unlock")
cmd := d.Command("swarm", "unlock") cmd.Stdin = strings.NewReader(key)
cmd.Stdin = bytes.NewBufferString(unlockKey) return icmd.RunCmd(cmd)
result := icmd.RunCmd(cmd) }
outs, _ = d.Cmd("node", "ls")
assert.Assert(c, strings.Contains(outs, swarmIsEncryptedMsg), outs)
result := unlock(d, unlockKey)
if result.Error == nil { if result.Error == nil {
// On occasion, the daemon may not have finished // On occasion, the daemon may not have finished
// rotating the KEK before restarting. The test is // rotating the KEK before restarting. The test is
@ -1295,13 +1302,16 @@ func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *testing.T) {
// restart again, the new key should be required this // restart again, the new key should be required this
// time. // time.
time.Sleep(3 * time.Second) // Wait for the rotation to happen
// Since there are multiple rotations, we need to wait until for the number of rotations we are currently on to be reflected in the logs
// This is a little janky... its depending on specific log messages AND these are debug logs... but it is the best we can do for now.
matcher := testdaemon.ScanLogsMatchCount(testdaemon.ScanLogsMatchString("successfully rotated KEK"), i+1)
poll.WaitOn(c, d.PollCheckLogs(ctx, matcher), poll.WithDelay(3*time.Second), poll.WithTimeout(time.Minute))
d.Restart(c)
d.RestartNode(c) d.RestartNode(c)
cmd = d.Command("swarm", "unlock") result = unlock(d, unlockKey)
cmd.Stdin = bytes.NewBufferString(unlockKey)
result = icmd.RunCmd(cmd)
} }
result.Assert(c, icmd.Expected{ result.Assert(c, icmd.Expected{
ExitCode: 1, ExitCode: 1,
@ -1309,28 +1319,20 @@ func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *testing.T) {
}) })
outs, _ = d.Cmd("node", "ls") outs, _ = d.Cmd("node", "ls")
assert.Assert(c, strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs) assert.Assert(c, strings.Contains(outs, swarmIsEncryptedMsg), outs)
cmd = d.Command("swarm", "unlock") unlock(d, newUnlockKey).Assert(c, icmd.Success)
cmd.Stdin = bytes.NewBufferString(newUnlockKey)
icmd.RunCmd(cmd).Assert(c, icmd.Success)
assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateActive) assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateActive)
retry := 0 checkNodeLs := func(t poll.LogT) poll.Result {
for {
// an issue sometimes prevents leader to be available right away // an issue sometimes prevents leader to be available right away
outs, err = d.Cmd("node", "ls") out, err := d.Cmd("node", "ls")
if err != nil && retry < 5 { if err != nil {
if strings.Contains(outs, "swarm does not have a leader") { return poll.Continue("error running node ls: %v: %s", err, out)
retry++
time.Sleep(3 * time.Second)
continue
}
} }
assert.NilError(c, err) return poll.Success()
assert.Assert(c, !strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
break
} }
poll.WaitOn(c, checkNodeLs, poll.WithDelay(3*time.Second), poll.WithTimeout(time.Minute))
unlockKey = newUnlockKey unlockKey = newUnlockKey
} }
@ -1368,15 +1370,23 @@ func (s *DockerSwarmSuite) TestSwarmClusterRotateUnlockKey(c *testing.T) {
d2.RestartNode(c) d2.RestartNode(c)
d3.RestartNode(c) d3.RestartNode(c)
unlock := func(d *daemon.Daemon, key string) *icmd.Result {
cmd := d.Command("swarm", "unlock")
cmd.Stdin = strings.NewReader(key)
return icmd.RunCmd(cmd)
}
const swarmIsEncryptedMsg = "Swarm is encrypted and needs to be unlocked"
for _, d := range []*daemon.Daemon{d2, d3} { for _, d := range []*daemon.Daemon{d2, d3} {
assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateLocked) assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateLocked)
outs, _ := d.Cmd("node", "ls") outs, _ := d.Cmd("node", "ls")
assert.Assert(c, strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs) assert.Assert(c, strings.Contains(outs, swarmIsEncryptedMsg), outs)
cmd := d.Command("swarm", "unlock")
cmd.Stdin = bytes.NewBufferString(unlockKey)
result := icmd.RunCmd(cmd)
// unlock with the original key should fail
// Use poll here because the daemon may not have finished
result := unlock(d, unlockKey)
if result.Error == nil { if result.Error == nil {
// On occasion, the daemon may not have finished // On occasion, the daemon may not have finished
// rotating the KEK before restarting. The test is // rotating the KEK before restarting. The test is
@ -1386,13 +1396,14 @@ func (s *DockerSwarmSuite) TestSwarmClusterRotateUnlockKey(c *testing.T) {
// restart again, the new key should be required this // restart again, the new key should be required this
// time. // time.
time.Sleep(3 * time.Second) // Wait for the rotation to happen
// Since there are multiple rotations, we need to wait until for the number of rotations we are currently on to be reflected in the logs
// This is a little janky... its depending on specific log messages AND these are debug logs... but it is the best we can do for now.
matcher := testdaemon.ScanLogsMatchCount(testdaemon.ScanLogsMatchString("successfully rotated KEK"), i+1)
poll.WaitOn(c, d.PollCheckLogs(ctx, matcher), poll.WithDelay(3*time.Second), poll.WithTimeout(time.Minute))
d.Restart(c)
d.RestartNode(c) result = unlock(d, unlockKey)
cmd = d.Command("swarm", "unlock")
cmd.Stdin = bytes.NewBufferString(unlockKey)
result = icmd.RunCmd(cmd)
} }
result.Assert(c, icmd.Expected{ result.Assert(c, icmd.Expected{
ExitCode: 1, ExitCode: 1,
@ -1400,31 +1411,21 @@ func (s *DockerSwarmSuite) TestSwarmClusterRotateUnlockKey(c *testing.T) {
}) })
outs, _ = d.Cmd("node", "ls") outs, _ = d.Cmd("node", "ls")
assert.Assert(c, strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs) assert.Assert(c, strings.Contains(outs, swarmIsEncryptedMsg), outs)
cmd = d.Command("swarm", "unlock")
cmd.Stdin = bytes.NewBufferString(newUnlockKey)
icmd.RunCmd(cmd).Assert(c, icmd.Success)
// now unlock with the rotated key, this should succeed
unlock(d, newUnlockKey).Assert(c, icmd.Success)
assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateActive) assert.Equal(c, getNodeStatus(c, d), swarm.LocalNodeStateActive)
retry := 0 checkNodeLs := func(t poll.LogT) poll.Result {
for {
// an issue sometimes prevents leader to be available right away // an issue sometimes prevents leader to be available right away
outs, err = d.Cmd("node", "ls") out, err := d.Cmd("node", "ls")
if err != nil && retry < 5 { if err != nil {
if strings.Contains(outs, "swarm does not have a leader") { return poll.Continue("error running node ls: %v: %s", err, out)
retry++
c.Logf("[%s] got 'swarm does not have a leader'. retrying (attempt %d/5)", d.ID(), retry)
time.Sleep(3 * time.Second)
continue
} else {
c.Logf("[%s] gave error: '%v'. retrying (attempt %d/5): %s", d.ID(), err, retry, outs)
}
} }
assert.NilError(c, err, "[%s] failed after %d retries: %v (%s)", d.ID(), retry, err, outs) return poll.Success()
assert.Assert(c, !strings.Contains(outs, "Swarm is encrypted and needs to be unlocked"), outs)
break
} }
poll.WaitOn(c, checkNodeLs, poll.WithDelay(3*time.Second), poll.WithTimeout(time.Minute))
} }
unlockKey = newUnlockKey unlockKey = newUnlockKey

View file

@ -338,6 +338,17 @@ func ScanLogsMatchString(contains string) func(string) bool {
} }
} }
// ScanLogsMatchCount returns a function that can be used to scan the daemon logs until the passed in matcher function matches `count` times
func ScanLogsMatchCount(f func(string) bool, count int) func(string) bool {
matched := 0
return func(line string) bool {
if f(line) {
matched++
}
return matched == count
}
}
// ScanLogsMatchAll returns a function that can be used to scan the daemon logs until *all* the passed in strings are matched // ScanLogsMatchAll returns a function that can be used to scan the daemon logs until *all* the passed in strings are matched
func ScanLogsMatchAll(contains ...string) func(string) bool { func ScanLogsMatchAll(contains ...string) func(string) bool {
matched := make(map[string]bool) matched := make(map[string]bool)