29ff2af2d3
This test has been flaky for a long time, failing with: --- FAIL: TestInspect (12.04s) inspect_test.go:39: timeout hit after 10s: waiting for tasks to enter run state. task failed with error: task: non-zero exit (1) While looking through logs, noticed tasks were started, entering RUNNING stage, and then exited, to be started again. state.transition="STARTING->RUNNING" ... msg="fatal task error" error="task: non-zero exit (1)" ... state.transition="RUNNING->FAILED" Looking for possible reasons, first considering network issues (possibly we ran out of IP addresses or networking not cleaned up), then I spotted the issue. The service is started with; Command: []string{"/bin/top"}, Args: []string{"-u", "root"}, The `-u root` is not an argument for the service, but for `/bin/top`. While the Ubuntu/Debian/GNU version of `top` has a -u/-U option; docker run --rm ubuntu:20.04 top -h 2>&1 | grep '\-u' top -hv | -bcEHiOSs1 -d secs -n max -u|U user -p pid(s) -o field -w [cols] The *busybox* version of top does not: docker run --rm busybox top --help 2>&1 | grep '\-u' So running `top -u root` would cause the task to fail; docker run --rm busybox top -u root top: invalid option -- u ... echo $? 1 As a result, the service went into a crash-loop, and because the `poll.WaitOn()` was running with a short interval, in many cases would _just_ find the RUNNING state, perform the `service inspect`, and pass, but in other cases, it would not be that lucky, and continue polling until we reached the 10 seconds timeout, and mark the test as failed. Looking for history of this option (was it previously using a different image?) I found this was added in 6cd6d8646a
, but probably just missed during review. Given that the option is only set to have "something" to inspect, I replaced the `-u root` with `-d 5`, which makes top refresh with a 5 second interval. Note that there is another test (`TestServiceListWithStatuses`) that uses the same spec, however, that test is skipped based on API version of the test-daemon, and (to be looked into), when performing that check, no API version is known, causing the test to (always?) be skipped: === RUN TestServiceListWithStatuses --- SKIP: TestServiceListWithStatuses (0.00s) list_test.go:34: versions.LessThan(testEnv.DaemonInfo.ServerVersion, "1.41") Signed-off-by: Sebastiaan van Stijn <github@gone.nl> (cherry picked from commit 00cb3073f4
) Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
134 lines
3.8 KiB
Go
134 lines
3.8 KiB
Go
package service // import "github.com/docker/docker/integration/service"
|
|
|
|
import (
|
|
"context"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/docker/docker/api/types"
|
|
"github.com/docker/docker/api/types/container"
|
|
swarmtypes "github.com/docker/docker/api/types/swarm"
|
|
"github.com/docker/docker/integration/internal/swarm"
|
|
"github.com/google/go-cmp/cmp"
|
|
"gotest.tools/v3/assert"
|
|
is "gotest.tools/v3/assert/cmp"
|
|
"gotest.tools/v3/poll"
|
|
"gotest.tools/v3/skip"
|
|
)
|
|
|
|
func TestInspect(t *testing.T) {
|
|
skip.If(t, testEnv.IsRemoteDaemon)
|
|
skip.If(t, testEnv.DaemonInfo.OSType == "windows")
|
|
defer setupTest(t)()
|
|
d := swarm.NewSwarm(t, testEnv)
|
|
defer d.Stop(t)
|
|
client := d.NewClientT(t)
|
|
defer client.Close()
|
|
|
|
var now = time.Now()
|
|
var instances uint64 = 2
|
|
serviceSpec := fullSwarmServiceSpec("test-service-inspect"+t.Name(), instances)
|
|
|
|
ctx := context.Background()
|
|
resp, err := client.ServiceCreate(ctx, serviceSpec, types.ServiceCreateOptions{
|
|
QueryRegistry: false,
|
|
})
|
|
assert.NilError(t, err)
|
|
|
|
id := resp.ID
|
|
poll.WaitOn(t, swarm.RunningTasksCount(client, id, instances))
|
|
|
|
service, _, err := client.ServiceInspectWithRaw(ctx, id, types.ServiceInspectOptions{})
|
|
assert.NilError(t, err)
|
|
|
|
expected := swarmtypes.Service{
|
|
ID: id,
|
|
Spec: serviceSpec,
|
|
Meta: swarmtypes.Meta{
|
|
Version: swarmtypes.Version{Index: uint64(11)},
|
|
CreatedAt: now,
|
|
UpdatedAt: now,
|
|
},
|
|
}
|
|
assert.Check(t, is.DeepEqual(service, expected, cmpServiceOpts()))
|
|
}
|
|
|
|
// TODO: use helpers from gotest.tools/assert/opt when available
|
|
func cmpServiceOpts() cmp.Option {
|
|
const threshold = 20 * time.Second
|
|
|
|
metaTimeFields := func(path cmp.Path) bool {
|
|
switch path.String() {
|
|
case "Meta.CreatedAt", "Meta.UpdatedAt":
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
withinThreshold := cmp.Comparer(func(x, y time.Time) bool {
|
|
delta := x.Sub(y)
|
|
return delta < threshold && delta > -threshold
|
|
})
|
|
|
|
return cmp.FilterPath(metaTimeFields, withinThreshold)
|
|
}
|
|
|
|
func fullSwarmServiceSpec(name string, replicas uint64) swarmtypes.ServiceSpec {
|
|
restartDelay := 100 * time.Millisecond
|
|
maxAttempts := uint64(4)
|
|
|
|
return swarmtypes.ServiceSpec{
|
|
Annotations: swarmtypes.Annotations{
|
|
Name: name,
|
|
Labels: map[string]string{
|
|
"service-label": "service-label-value",
|
|
},
|
|
},
|
|
TaskTemplate: swarmtypes.TaskSpec{
|
|
ContainerSpec: &swarmtypes.ContainerSpec{
|
|
Image: "busybox:latest",
|
|
Labels: map[string]string{"container-label": "container-value"},
|
|
Command: []string{"/bin/top"},
|
|
Args: []string{"-d", "5"},
|
|
Hostname: "hostname",
|
|
Env: []string{"envvar=envvalue"},
|
|
Dir: "/work",
|
|
User: "root",
|
|
StopSignal: "SIGINT",
|
|
StopGracePeriod: &restartDelay,
|
|
Hosts: []string{"8.8.8.8 google"},
|
|
DNSConfig: &swarmtypes.DNSConfig{
|
|
Nameservers: []string{"8.8.8.8"},
|
|
Search: []string{"somedomain"},
|
|
},
|
|
Isolation: container.IsolationDefault,
|
|
},
|
|
RestartPolicy: &swarmtypes.RestartPolicy{
|
|
Delay: &restartDelay,
|
|
Condition: swarmtypes.RestartPolicyConditionOnFailure,
|
|
MaxAttempts: &maxAttempts,
|
|
},
|
|
Runtime: swarmtypes.RuntimeContainer,
|
|
},
|
|
Mode: swarmtypes.ServiceMode{
|
|
Replicated: &swarmtypes.ReplicatedService{
|
|
Replicas: &replicas,
|
|
},
|
|
},
|
|
UpdateConfig: &swarmtypes.UpdateConfig{
|
|
Parallelism: 2,
|
|
Delay: 200 * time.Second,
|
|
FailureAction: swarmtypes.UpdateFailureActionContinue,
|
|
Monitor: 2 * time.Second,
|
|
MaxFailureRatio: 0.2,
|
|
Order: swarmtypes.UpdateOrderStopFirst,
|
|
},
|
|
RollbackConfig: &swarmtypes.UpdateConfig{
|
|
Parallelism: 3,
|
|
Delay: 300 * time.Second,
|
|
FailureAction: swarmtypes.UpdateFailureActionPause,
|
|
Monitor: 3 * time.Second,
|
|
MaxFailureRatio: 0.3,
|
|
Order: swarmtypes.UpdateOrderStartFirst,
|
|
},
|
|
}
|
|
}
|