moby/daemon/health_test.go

Add support for user-defined healthchecks

This PR adds support for user-defined health-check probes for Docker containers. It adds a `HEALTHCHECK` instruction to the Dockerfile syntax plus some corresponding "docker run" options. It can be used with a restart policy to automatically restart a container if the check fails.

The `HEALTHCHECK` instruction has two forms:

* `HEALTHCHECK [OPTIONS] CMD command` (check container health by running a command inside the container)
* `HEALTHCHECK NONE` (disable any healthcheck inherited from the base image)

The `HEALTHCHECK` instruction tells Docker how to test a container to check that it is still working. This can detect cases such as a web server that is stuck in an infinite loop and unable to handle new connections, even though the server process is still running.

When a container has a healthcheck specified, it has a _health status_ in addition to its normal status. This status is initially `starting`. Whenever a health check passes, it becomes `healthy` (whatever state it was previously in). After a certain number of consecutive failures, it becomes `unhealthy`.

The options that can appear before `CMD` are:

* `--interval=DURATION` (default: `30s`)
* `--timeout=DURATION` (default: `30s`)
* `--retries=N` (default: `1`)

The health check will first run **interval** seconds after the container is started, and then again **interval** seconds after each previous check completes. If a single run of the check takes longer than **timeout** seconds then the check is considered to have failed. It takes **retries** consecutive failures of the health check for the container to be considered `unhealthy`.

There can only be one `HEALTHCHECK` instruction in a Dockerfile. If you list more than one then only the last `HEALTHCHECK` will take effect.

The command after the `CMD` keyword can be either a shell command (e.g. `HEALTHCHECK CMD /bin/check-running`) or an _exec_ array (as with other Dockerfile commands; see e.g. `ENTRYPOINT` for details).

The command's exit status indicates the health status of the container. The possible values are:

- 0: success - the container is healthy and ready for use
- 1: unhealthy - the container is not working correctly
- 2: starting - the container is not ready for use yet, but is working correctly

If the probe returns 2 ("starting") when the container has already moved out of the "starting" state then it is treated as "unhealthy" instead.

For example, to check every five minutes or so that a web-server is able to serve the site's main page within three seconds:

    HEALTHCHECK --interval=5m --timeout=3s \
      CMD curl -f http://localhost/ || exit 1

To help debug failing probes, any output text (UTF-8 encoded) that the command writes on stdout or stderr will be stored in the health status and can be queried with `docker inspect`. Such output should be kept short (only the first 4096 bytes are stored currently).

When the health status of a container changes, a `health_status` event is generated with the new status. The health status is also displayed in the `docker ps` output.

Signed-off-by: Thomas Leonard <thomas.leonard@docker.com>
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
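For orientation, the `--interval`, `--timeout`, and `--retries` options map onto the `HealthConfig` struct from `github.com/docker/engine-api/types/container`, which the test file below constructs directly. The following is a minimal sketch mirroring the Dockerfile example above; the `Interval` and `Timeout` field names and the `"CMD-SHELL"` test form are assumptions here, since the tests themselves only use `Test: []string{"NONE"}` and `Retries`:

    package main

    import (
        "fmt"
        "time"

        containertypes "github.com/docker/engine-api/types/container"
    )

    func main() {
        // Rough equivalent of:
        //   HEALTHCHECK --interval=5m --timeout=3s \
        //     CMD curl -f http://localhost/ || exit 1
        probe := &containertypes.HealthConfig{
            // "CMD-SHELL" runs the probe through a shell (the shell form of
            // HEALTHCHECK CMD); []string{"NONE"} would disable an inherited
            // check, as TestNoneHealthcheck exercises below.
            Test:     []string{"CMD-SHELL", "curl -f http://localhost/ || exit 1"},
            Interval: 5 * time.Minute, // assumed field name for --interval
            Timeout:  3 * time.Second, // assumed field name for --timeout
            Retries:  1,               // documented default; TestHealthStates also uses 3
        }
        fmt.Printf("%+v\n", probe)
    }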
package daemon

import (
	"testing"
	"time"

	"github.com/docker/docker/container"
	"github.com/docker/docker/daemon/events"
	"github.com/docker/engine-api/types"
	containertypes "github.com/docker/engine-api/types/container"
	eventtypes "github.com/docker/engine-api/types/events"
)
// reset returns the container to a fresh "starting" health state, as if it
// had just been (re)started with a healthcheck configured.
func reset(c *container.Container) {
	c.State = &container.State{}
	c.State.Health = &container.Health{}
	c.State.Health.Status = types.Starting
}
// TestNoneHealthcheck checks that a healthcheck of {"NONE"} disables health
// monitoring: after initHealthMonitor the container must have no Health state.
func TestNoneHealthcheck(t *testing.T) {
	c := &container.Container{
		CommonContainer: container.CommonContainer{
			ID:   "container_id",
			Name: "container_name",
			Config: &containertypes.Config{
				Image: "image_name",
				Healthcheck: &containertypes.HealthConfig{
					Test: []string{"NONE"},
				},
			},
			State: &container.State{},
		},
	}
	daemon := &Daemon{}

	daemon.initHealthMonitor(c)
	if c.State.Health != nil {
		t.Errorf("Expecting Health to be nil, but was not")
	}
}
// TestHealthStates drives handleProbeResult with synthetic probe results and
// verifies the resulting health_status events and failing-streak accounting.
func TestHealthStates(t *testing.T) {
	e := events.New()
	_, l, _ := e.Subscribe()
	defer e.Evict(l)

	// expect waits up to a second for the next daemon event and asserts
	// that its status matches the expected "health_status: ..." string.
	expect := func(expected string) {
		select {
		case event := <-l:
			ev := event.(eventtypes.Message)
			if ev.Status != expected {
				t.Errorf("Expecting event %#v, but got %#v\n", expected, ev.Status)
			}
		case <-time.After(1 * time.Second):
			t.Errorf("Expecting event %#v, but got nothing\n", expected)
		}
	}
	c := &container.Container{
		CommonContainer: container.CommonContainer{
			ID:   "container_id",
			Name: "container_name",
			Config: &containertypes.Config{
				Image: "image_name",
			},
		},
	}
	daemon := &Daemon{
		EventsService: e,
	}

	c.Config.Healthcheck = &containertypes.HealthConfig{
		Retries: 1,
	}
	reset(c)

	// handleResult feeds one synthetic probe result (taken at startTime with
	// the given exit code) into the daemon's health-state machine.
	handleResult := func(startTime time.Time, exitCode int) {
		handleProbeResult(daemon, c, &types.HealthcheckResult{
			Start:    startTime,
			End:      startTime,
			ExitCode: exitCode,
		})
	}
	// starting -> failed -> success -> failed
	handleResult(c.State.StartedAt.Add(1*time.Second), 1)
	expect("health_status: unhealthy")

	handleResult(c.State.StartedAt.Add(2*time.Second), 0)
	expect("health_status: healthy")

	handleResult(c.State.StartedAt.Add(3*time.Second), 1)
	expect("health_status: unhealthy")

	// Test retries
	reset(c)
	c.Config.Healthcheck.Retries = 3

	handleResult(c.State.StartedAt.Add(20*time.Second), 1)
	handleResult(c.State.StartedAt.Add(40*time.Second), 1)
	if c.State.Health.Status != types.Starting {
		t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status)
	}
	if c.State.Health.FailingStreak != 2 {
		t.Errorf("Expecting FailingStreak=2, but got %d\n", c.State.Health.FailingStreak)
	}

	handleResult(c.State.StartedAt.Add(60*time.Second), 1)
	expect("health_status: unhealthy")

	handleResult(c.State.StartedAt.Add(80*time.Second), 0)
	expect("health_status: healthy")
	if c.State.Health.FailingStreak != 0 {
		t.Errorf("Expecting FailingStreak=0, but got %d\n", c.State.Health.FailingStreak)
	}
}
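These tests live in the `daemon` package and can be run on their own with the standard Go tooling, e.g. from the repository root (package path assumed from the file header above):

    go test -run 'TestNoneHealthcheck|TestHealthStates' ./daemon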