moby/integration-cli/docker_cli_health_test.go
Thomas Leonard b6c7becbfe
Add support for user-defined healthchecks
This PR adds support for user-defined health-check probes for Docker
containers. It adds a `HEALTHCHECK` instruction to the Dockerfile syntax plus
some corresponding "docker run" options. It can be used with a restart policy
to automatically restart a container if the check fails.

The `HEALTHCHECK` instruction has two forms:

* `HEALTHCHECK [OPTIONS] CMD command` (check container health by running a command inside the container)
* `HEALTHCHECK NONE` (disable any healthcheck inherited from the base image)

The `HEALTHCHECK` instruction tells Docker how to test a container to check that
it is still working. This can detect cases such as a web server that is stuck in
an infinite loop and unable to handle new connections, even though the server
process is still running.

When a container has a healthcheck specified, it has a _health status_ in
addition to its normal status. This status is initially `starting`. Whenever a
health check passes, it becomes `healthy` (whatever state it was previously in).
After a certain number of consecutive failures, it becomes `unhealthy`.

The options that can appear before `CMD` are:

* `--interval=DURATION` (default: `30s`)
* `--timeout=DURATION` (default: `30s`)
* `--retries=N` (default: `1`)

The health check will first run **interval** seconds after the container is
started, and then again **interval** seconds after each previous check completes.

If a single run of the check takes longer than **timeout** seconds then the check
is considered to have failed.

It takes **retries** consecutive failures of the health check for the container
to be considered `unhealthy`.

There can only be one `HEALTHCHECK` instruction in a Dockerfile. If you list
more than one then only the last `HEALTHCHECK` will take effect.

The command after the `CMD` keyword can be either a shell command (e.g. `HEALTHCHECK
CMD /bin/check-running`) or an _exec_ array (as with other Dockerfile commands;
see e.g. `ENTRYPOINT` for details).

The command's exit status indicates the health status of the container.
The possible values are:

- 0: success - the container is healthy and ready for use
- 1: unhealthy - the container is not working correctly
- 2: starting - the container is not ready for use yet, but is working correctly

If the probe returns 2 ("starting") when the container has already moved out of the
"starting" state then it is treated as "unhealthy" instead.

For example, to check every five minutes or so that a web-server is able to
serve the site's main page within three seconds:

    HEALTHCHECK --interval=5m --timeout=3s \
      CMD curl -f http://localhost/ || exit 1

To help debug failing probes, any output text (UTF-8 encoded) that the command writes
on stdout or stderr will be stored in the health status and can be queried with
`docker inspect`. Such output should be kept short (only the first 4096 bytes
are stored currently).

When the health status of a container changes, a `health_status` event is
generated with the new status. The health status is also displayed in the
`docker ps` output.

Signed-off-by: Thomas Leonard <thomas.leonard@docker.com>
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2016-06-02 23:58:34 +02:00

154 lines
4.9 KiB
Go

package main
import (
"encoding/json"
"github.com/docker/docker/pkg/integration/checker"
"github.com/docker/engine-api/types"
"github.com/go-check/check"
"strconv"
"strings"
"time"
)
// waitForStatus polls `docker inspect` for the container's .State.Status
// every 100ms and returns once it reads expected. While waiting, the status
// must still read prev: any third value is reported through c.Check and
// ends the wait immediately.
func waitForStatus(c *check.C, name string, prev string, expected string) {
	// The inspect output is compared verbatim, trailing newline included.
	before, after := prev+"\n", expected+"\n"
	for ; ; time.Sleep(100 * time.Millisecond) {
		status, _ := dockerCmd(c, "inspect", "--format={{.State.Status}}", name)
		if status == after {
			return
		}
		// Flag anything other than the state we started from, then give up
		// rather than keep polling a container in an unexpected state.
		c.Check(status, checker.Equals, before)
		if status != before {
			return
		}
	}
}
// waitForHealthStatus polls `docker inspect` for the container's
// .State.Health.Status every 100ms and returns once it reads expected.
// While waiting, the health status must still read prev: any third value is
// reported through c.Check and ends the wait immediately.
func waitForHealthStatus(c *check.C, name string, prev string, expected string) {
	// The inspect output is compared verbatim, trailing newline included.
	before, after := prev+"\n", expected+"\n"
	for ; ; time.Sleep(100 * time.Millisecond) {
		status, _ := dockerCmd(c, "inspect", "--format={{.State.Health.Status}}", name)
		if status == after {
			return
		}
		// Flag anything other than the state we started from, then give up
		// rather than keep polling a container in an unexpected state.
		c.Check(status, checker.Equals, before)
		if status != before {
			return
		}
	}
}
// getHealth returns the health state of the named container, decoded from
// the JSON printed by `docker inspect --format={{json .State.Health}}`.
// A decoding error is reported through c.Check; the struct is returned
// either way.
func getHealth(c *check.C, name string) *types.Health {
	raw, _ := dockerCmd(c, "inspect", "--format={{json .State.Health}}", name)
	result := new(types.Health)
	decodeErr := json.Unmarshal([]byte(raw), result)
	c.Check(decodeErr, checker.Equals, nil)
	return result
}
// TestHealth exercises user-defined healthchecks end to end:
//
//   - builds an image whose HEALTHCHECK is `cat /status` and verifies the
//     options recorded in the container config;
//   - drives a container through starting -> healthy -> unhealthy -> healthy
//     by removing and re-creating /status;
//   - verifies the check can be disabled with --no-healthcheck and with
//     `HEALTHCHECK NONE` in a derived image;
//   - re-enables the check from the CLI with --health-* flags and verifies
//     the recorded log entry, then fails the check and waits for the
//     container to exit;
//   - verifies that a check exceeding --health-timeout is logged with exit
//     code -1 and the timeout message.
func (s *DockerSuite) TestHealth(c *check.C) {
	testRequires(c, DaemonIsLinux) // busybox doesn't work on Windows

	imageName := "testhealth"
	_, err := buildImage(imageName,
		`FROM busybox
RUN echo OK > /status
CMD ["/bin/sleep", "120"]
STOPSIGNAL SIGKILL
HEALTHCHECK --interval=1s --timeout=30s \
CMD cat /status`,
		true)
	// Every later step needs this image, so stop here if the build failed.
	c.Assert(err, check.IsNil)

	// No health status before starting
	name := "test_health"
	dockerCmd(c, "create", "--name", name, imageName)
	out, _ := dockerCmd(c, "ps", "-a", "--format={{.Status}}")
	c.Check(out, checker.Equals, "Created\n")

	// Inspect the options
	out, _ = dockerCmd(c, "inspect",
		"--format='timeout={{.Config.Healthcheck.Timeout}} "+
			"interval={{.Config.Healthcheck.Interval}} "+
			"retries={{.Config.Healthcheck.Retries}} "+
			"test={{.Config.Healthcheck.Test}}'", name)
	c.Check(out, checker.Equals, "timeout=30s interval=1s retries=0 test=[CMD-SHELL cat /status]\n")

	// Start
	dockerCmd(c, "start", name)
	waitForHealthStatus(c, name, "starting", "healthy")

	// Make it fail
	dockerCmd(c, "exec", name, "rm", "/status")
	waitForHealthStatus(c, name, "healthy", "unhealthy")

	// Inspect the status
	out, _ = dockerCmd(c, "inspect", "--format={{.State.Health.Status}}", name)
	c.Check(out, checker.Equals, "unhealthy\n")

	// Make it healthy again
	dockerCmd(c, "exec", name, "touch", "/status")
	waitForHealthStatus(c, name, "unhealthy", "healthy")

	// Remove container
	dockerCmd(c, "rm", "-f", name)

	// Disable the check from the CLI
	dockerCmd(c, "create", "--name=noh", "--no-healthcheck", imageName)
	out, _ = dockerCmd(c, "inspect", "--format={{.Config.Healthcheck.Test}}", "noh")
	c.Check(out, checker.Equals, "[NONE]\n")
	dockerCmd(c, "rm", "noh")

	// Disable the check with a new build
	_, err = buildImage("no_healthcheck",
		`FROM testhealth
HEALTHCHECK NONE`, true)
	// The remaining steps run containers from this image; abort if it is missing.
	c.Assert(err, check.IsNil)

	out, _ = dockerCmd(c, "inspect", "--format={{.ContainerConfig.Healthcheck.Test}}", "no_healthcheck")
	c.Check(out, checker.Equals, "[NONE]\n")

	// Enable the checks from the CLI
	dockerCmd(c, "run", "-d", "--name=fatal_healthcheck",
		"--health-interval=0.5s",
		"--health-retries=3",
		"--health-cmd=cat /status",
		"no_healthcheck")
	waitForHealthStatus(c, "fatal_healthcheck", "starting", "healthy")
	health := getHealth(c, "fatal_healthcheck")
	c.Check(health.Status, checker.Equals, "healthy")
	c.Check(health.FailingStreak, checker.Equals, 0)
	// Guard the index below: an empty log would otherwise panic instead of
	// producing a readable test failure.
	c.Assert(len(health.Log) > 0, checker.Equals, true)
	last := health.Log[len(health.Log)-1]
	c.Check(last.ExitCode, checker.Equals, 0)
	c.Check(last.Output, checker.Equals, "OK\n")

	// Fail the check, which should now make it exit
	dockerCmd(c, "exec", "fatal_healthcheck", "rm", "/status")
	waitForStatus(c, "fatal_healthcheck", "running", "exited")

	out, _ = dockerCmd(c, "inspect", "--format={{.State.Health.Status}}", "fatal_healthcheck")
	c.Check(out, checker.Equals, "unhealthy\n")
	failsStr, _ := dockerCmd(c, "inspect", "--format={{.State.Health.FailingStreak}}", "fatal_healthcheck")
	fails, err := strconv.Atoi(strings.TrimSpace(failsStr))
	c.Check(err, check.IsNil)
	c.Check(fails >= 3, checker.Equals, true)
	dockerCmd(c, "rm", "-f", "fatal_healthcheck")

	// Check timeout
	// Note: if the interval is too small, it seems that Docker spends all its time running health
	// checks and never gets around to killing it.
	dockerCmd(c, "run", "-d", "--name=test",
		"--health-interval=1s", "--health-cmd=sleep 5m", "--health-timeout=1ms", imageName)
	waitForHealthStatus(c, "test", "starting", "unhealthy")
	health = getHealth(c, "test")
	// Same guard as above before indexing the last log entry.
	c.Assert(len(health.Log) > 0, checker.Equals, true)
	last = health.Log[len(health.Log)-1]
	c.Check(health.Status, checker.Equals, "unhealthy")
	c.Check(last.ExitCode, checker.Equals, -1)
	c.Check(last.Output, checker.Equals, "Health check exceeded timeout (1ms)")
	dockerCmd(c, "rm", "-f", "test")
}