소스 검색

extend health check to start service

Signed-off-by: runshenzhu <runshen.zhu@gmail.com>
Signed-off-by: Runshen Zhu <runshen.zhu@gmail.com>
runshenzhu 9 년 전
부모
커밋
a99db84b4a
2개의 파일이 변경되었습니다: 243줄 추가, 1줄 삭제
  1. 52 1
      daemon/cluster/executor/container/controller.go
  2. 191 0
      integration-cli/docker_cli_service_health_test.go

+ 52 - 1
daemon/cluster/executor/container/controller.go

@@ -142,7 +142,58 @@ func (r *controller) Start(ctx context.Context) error {
 		return errors.Wrap(err, "starting container failed")
 	}
 
-	return nil
+	// no health check
+	if ctnr.Config == nil || ctnr.Config.Healthcheck == nil {
+		return nil
+	}
+
+	healthCmd := ctnr.Config.Healthcheck.Test
+
+	if len(healthCmd) == 0 || healthCmd[0] == "NONE" {
+		return nil
+	}
+
+	// wait for container to be healthy
+	eventq := r.adapter.events(ctx)
+
+	var healthErr error
+	for {
+		select {
+		case event := <-eventq:
+			if !r.matchevent(event) {
+				continue
+			}
+
+			switch event.Action {
+			case "die": // exit on terminal events
+				ctnr, err := r.adapter.inspect(ctx)
+				if err != nil {
+					return errors.Wrap(err, "die event received")
+				} else if ctnr.State.ExitCode != 0 {
+					return &exitError{code: ctnr.State.ExitCode, cause: healthErr}
+				}
+
+				return nil
+			case "destroy":
+				// If we get here, something has gone wrong but we want to exit
+				// and report anyways.
+				return ErrContainerDestroyed
+			case "health_status: unhealthy":
+				// in this case, we stop the container and report unhealthy status
+				if err := r.Shutdown(ctx); err != nil {
+					return errors.Wrap(err, "unhealthy container shutdown failed")
+				}
+				// set health check error, and wait for container to fully exit ("die" event)
+				healthErr = ErrContainerUnhealthy
+			case "health_status: healthy":
+				return nil
+			}
+		case <-ctx.Done():
+			return ctx.Err()
+		case <-r.closed:
+			return r.err
+		}
+	}
 }
 
 // Wait on the container to exit.

+ 191 - 0
integration-cli/docker_cli_service_health_test.go

@@ -0,0 +1,191 @@
+// +build !windows
+
+package main
+
+import (
+	"strconv"
+	"strings"
+
+	"github.com/docker/docker/daemon/cluster/executor/container"
+	"github.com/docker/docker/pkg/integration/checker"
+	"github.com/docker/engine-api/types/swarm"
+	"github.com/go-check/check"
+)
+
+// TestServiceHealthRun starts a service from an image whose health check
+// passes at first, then breaks the check while the task is running.
+// The unhealthy task should be detected and end up in the failed state with
+// the container-unhealthy error recorded in its status.
+func (s *DockerSwarmSuite) TestServiceHealthRun(c *check.C) {
+	testRequires(c, DaemonIsLinux) // busybox doesn't work on Windows
+
+	d := s.AddDaemon(c, true, true)
+
+	// build image with health-check
+	// note: use `daemon.buildImageWithOut` to build, do not use `buildImage` to build
+	imageName := "testhealth"
+	_, _, err := d.buildImageWithOut(imageName,
+		`FROM busybox
+		RUN touch /status
+		HEALTHCHECK --interval=1s --timeout=1s --retries=1\
+		  CMD cat /status`,
+		true)
+	// Nothing below can succeed without the image, so stop here on failure
+	// instead of continuing with a misleading later error.
+	c.Assert(err, check.IsNil)
+
+	serviceName := "healthServiceRun"
+	out, err := d.Cmd("service", "create", "--name", serviceName, imageName, "top")
+	c.Assert(err, checker.IsNil, check.Commentf(out))
+	id := strings.TrimSpace(out)
+
+	var tasks []swarm.Task
+	waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
+		tasks = d.getServiceTasks(c, id)
+		return tasks, nil
+	}, checker.HasLen, 1)
+
+	task := tasks[0]
+
+	// wait for task to be running.
+	// The health check of this image passes on its first probe, so the
+	// "starting" state is only transient; polling for it could miss it and
+	// time out. "running" is the stable state reached once the container
+	// has reported healthy.
+	waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
+		task = d.getTask(c, task.ID)
+		return task.Status.State, nil
+	}, checker.Equals, swarm.TaskStateRunning)
+	containerID := task.Status.ContainerStatus.ContainerID
+
+	// wait for container to be healthy
+	waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
+		// the inspect error is ignored on purpose: a failed poll simply
+		// does not match and is retried until the timeout
+		out, _ := d.Cmd("inspect", "--format={{.State.Health.Status}}", containerID)
+		return strings.TrimSpace(out), nil
+	}, checker.Equals, "healthy")
+
+	// make it fail: the health check command is `cat /status`
+	out, err = d.Cmd("exec", containerID, "rm", "/status")
+	c.Assert(err, checker.IsNil, check.Commentf(out))
+	// wait for container to be unhealthy
+	waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
+		out, _ := d.Cmd("inspect", "--format={{.State.Health.Status}}", containerID)
+		return strings.TrimSpace(out), nil
+	}, checker.Equals, "unhealthy")
+
+	// Task should be terminated
+	waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
+		task = d.getTask(c, task.ID)
+		return task.Status.State, nil
+	}, checker.Equals, swarm.TaskStateFailed)
+
+	// the failure must be attributed to the health check, not to something else
+	if !strings.Contains(task.Status.Err, container.ErrContainerUnhealthy.Error()) {
+		c.Fatal("unhealthy task exits because of other error")
+	}
+}
+
+// TestServiceHealthStart starts a service whose container fails its health
+// check from the beginning (the image has no /status file).
+// The task should stay in the starting state while the check is failing, and
+// move to running once the check is made to pass.
+func (s *DockerSwarmSuite) TestServiceHealthStart(c *check.C) {
+	testRequires(c, DaemonIsLinux) // busybox doesn't work on Windows
+
+	d := s.AddDaemon(c, true, true)
+
+	// service started from this image won't pass health check;
+	// retries=1024 keeps the container from being declared unhealthy
+	// while the test is still observing it
+	imageName := "testhealth"
+	_, _, err := d.buildImageWithOut(imageName,
+		`FROM busybox
+		HEALTHCHECK --interval=1s --timeout=1s --retries=1024\
+		  CMD cat /status`,
+		true)
+	// Nothing below can succeed without the image, so stop here on failure.
+	c.Assert(err, check.IsNil)
+
+	serviceName := "healthServiceStart"
+	out, err := d.Cmd("service", "create", "--name", serviceName, imageName, "top")
+	c.Assert(err, checker.IsNil, check.Commentf(out))
+	id := strings.TrimSpace(out)
+
+	var tasks []swarm.Task
+	waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
+		tasks = d.getServiceTasks(c, id)
+		return tasks, nil
+	}, checker.HasLen, 1)
+
+	task := tasks[0]
+
+	// wait for task to start
+	waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
+		task = d.getTask(c, task.ID)
+		return task.Status.State, nil
+	}, checker.Equals, swarm.TaskStateStarting)
+
+	containerID := task.Status.ContainerStatus.ContainerID
+
+	// wait for health check to work
+	waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
+		// errors are ignored on purpose: a failed inspect or an unparsable
+		// value yields 0, which does not match and is retried
+		out, _ := d.Cmd("inspect", "--format={{.State.Health.FailingStreak}}", containerID)
+		failingStreak, _ := strconv.Atoi(strings.TrimSpace(out))
+		return failingStreak, nil
+	}, checker.GreaterThan, 0)
+
+	// task should be blocked at starting status
+	task = d.getTask(c, task.ID)
+	c.Assert(task.Status.State, check.Equals, swarm.TaskStateStarting)
+
+	// make it healthy: the health check command is `cat /status`
+	out, err = d.Cmd("exec", containerID, "touch", "/status")
+	c.Assert(err, checker.IsNil, check.Commentf(out))
+
+	// Task should be at running status
+	waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
+		task = d.getTask(c, task.ID)
+		return task.Status.State, nil
+	}, checker.Equals, swarm.TaskStateRunning)
+}
+
+// TestServiceHealthUpdate starts a service whose container fails its health
+// check from the beginning, verifies the task is held in the starting state,
+// then makes the check pass and expects the task to reach running.
+//
+// NOTE(review): despite its name, this test issues no `service update`; its
+// steps currently mirror TestServiceHealthStart. Confirm whether an update
+// step was intended.
+func (s *DockerSwarmSuite) TestServiceHealthUpdate(c *check.C) {
+	testRequires(c, DaemonIsLinux) // busybox doesn't work on Windows
+
+	d := s.AddDaemon(c, true, true)
+
+	// service started from this image won't pass health check;
+	// retries=1024 keeps the container from being declared unhealthy
+	// while the test is still observing it
+	imageName := "testhealth"
+	_, _, err := d.buildImageWithOut(imageName,
+		`FROM busybox
+		HEALTHCHECK --interval=1s --timeout=1s --retries=1024\
+		  CMD cat /status`,
+		true)
+	// Nothing below can succeed without the image, so stop here on failure.
+	c.Assert(err, check.IsNil)
+
+	// use a name of its own so this test's service is distinguishable from
+	// the one created by TestServiceHealthStart in logs
+	serviceName := "healthServiceUpdate"
+	out, err := d.Cmd("service", "create", "--name", serviceName, imageName, "top")
+	c.Assert(err, checker.IsNil, check.Commentf(out))
+	id := strings.TrimSpace(out)
+
+	var tasks []swarm.Task
+	waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
+		tasks = d.getServiceTasks(c, id)
+		return tasks, nil
+	}, checker.HasLen, 1)
+
+	task := tasks[0]
+
+	// wait for task to start
+	waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
+		task = d.getTask(c, task.ID)
+		return task.Status.State, nil
+	}, checker.Equals, swarm.TaskStateStarting)
+
+	containerID := task.Status.ContainerStatus.ContainerID
+
+	// wait for health check to work
+	waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
+		// errors are ignored on purpose: a failed inspect or an unparsable
+		// value yields 0, which does not match and is retried
+		out, _ := d.Cmd("inspect", "--format={{.State.Health.FailingStreak}}", containerID)
+		failingStreak, _ := strconv.Atoi(strings.TrimSpace(out))
+		return failingStreak, nil
+	}, checker.GreaterThan, 0)
+
+	// task should be blocked at starting status
+	task = d.getTask(c, task.ID)
+	c.Assert(task.Status.State, check.Equals, swarm.TaskStateStarting)
+
+	// make it healthy: the health check command is `cat /status`
+	out, err = d.Cmd("exec", containerID, "touch", "/status")
+	c.Assert(err, checker.IsNil, check.Commentf(out))
+	// Task should be at running status
+	waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
+		task = d.getTask(c, task.ID)
+		return task.Status.State, nil
+	}, checker.Equals, swarm.TaskStateRunning)
+}