Pārlūkot izejas kodu

daemon: kill exec process on ctx cancel

Terminating the exec process when the context is canceled has been
broken since Docker v17.11 so nobody has been able to depend upon that
behaviour in five years of releases. We are thus free from backwards-
compatibility constraints.

Co-authored-by: Nicolas De Loof <nicolas.deloof@gmail.com>
Co-authored-by: Sebastiaan van Stijn <github@gone.nl>
Signed-off-by: Nicolas De Loof <nicolas.deloof@gmail.com>
Signed-off-by: Cory Snider <csnider@mirantis.com>
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
(cherry picked from commit 4b84a3321723a849295d5cbf7342ec36077f9179)
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
Cory Snider 2 gadi atpakaļ
vecāks
revīzija
e7f4963e73
3 mainītis faili ar 52 papildinājumiem un 18 dzēšanām
  1. 13 16
      daemon/exec.go
  2. 2 2
      daemon/health.go
  3. 37 0
      integration/container/health_test.go

+ 13 - 16
daemon/exec.go

@@ -23,9 +23,6 @@ import (
 	"github.com/sirupsen/logrus"
 	"github.com/sirupsen/logrus"
 )
 )
 
 
-// Seconds to wait after sending TERM before trying KILL
-const termProcessTimeout = 10 * time.Second
-
 func (daemon *Daemon) registerExecCommand(container *container.Container, config *exec.Config) {
 func (daemon *Daemon) registerExecCommand(container *container.Container, config *exec.Config) {
 	// Storing execs in container in order to kill them gracefully whenever the container is stopped or removed.
 	// Storing execs in container in order to kill them gracefully whenever the container is stopped or removed.
 	container.ExecCommands.Add(config.ID, config)
 	container.ExecCommands.Add(config.ID, config)
@@ -272,7 +269,10 @@ func (daemon *Daemon) ContainerExecStart(ctx context.Context, name string, optio
 		CloseStdin: true,
 		CloseStdin: true,
 	}
 	}
 	ec.StreamConfig.AttachStreams(&attachConfig)
 	ec.StreamConfig.AttachStreams(&attachConfig)
-	attachErr := ec.StreamConfig.CopyStreams(ctx, &attachConfig)
+	// using context.Background() so that attachErr does not race ctx.Done().
+	copyCtx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	attachErr := ec.StreamConfig.CopyStreams(copyCtx, &attachConfig)
 
 
 	// Synchronize with libcontainerd event loop
 	// Synchronize with libcontainerd event loop
 	ec.Lock()
 	ec.Lock()
@@ -292,18 +292,15 @@ func (daemon *Daemon) ContainerExecStart(ctx context.Context, name string, optio
 
 
 	select {
 	select {
 	case <-ctx.Done():
 	case <-ctx.Done():
-		logrus.Debugf("Sending TERM signal to process %v in container %v", name, c.ID)
-		daemon.containerd.SignalProcess(ctx, c.ID, name, signal.SignalMap["TERM"])
-
-		timeout := time.NewTimer(termProcessTimeout)
-		defer timeout.Stop()
-
-		select {
-		case <-timeout.C:
-			logrus.Infof("Container %v, process %v failed to exit within %v of signal TERM - using the force", c.ID, name, termProcessTimeout)
-			daemon.containerd.SignalProcess(ctx, c.ID, name, signal.SignalMap["KILL"])
-		case <-attachErr:
-			// TERM signal worked
+		log := logrus.
+			WithField("container", c.ID).
+			WithField("exec", name)
+		log.Debug("Sending KILL signal to container process")
+		sigCtx, cancelFunc := context.WithTimeout(context.Background(), 30*time.Second)
+		defer cancelFunc()
+		err := daemon.containerd.SignalProcess(sigCtx, c.ID, name, signal.SignalMap["KILL"])
+		if err != nil {
+			log.WithError(err).Error("Could not send KILL signal to container process")
 		}
 		}
 		return ctx.Err()
 		return ctx.Err()
 	case err := <-attachErr:
 	case err := <-attachErr:

+ 2 - 2
daemon/health.go

@@ -133,8 +133,8 @@ func (p *cmdProbe) run(ctx context.Context, d *Daemon, cntr *container.Container
 	case <-tm.C:
 	case <-tm.C:
 		cancelProbe()
 		cancelProbe()
 		logrus.WithContext(ctx).Debugf("Health check for container %s taking too long", cntr.ID)
 		logrus.WithContext(ctx).Debugf("Health check for container %s taking too long", cntr.ID)
-		// Wait for probe to exit (it might take a while to respond to the TERM
-		// signal and we don't want dying probes to pile up).
+		// Wait for probe to exit (it might take some time to call containerd to kill
+		// the process and we don't want dying probes to pile up).
 		<-execErr
 		<-execErr
 		return &types.HealthcheckResult{
 		return &types.HealthcheckResult{
 			ExitCode: -1,
 			ExitCode: -1,

+ 37 - 0
integration/container/health_test.go

@@ -2,6 +2,7 @@ package container // import "github.com/docker/docker/integration/container"
 
 
 import (
 import (
 	"context"
 	"context"
+	"fmt"
 	"testing"
 	"testing"
 	"time"
 	"time"
 
 
@@ -93,6 +94,42 @@ while true; do sleep 1; done
 	poll.WaitOn(t, pollForHealthStatus(ctxPoll, client, id, "healthy"), poll.WithDelay(100*time.Millisecond))
 	poll.WaitOn(t, pollForHealthStatus(ctxPoll, client, id, "healthy"), poll.WithDelay(100*time.Millisecond))
 }
 }
 
 
+// TestHealthCheckProcessKilled verifies that health-checks exec get killed on time-out.
+func TestHealthCheckProcessKilled(t *testing.T) {
+	skip.If(t, testEnv.RuntimeIsWindowsContainerd(), "FIXME: Broken on Windows + containerd combination")
+	defer setupTest(t)()
+	ctx := context.Background()
+	apiClient := testEnv.APIClient()
+
+	cID := container.Run(ctx, t, apiClient, func(c *container.TestContainerConfig) {
+		c.Config.Healthcheck = &containertypes.HealthConfig{
+			Test:     []string{"CMD", "sh", "-c", "sleep 60"},
+			Interval: 100 * time.Millisecond,
+			Timeout:  50 * time.Millisecond,
+			Retries:  1,
+		}
+	})
+	poll.WaitOn(t, pollForHealthCheckLog(ctx, apiClient, cID, "Health check exceeded timeout (50ms)"))
+}
+
+func pollForHealthCheckLog(ctx context.Context, client client.APIClient, containerID string, expected string) func(log poll.LogT) poll.Result {
+	return func(log poll.LogT) poll.Result {
+		inspect, err := client.ContainerInspect(ctx, containerID)
+		if err != nil {
+			return poll.Error(err)
+		}
+		healthChecksTotal := len(inspect.State.Health.Log)
+		if healthChecksTotal > 0 {
+			output := inspect.State.Health.Log[healthChecksTotal-1].Output
+			if output == expected {
+				return poll.Success()
+			}
+			return poll.Error(fmt.Errorf("expected %q, got %q", expected, output))
+		}
+		return poll.Continue("waiting for container healthcheck logs")
+	}
+}
+
 func pollForHealthStatus(ctx context.Context, client client.APIClient, containerID string, healthStatus string) func(log poll.LogT) poll.Result {
 func pollForHealthStatus(ctx context.Context, client client.APIClient, containerID string, healthStatus string) func(log poll.LogT) poll.Result {
 	return func(log poll.LogT) poll.Result {
 	return func(log poll.LogT) poll.Result {
 		inspect, err := client.ContainerInspect(ctx, containerID)
 		inspect, err := client.ContainerInspect(ctx, containerID)