浏览代码

daemon: Maintain container exec-inspect invariant

We have integration tests which assert the invariant that a
GET /containers/{id}/json response lists only IDs of execs which are in
the Running state, according to GET /exec/{id}/json. The invariant could
be violated if those requests were to race the handling of the exec's
task-exit event. The coarse-grained locking of the container ExecStore
when starting an exec task happened to synchronize
(*Daemon).ProcessEvent and (*Daemon).ContainerExecInspect with the exec
start just enough that the integration tests were unlikely to catch the
invariant violation on execs which exit immediately. Removing the
unnecessary locking made the underlying race condition more likely for
the tests to hit.

Maintain the invariant by deleting the exec from its container's
ExecCommands before clearing its Running flag. Additionally, fix other
potential data races with execs by ensuring that the ExecConfig lock is
held whenever a mutable field is read from or written to.

Signed-off-by: Cory Snider <csnider@mirantis.com>
Cory Snider 2 年之前
父节点
当前提交
a09f8dbe6e
共有 4 个文件被更改,包括 23 次插入和 9 次删除
  1. 2 2
      daemon/exec.go
  2. 12 3
      daemon/health.go
  3. 2 0
      daemon/inspect.go
  4. 7 4
      daemon/monitor.go

+ 2 - 2
daemon/exec.go

@@ -183,6 +183,7 @@ func (daemon *Daemon) ContainerExecStart(ctx context.Context, name string, optio
 	defer func() {
 	defer func() {
 		if err != nil {
 		if err != nil {
 			ec.Lock()
 			ec.Lock()
+			ec.Container.ExecCommands.Delete(ec.ID)
 			ec.Running = false
 			ec.Running = false
 			exitCode := 126
 			exitCode := 126
 			ec.ExitCode = &exitCode
 			ec.ExitCode = &exitCode
@@ -190,7 +191,6 @@ func (daemon *Daemon) ContainerExecStart(ctx context.Context, name string, optio
 				logrus.Errorf("failed to cleanup exec %s streams: %s", ec.Container.ID, err)
 				logrus.Errorf("failed to cleanup exec %s streams: %s", ec.Container.ID, err)
 			}
 			}
 			ec.Unlock()
 			ec.Unlock()
-			ec.Container.ExecCommands.Delete(ec.ID)
 		}
 		}
 	}()
 	}()
 
 
@@ -287,7 +287,7 @@ func (daemon *Daemon) ContainerExecStart(ctx context.Context, name string, optio
 	// close the chan to notify readiness
 	// close the chan to notify readiness
 	close(ec.Started)
 	close(ec.Started)
 	if err != nil {
 	if err != nil {
-		ec.Unlock()
+		defer ec.Unlock()
 		return translateContainerdStartErr(ec.Entrypoint, ec.SetExitCode, err)
 		return translateContainerdStartErr(ec.Entrypoint, ec.SetExitCode, err)
 	}
 	}
 	ec.Unlock()
 	ec.Unlock()

+ 12 - 3
daemon/health.go

@@ -149,14 +149,23 @@ func (p *cmdProbe) run(ctx context.Context, d *Daemon, cntr *container.Container
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
-	if info.ExitCode == nil {
-		return nil, fmt.Errorf("healthcheck for container %s has no exit code", cntr.ID)
+	exitCode, err := func() (int, error) {
+		info.Lock()
+		defer info.Unlock()
+		if info.ExitCode == nil {
+			// No explicit Unlock here: the deferred Unlock above releases the lock, and unlocking twice is a fatal error.
+			return 0, fmt.Errorf("healthcheck for container %s has no exit code", cntr.ID)
+		}
+		return *info.ExitCode, nil
+	}()
+	if err != nil {
+		return nil, err
 	}
 	}
 	// Note: Go's json package will handle invalid UTF-8 for us
 	// Note: Go's json package will handle invalid UTF-8 for us
 	out := output.String()
 	out := output.String()
 	return &types.HealthcheckResult{
 	return &types.HealthcheckResult{
 		End:      time.Now(),
 		End:      time.Now(),
-		ExitCode: *info.ExitCode,
+		ExitCode: exitCode,
 		Output:   out,
 		Output:   out,
 	}, nil
 	}, nil
 }
 }

+ 2 - 0
daemon/inspect.go

@@ -218,6 +218,8 @@ func (daemon *Daemon) ContainerExecInspect(id string) (*backend.ExecInspect, err
 		return nil, errExecNotFound(id)
 		return nil, errExecNotFound(id)
 	}
 	}
 
 
+	e.Lock()
+	defer e.Unlock()
 	pc := inspectExecProcessConfig(e)
 	pc := inspectExecProcessConfig(e)
 	var pid int
 	var pid int
 	if e.Process != nil {
 	if e.Process != nil {

+ 7 - 4
daemon/monitor.go

@@ -163,6 +163,13 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
 			ec := int(ei.ExitCode)
 			ec := int(ei.ExitCode)
 			execConfig.Lock()
 			execConfig.Lock()
 			defer execConfig.Unlock()
 			defer execConfig.Unlock()
+
+			// Remove the exec command from the container's store only and not the
+			// daemon's store so that the exec command can be inspected. Remove it
+			// before mutating execConfig to maintain the invariant that
+			// c.ExecCommands only contains execs in the Running state.
+			c.ExecCommands.Delete(execConfig.ID)
+
 			execConfig.ExitCode = &ec
 			execConfig.ExitCode = &ec
 			execConfig.Running = false
 			execConfig.Running = false
 
 
@@ -174,10 +181,6 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
 				logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
 				logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
 			}
 			}
 
 
-			// remove the exec command from the container's store only and not the
-			// daemon's store so that the exec command can be inspected.
-			c.ExecCommands.Delete(execConfig.ID)
-
 			exitCode = ec
 			exitCode = ec
 
 
 			go func() {
 			go func() {