state/Wait: Fix race when reading exit status

Before this change there was a race condition between State.Wait reading the exit code from State and the State being changed instantly after the change which ended the State.Wait. Now, each State.Wait has its own channel which is used to transmit the desired StateStatus at the time the state transitions to the awaited one. Wait no longer reads the status by itself so there is no race. The issue caused the `docker run --restart=always ...' to sometimes exit with 0 exit code, because the process was already restarted by the time State.Wait got the chance to read the exit code. Test run -------- Before: ``` $ go test -count 1 -run TestCorrectStateWaitResultAfterRestart . --- FAIL: TestCorrectStateWaitResultAfterRestart (0.00s) state_test.go:198: expected exit code 10, got 0 FAIL FAIL github.com/docker/docker/container 0.011s FAIL ``` After: ``` $ go test -count 1 -run TestCorrectStateWaitResultAfterRestart . ok github.com/docker/docker/container 0.011s ``` Signed-off-by: Paweł Gronowski <pawel.gronowski@docker.com>
2022-07-19 12:59:46 +02:00 · 2022-07-19 12:59:46 +02:00 · 85b9568d0e
commit 85b9568d0e
parent 826003ecae
3 changed files with 88 additions and 57 deletions
--- a/container/container.go
+++ b/container/container.go
@ -81,17 +81,18 @@ type Container struct {
 	Driver          string
 	OS              string
 	// MountLabel contains the options for the 'mount' command
-	MountLabel             string
-	ProcessLabel           string
-	RestartCount           int
-	HasBeenStartedBefore   bool
-	HasBeenManuallyStopped bool // used for unless-stopped restart policy
-	MountPoints            map[string]*volumemounts.MountPoint
-	HostConfig             *containertypes.HostConfig `json:"-"` // do not serialize the host config in the json, otherwise we'll make the container unportable
-	ExecCommands           *exec.Store                `json:"-"`
-	DependencyStore        agentexec.DependencyGetter `json:"-"`
-	SecretReferences       []*swarmtypes.SecretReference
-	ConfigReferences       []*swarmtypes.ConfigReference
+	MountLabel               string
+	ProcessLabel             string
+	RestartCount             int
+	HasBeenStartedBefore     bool
+	HasBeenManuallyStopped   bool // used for unless-stopped restart policy
+	HasBeenManuallyRestarted bool `json:"-"` // used to distinguish restart caused by restart policy from the manual one
+	MountPoints              map[string]*volumemounts.MountPoint
+	HostConfig               *containertypes.HostConfig `json:"-"` // do not serialize the host config in the json, otherwise we'll make the container unportable
+	ExecCommands             *exec.Store                `json:"-"`
+	DependencyStore          agentexec.DependencyGetter `json:"-"`
+	SecretReferences         []*swarmtypes.SecretReference
+	ConfigReferences         []*swarmtypes.ConfigReference
 	// logDriver for closing
 	LogDriver      logger.Logger  `json:"-"`
 	LogCopier      *logger.Copier `json:"-"`
--- a/container/state.go
+++ b/container/state.go
@ -32,9 +32,10 @@ type State struct {
 	StartedAt         time.Time
 	FinishedAt        time.Time
 	Health            *Health
+	Removed           bool `json:"-"`

-	waitStop   chan struct{}
-	waitRemove chan struct{}
+	stopWaiters       []chan<- StateStatus
+	removeOnlyWaiters []chan<- StateStatus
 }

 // StateStatus is used to return container wait results.
@ -57,12 +58,9 @@ func (s StateStatus) Err() error {
 	return s.err
 }

-// NewState creates a default state object with a fresh channel for state changes.
+// NewState creates a default state object.
 func NewState() *State {
-	return &State{
-		waitStop:   make(chan struct{}),
-		waitRemove: make(chan struct{}),
-	}
+	return &State{}
 }

 // String returns a human-readable description of the state
@ -182,11 +180,10 @@ func (s *State) Wait(ctx context.Context, condition WaitCondition) <-chan StateS
 	s.Lock()
 	defer s.Unlock()

-	if condition == WaitConditionNotRunning && !s.Running {
-		// Buffer so we can put it in the channel now.
-		resultC := make(chan StateStatus, 1)
+	// Buffer so we can put status and finish even nobody receives it.
+	resultC := make(chan StateStatus, 1)

-		// Send the current status.
+	if s.conditionAlreadyMet(condition) {
 		resultC <- StateStatus{
 			exitCode: s.ExitCode(),
 			err:      s.Err(),
@ -195,20 +192,17 @@ func (s *State) Wait(ctx context.Context, condition WaitCondition) <-chan StateS
 		return resultC
 	}

-	// If we are waiting only for removal, the waitStop channel should
-	// remain nil and block forever.
-	var waitStop chan struct{}
-	if condition < WaitConditionRemoved {
-		waitStop = s.waitStop
+	waitC := make(chan StateStatus, 1)
+
+	// Removal wakes up both removeOnlyWaiters and stopWaiters
+	// Container could be removed while still in "created" state
+	// in which case it is never actually stopped
+	if condition == WaitConditionRemoved {
+		s.removeOnlyWaiters = append(s.removeOnlyWaiters, waitC)
+	} else {
+		s.stopWaiters = append(s.stopWaiters, waitC)
 	}

-	// Always wait for removal, just in case the container gets removed
-	// while it is still in a "created" state, in which case it is never
-	// actually stopped.
-	waitRemove := s.waitRemove
-
-	resultC := make(chan StateStatus, 1)
-
 	go func() {
 		select {
 		case <-ctx.Done():
@ -218,23 +212,25 @@ func (s *State) Wait(ctx context.Context, condition WaitCondition) <-chan StateS
 				err:      ctx.Err(),
 			}
 			return
-		case <-waitStop:
-		case <-waitRemove:
+		case status := <-waitC:
+			resultC <- status
 		}
-
-		s.Lock()
-		result := StateStatus{
-			exitCode: s.ExitCode(),
-			err:      s.Err(),
-		}
-		s.Unlock()
-
-		resultC <- result
 	}()

 	return resultC
 }

+func (s *State) conditionAlreadyMet(condition WaitCondition) bool {
+	switch condition {
+	case WaitConditionNotRunning:
+		return !s.Running
+	case WaitConditionRemoved:
+		return s.Removed
+	}
+
+	return false
+}
+
 // IsRunning returns whether the running flag is set. Used by Container to check whether a container is running.
 func (s *State) IsRunning() bool {
 	s.Lock()
@ -292,8 +288,8 @@ func (s *State) SetStopped(exitStatus *ExitStatus) {
 	}
 	s.ExitCodeValue = exitStatus.ExitCode
 	s.OOMKilled = exitStatus.OOMKilled
-	close(s.waitStop) // fire waiters for stop
-	s.waitStop = make(chan struct{})
+
+	s.notifyAndClear(&s.stopWaiters)
 }

 // SetRestarting sets the container state to "restarting" without locking.
@ -308,8 +304,8 @@ func (s *State) SetRestarting(exitStatus *ExitStatus) {
 	s.FinishedAt = time.Now().UTC()
 	s.ExitCodeValue = exitStatus.ExitCode
 	s.OOMKilled = exitStatus.OOMKilled
-	close(s.waitStop) // fire waiters for stop
-	s.waitStop = make(chan struct{})
+
+	s.notifyAndClear(&s.stopWaiters)
 }

 // SetError sets the container's error state. This is useful when we want to
@ -374,22 +370,19 @@ func (s *State) IsDead() bool {
 	return res
 }

-// SetRemoved assumes this container is already in the "dead" state and
-// closes the internal waitRemove channel to unblock callers waiting for a
-// container to be removed.
+// SetRemoved assumes this container is already in the "dead" state and notifies all waiters.
 func (s *State) SetRemoved() {
 	s.SetRemovalError(nil)
 }

 // SetRemovalError is to be called in case a container remove failed.
-// It sets an error and closes the internal waitRemove channel to unblock
-// callers waiting for the container to be removed.
+// It sets an error and notifies all waiters.
 func (s *State) SetRemovalError(err error) {
 	s.SetError(err)
 	s.Lock()
-	close(s.waitRemove) // Unblock those waiting on remove.
-	// Recreate the channel so next ContainerWait will work
-	s.waitRemove = make(chan struct{})
+	s.Removed = true
+	s.notifyAndClear(&s.removeOnlyWaiters)
+	s.notifyAndClear(&s.stopWaiters)
 	s.Unlock()
 }

@ -400,3 +393,15 @@ func (s *State) Err() error {
 	}
 	return nil
 }
+
+func (s *State) notifyAndClear(waiters *[]chan<- StateStatus) {
+	result := StateStatus{
+		exitCode: s.ExitCodeValue,
+		err:      s.Err(),
+	}
+
+	for _, c := range *waiters {
+		c <- result
+	}
+	*waiters = nil
+}
--- a/container/state_test.go
+++ b/container/state_test.go
@ -169,6 +169,31 @@ func TestStateTimeoutWait(t *testing.T) {
 	}
 }

+// Related issue: #39352
+func TestCorrectStateWaitResultAfterRestart(t *testing.T) {
+	s := NewState()
+
+	s.Lock()
+	s.SetRunning(0, true)
+	s.Unlock()
+
+	waitC := s.Wait(context.Background(), WaitConditionNotRunning)
+	want := ExitStatus{ExitCode: 10, ExitedAt: time.Now()}
+
+	s.Lock()
+	s.SetRestarting(&want)
+	s.Unlock()
+
+	s.Lock()
+	s.SetRunning(0, true)
+	s.Unlock()
+
+	got := <-waitC
+	if got.exitCode != want.ExitCode {
+		t.Fatalf("expected exit code %v, got %v", want.ExitCode, got.exitCode)
+	}
+}
+
 func TestIsValidStateString(t *testing.T) {
 	states := []struct {
 		state    string