diff --git a/container/container.go b/container/container.go index dea443df8a..d0f70e139e 100644 --- a/container/container.go +++ b/container/container.go @@ -81,17 +81,18 @@ type Container struct { Driver string OS string // MountLabel contains the options for the 'mount' command - MountLabel string - ProcessLabel string - RestartCount int - HasBeenStartedBefore bool - HasBeenManuallyStopped bool // used for unless-stopped restart policy - MountPoints map[string]*volumemounts.MountPoint - HostConfig *containertypes.HostConfig `json:"-"` // do not serialize the host config in the json, otherwise we'll make the container unportable - ExecCommands *exec.Store `json:"-"` - DependencyStore agentexec.DependencyGetter `json:"-"` - SecretReferences []*swarmtypes.SecretReference - ConfigReferences []*swarmtypes.ConfigReference + MountLabel string + ProcessLabel string + RestartCount int + HasBeenStartedBefore bool + HasBeenManuallyStopped bool // used for unless-stopped restart policy + HasBeenManuallyRestarted bool `json:"-"` // used to distinguish restart caused by restart policy from the manual one + MountPoints map[string]*volumemounts.MountPoint + HostConfig *containertypes.HostConfig `json:"-"` // do not serialize the host config in the json, otherwise we'll make the container unportable + ExecCommands *exec.Store `json:"-"` + DependencyStore agentexec.DependencyGetter `json:"-"` + SecretReferences []*swarmtypes.SecretReference + ConfigReferences []*swarmtypes.ConfigReference // logDriver for closing LogDriver logger.Logger `json:"-"` LogCopier *logger.Copier `json:"-"` diff --git a/container/state.go b/container/state.go index fe2b060d72..1267c8694a 100644 --- a/container/state.go +++ b/container/state.go @@ -32,9 +32,10 @@ type State struct { StartedAt time.Time FinishedAt time.Time Health *Health + Removed bool `json:"-"` - waitStop chan struct{} - waitRemove chan struct{} + stopWaiters []chan<- StateStatus + removeOnlyWaiters []chan<- StateStatus } // StateStatus is used to return container wait results. @@ -57,12 +58,9 @@ func (s StateStatus) Err() error { return s.err } -// NewState creates a default state object with a fresh channel for state changes. +// NewState creates a default state object. func NewState() *State { - return &State{ - waitStop: make(chan struct{}), - waitRemove: make(chan struct{}), - } + return &State{} } // String returns a human-readable description of the state @@ -182,11 +180,10 @@ func (s *State) Wait(ctx context.Context, condition WaitCondition) <-chan StateS s.Lock() defer s.Unlock() - if condition == WaitConditionNotRunning && !s.Running { - // Buffer so we can put it in the channel now. - resultC := make(chan StateStatus, 1) + // Buffer so we can put status and finish even nobody receives it. + resultC := make(chan StateStatus, 1) - // Send the current status. + if s.conditionAlreadyMet(condition) { resultC <- StateStatus{ exitCode: s.ExitCode(), err: s.Err(), @@ -195,20 +192,17 @@ func (s *State) Wait(ctx context.Context, condition WaitCondition) <-chan StateS return resultC } - // If we are waiting only for removal, the waitStop channel should - // remain nil and block forever. - var waitStop chan struct{} - if condition < WaitConditionRemoved { - waitStop = s.waitStop + waitC := make(chan StateStatus, 1) + + // Removal wakes up both removeOnlyWaiters and stopWaiters + // Container could be removed while still in "created" state + // in which case it is never actually stopped + if condition == WaitConditionRemoved { + s.removeOnlyWaiters = append(s.removeOnlyWaiters, waitC) + } else { + s.stopWaiters = append(s.stopWaiters, waitC) } - // Always wait for removal, just in case the container gets removed - // while it is still in a "created" state, in which case it is never - // actually stopped. - waitRemove := s.waitRemove - - resultC := make(chan StateStatus, 1) - go func() { select { case <-ctx.Done(): @@ -218,23 +212,25 @@ func (s *State) Wait(ctx context.Context, condition WaitCondition) <-chan StateS err: ctx.Err(), } return - case <-waitStop: - case <-waitRemove: + case status := <-waitC: + resultC <- status } - - s.Lock() - result := StateStatus{ - exitCode: s.ExitCode(), - err: s.Err(), - } - s.Unlock() - - resultC <- result }() return resultC } +func (s *State) conditionAlreadyMet(condition WaitCondition) bool { + switch condition { + case WaitConditionNotRunning: + return !s.Running + case WaitConditionRemoved: + return s.Removed + } + + return false +} + // IsRunning returns whether the running flag is set. Used by Container to check whether a container is running. func (s *State) IsRunning() bool { s.Lock() @@ -292,8 +288,8 @@ func (s *State) SetStopped(exitStatus *ExitStatus) { } s.ExitCodeValue = exitStatus.ExitCode s.OOMKilled = exitStatus.OOMKilled - close(s.waitStop) // fire waiters for stop - s.waitStop = make(chan struct{}) + + s.notifyAndClear(&s.stopWaiters) } // SetRestarting sets the container state to "restarting" without locking. @@ -308,8 +304,8 @@ func (s *State) SetRestarting(exitStatus *ExitStatus) { s.FinishedAt = time.Now().UTC() s.ExitCodeValue = exitStatus.ExitCode s.OOMKilled = exitStatus.OOMKilled - close(s.waitStop) // fire waiters for stop - s.waitStop = make(chan struct{}) + + s.notifyAndClear(&s.stopWaiters) } // SetError sets the container's error state. This is useful when we want to @@ -374,22 +370,19 @@ func (s *State) IsDead() bool { return res } -// SetRemoved assumes this container is already in the "dead" state and -// closes the internal waitRemove channel to unblock callers waiting for a -// container to be removed. +// SetRemoved assumes this container is already in the "dead" state and notifies all waiters. func (s *State) SetRemoved() { s.SetRemovalError(nil) } // SetRemovalError is to be called in case a container remove failed. -// It sets an error and closes the internal waitRemove channel to unblock -// callers waiting for the container to be removed. +// It sets an error and notifies all waiters. func (s *State) SetRemovalError(err error) { s.SetError(err) s.Lock() - close(s.waitRemove) // Unblock those waiting on remove. - // Recreate the channel so next ContainerWait will work - s.waitRemove = make(chan struct{}) + s.Removed = true + s.notifyAndClear(&s.removeOnlyWaiters) + s.notifyAndClear(&s.stopWaiters) s.Unlock() } @@ -400,3 +393,15 @@ func (s *State) Err() error { } return nil } + +func (s *State) notifyAndClear(waiters *[]chan<- StateStatus) { + result := StateStatus{ + exitCode: s.ExitCodeValue, + err: s.Err(), + } + + for _, c := range *waiters { + c <- result + } + *waiters = nil +} diff --git a/container/state_test.go b/container/state_test.go index bf114ea1aa..09dfb56089 100644 --- a/container/state_test.go +++ b/container/state_test.go @@ -169,6 +169,31 @@ func TestStateTimeoutWait(t *testing.T) { } } +// Related issue: #39352 +func TestCorrectStateWaitResultAfterRestart(t *testing.T) { + s := NewState() + + s.Lock() + s.SetRunning(0, true) + s.Unlock() + + waitC := s.Wait(context.Background(), WaitConditionNotRunning) + want := ExitStatus{ExitCode: 10, ExitedAt: time.Now()} + + s.Lock() + s.SetRestarting(&want) + s.Unlock() + + s.Lock() + s.SetRunning(0, true) + s.Unlock() + + got := <-waitC + if got.exitCode != want.ExitCode { + t.Fatalf("expected exit code %v, got %v", want.ExitCode, got.exitCode) + } +} + func TestIsValidStateString(t *testing.T) { states := []struct { state string