container: Do not remove container if any of the resources failed cleanup
Do not remove a container if any of its resources could not be cleaned up; we don't want to leak resources.

Two new states have been introduced: RemovalInProgress and Dead. Once a container is Dead, it cannot be started or restarted. Dead marks a container whose removal was attempted but failed; the user now needs to figure out what went wrong, correct the situation, and try cleanup again. RemovalInProgress means the container is already being removed, and only one removal can be in progress at a time. A container also cannot be started while it is Dead or while its removal is in progress.

Also extend the existing force option (-f) of docker rm so that it does not return an error and removes the container from the user's view even if resource cleanup failed. This lets a user fall back to the old behavior, where resources might leak but the user can at least make progress.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
parent 97f621bcd8
commit 40945fc186
3 changed files with 99 additions and 20 deletions
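Before the diff, a minimal, self-contained Go sketch of the state guards described in the commit message above. The names here (containerState, setRemovalInProgress, setDead, start) are illustrative stand-ins, not the daemon's own code; they mirror the State methods and the Start() check introduced in the diff below.

// Standalone sketch (hypothetical names) of the removal/Dead state guards.
package main

import (
	"errors"
	"fmt"
	"sync"
)

// containerState mirrors the new flags added to the daemon's State struct.
type containerState struct {
	sync.Mutex
	removalInProgress bool
	dead              bool
}

// setRemovalInProgress fails if another removal already holds the flag,
// so only one removal can be in progress at a time.
func (s *containerState) setRemovalInProgress() error {
	s.Lock()
	defer s.Unlock()
	if s.removalInProgress {
		return errors.New("status is already RemovalInProgress")
	}
	s.removalInProgress = true
	return nil
}

func (s *containerState) resetRemovalInProgress() {
	s.Lock()
	s.removalInProgress = false
	s.Unlock()
}

func (s *containerState) setDead() {
	s.Lock()
	s.dead = true
	s.Unlock()
}

// start refuses to run a container that is Dead or currently being removed.
func (s *containerState) start() error {
	s.Lock()
	defer s.Unlock()
	if s.removalInProgress || s.dead {
		return errors.New("container is marked for removal and cannot be started")
	}
	return nil
}

func main() {
	s := &containerState{}

	// The first removal claims the flag; a concurrent second attempt is rejected.
	fmt.Println("first removal:", s.setRemovalInProgress())  // <nil>
	fmt.Println("second removal:", s.setRemovalInProgress()) // error

	// While removal is in progress, starting the container is refused.
	fmt.Println("start during removal:", s.start()) // error

	// Simulate a failed cleanup: the container is marked Dead and the
	// in-progress flag is released, but start still refuses.
	s.setDead()
	s.resetRemovalInProgress()
	fmt.Println("start when dead:", s.start()) // error
}

Keeping both flags behind the same mutex as the rest of the container state is what makes "only one removal can be in progress" hold under concurrent docker rm calls.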
@@ -360,6 +360,10 @@ func (container *Container) Start() (err error) {
 		return nil
 	}
 
+	if container.removalInProgress || container.Dead {
+		return fmt.Errorf("Container is marked for removal and cannot be started.")
+	}
+
 	// if we encounter an error during start we need to ensure that any other
 	// setup has been cleaned up properly
 	defer func() {
@@ -63,8 +63,15 @@ func (daemon *Daemon) ContainerRm(job *engine.Job) error {
 				return fmt.Errorf("Conflict, You cannot remove a running container. Stop the container before attempting removal or use -f")
 			}
 		}
-		if err := daemon.Rm(container); err != nil {
-			return fmt.Errorf("Cannot destroy container %s: %s", name, err)
+
+		if forceRemove {
+			if err := daemon.ForceRm(container); err != nil {
+				logrus.Errorf("Cannot destroy container %s: %v", name, err)
+			}
+		} else {
+			if err := daemon.Rm(container); err != nil {
+				return fmt.Errorf("Cannot destroy container %s: %v", name, err)
+			}
 		}
 		container.LogEvent("destroy")
 		if removeVolume {
@@ -83,8 +90,16 @@ func (daemon *Daemon) DeleteVolumes(volumeIDs map[string]struct{}) {
 	}
 }
 
+func (daemon *Daemon) Rm(container *Container) (err error) {
+	return daemon.commonRm(container, false)
+}
+
+func (daemon *Daemon) ForceRm(container *Container) (err error) {
+	return daemon.commonRm(container, true)
+}
+
 // Destroy unregisters a container from the daemon and cleanly removes its contents from the filesystem.
-func (daemon *Daemon) Rm(container *Container) error {
+func (daemon *Daemon) commonRm(container *Container, forceRemove bool) (err error) {
 	if container == nil {
 		return fmt.Errorf("The given container is <nil>")
 	}
@@ -94,19 +109,40 @@ func (daemon *Daemon) Rm(container *Container) error {
 		return fmt.Errorf("Container %v not found - maybe it was already destroyed?", container.ID)
 	}
 
-	if err := container.Stop(3); err != nil {
+	// Container state RemovalInProgress should be used to avoid races.
+	if err = container.SetRemovalInProgress(); err != nil {
+		return fmt.Errorf("Failed to set container state to RemovalInProgress: %s", err)
+	}
+
+	defer container.ResetRemovalInProgress()
+
+	if err = container.Stop(3); err != nil {
 		return err
 	}
 
-	// Deregister the container before removing its directory, to avoid race conditions
-	daemon.idIndex.Delete(container.ID)
-	daemon.containers.Delete(container.ID)
+	// Mark container dead. We don't want anybody to be restarting it.
+	container.SetDead()
+
+	// Save container state to disk. So that if error happens before
+	// container meta file got removed from disk, then a restart of
+	// docker should not make a dead container alive.
+	container.ToDisk()
+
+	// If force removal is required, delete container from various
+	// indexes even if removal failed.
+	defer func() {
+		if err != nil && forceRemove {
+			daemon.idIndex.Delete(container.ID)
+			daemon.containers.Delete(container.ID)
+		}
+	}()
+
 	container.derefVolumes()
 	if _, err := daemon.containerGraph.Purge(container.ID); err != nil {
 		logrus.Debugf("Unable to remove container from link graph: %s", err)
 	}
 
-	if err := daemon.driver.Remove(container.ID); err != nil {
+	if err = daemon.driver.Remove(container.ID); err != nil {
 		return fmt.Errorf("Driver %s failed to remove root filesystem %s: %s", daemon.driver, container.ID, err)
 	}
 
@@ -115,15 +151,17 @@ func (daemon *Daemon) Rm(container *Container) error {
 		return fmt.Errorf("Driver %s failed to remove init filesystem %s: %s", daemon.driver, initID, err)
 	}
 
-	if err := os.RemoveAll(container.root); err != nil {
+	if err = os.RemoveAll(container.root); err != nil {
 		return fmt.Errorf("Unable to remove filesystem for %v: %v", container.ID, err)
 	}
 
-	if err := daemon.execDriver.Clean(container.ID); err != nil {
+	if err = daemon.execDriver.Clean(container.ID); err != nil {
 		return fmt.Errorf("Unable to remove execdriver data for %s: %s", container.ID, err)
 	}
 
 	selinuxFreeLxcContexts(container.ProcessLabel)
+	daemon.idIndex.Delete(container.ID)
+	daemon.containers.Delete(container.ID)
 
 	return nil
 }
@@ -11,16 +11,18 @@ import (
 
 type State struct {
 	sync.Mutex
-	Running    bool
-	Paused     bool
-	Restarting bool
-	OOMKilled  bool
-	Pid        int
-	ExitCode   int
-	Error      string // contains last known error when starting the container
-	StartedAt  time.Time
-	FinishedAt time.Time
-	waitChan   chan struct{}
+	Running           bool
+	Paused            bool
+	Restarting        bool
+	OOMKilled         bool
+	removalInProgress bool // Not need for this to be persistent on disk.
+	Dead              bool
+	Pid               int
+	ExitCode          int
+	Error             string // contains last known error when starting the container
+	StartedAt         time.Time
+	FinishedAt        time.Time
+	waitChan          chan struct{}
 }
 
 func NewState() *State {
@@ -42,6 +44,14 @@ func (s *State) String() string {
 		return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
 	}
 
+	if s.removalInProgress {
+		return "Removal In Progress"
+	}
+
+	if s.Dead {
+		return "Dead"
+	}
+
 	if s.FinishedAt.IsZero() {
 		return ""
 	}
@@ -60,6 +70,11 @@ func (s *State) StateString() string {
 		}
 		return "running"
 	}
+
+	if s.Dead {
+		return "dead"
+	}
+
 	return "exited"
 }
 
@@ -217,3 +232,25 @@ func (s *State) IsPaused() bool {
 	s.Unlock()
 	return res
 }
+
+func (s *State) SetRemovalInProgress() error {
+	s.Lock()
+	defer s.Unlock()
+	if s.removalInProgress {
+		return fmt.Errorf("Status is already RemovalInProgress")
+	}
+	s.removalInProgress = true
+	return nil
+}
+
+func (s *State) ResetRemovalInProgress() {
+	s.Lock()
+	s.removalInProgress = false
+	s.Unlock()
+}
+
+func (s *State) SetDead() {
+	s.Lock()
+	s.Dead = true
+	s.Unlock()
+}