Fix daemon shutdown on error after rework of daemon startup
Currently the daemon does not stop on error because the blocking "serve API" job occupies the main thread while it waits on the daemon-init channel. A better approach is to run the blocking serve API job in a goroutine and ensure that error notifications are sent back to the main daemon thread (using the already existing channel), so that a clean shutdown can occur on error.

Docker-DCO-1.1-Signed-off-by: Phil Estes <estesp@linux.vnet.ibm.com> (github: estesp)
This commit is contained in:
parent
44e9715e46
commit
459e58ffc9
4 changed files with 93 additions and 29 deletions
|
@ -817,6 +817,12 @@ func NewDaemonFromDirectory(config *Config, eng *engine.Engine) (*Daemon, error)
|
|||
}
|
||||
config.DisableNetwork = config.BridgeIface == disableNetworkBridge
|
||||
|
||||
// register portallocator release on shutdown
|
||||
eng.OnShutdown(func() {
|
||||
if err := portallocator.ReleaseAll(); err != nil {
|
||||
log.Errorf("portallocator.ReleaseAll(): %s", err)
|
||||
}
|
||||
})
|
||||
// Claim the pidfile first, to avoid any and all unexpected race conditions.
|
||||
// Some of the init doesn't need a pidfile lock - but let's not try to be smart.
|
||||
if config.Pidfile != "" {
|
||||
|
@ -879,6 +885,12 @@ func NewDaemonFromDirectory(config *Config, eng *engine.Engine) (*Daemon, error)
|
|||
return nil, fmt.Errorf("error intializing graphdriver: %v", err)
|
||||
}
|
||||
log.Debugf("Using graph driver %s", driver)
|
||||
// register cleanup for graph driver
|
||||
eng.OnShutdown(func() {
|
||||
if err := driver.Cleanup(); err != nil {
|
||||
log.Errorf("Error during graph storage driver.Cleanup(): %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
// As Docker on btrfs and SELinux are incompatible at present, error on both being enabled
|
||||
if selinuxEnabled() && config.EnableSelinuxSupport && driver.String() == "btrfs" {
|
||||
|
@ -956,6 +968,12 @@ func NewDaemonFromDirectory(config *Config, eng *engine.Engine) (*Daemon, error)
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// register graph close on shutdown
|
||||
eng.OnShutdown(func() {
|
||||
if err := graph.Close(); err != nil {
|
||||
log.Errorf("Error during container graph.Close(): %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
localCopy := path.Join(config.Root, "init", fmt.Sprintf("dockerinit-%s", dockerversion.VERSION))
|
||||
sysInitPath := utils.DockerInitPath(localCopy)
|
||||
|
@ -1003,22 +1021,9 @@ func NewDaemonFromDirectory(config *Config, eng *engine.Engine) (*Daemon, error)
|
|||
statsCollector: newStatsCollector(1 * time.Second),
|
||||
}
|
||||
|
||||
// Setup shutdown handlers
|
||||
// FIXME: can these shutdown handlers be registered closer to their source?
|
||||
eng.OnShutdown(func() {
|
||||
// FIXME: if these cleanup steps can be called concurrently, register
|
||||
// them as separate handlers to speed up total shutdown time
|
||||
if err := daemon.shutdown(); err != nil {
|
||||
log.Errorf("daemon.shutdown(): %s", err)
|
||||
}
|
||||
if err := portallocator.ReleaseAll(); err != nil {
|
||||
log.Errorf("portallocator.ReleaseAll(): %s", err)
|
||||
}
|
||||
if err := daemon.driver.Cleanup(); err != nil {
|
||||
log.Errorf("daemon.driver.Cleanup(): %v", err)
|
||||
}
|
||||
if err := daemon.containerGraph.Close(); err != nil {
|
||||
log.Errorf("daemon.containerGraph.Close(): %v", err)
|
||||
log.Errorf("Error during daemon.shutdown(): %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@ import (
|
|||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
log "github.com/Sirupsen/logrus"
|
||||
"github.com/docker/docker/autogen/dockerversion"
|
||||
|
@ -101,13 +102,11 @@ func mainDaemon() {
|
|||
// load the daemon in the background so we can immediately start
|
||||
// the http api so that connections don't fail while the daemon
|
||||
// is booting
|
||||
daemonWait := make(chan struct{})
|
||||
daemonInitWait := make(chan error)
|
||||
go func() {
|
||||
defer close(daemonWait)
|
||||
|
||||
d, err := daemon.NewDaemon(daemonCfg, eng)
|
||||
if err != nil {
|
||||
log.Error(err)
|
||||
daemonInitWait <- err
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -119,7 +118,7 @@ func mainDaemon() {
|
|||
)
|
||||
|
||||
if err := d.Install(eng); err != nil {
|
||||
log.Error(err)
|
||||
daemonInitWait <- err
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -129,11 +128,10 @@ func mainDaemon() {
|
|||
// after the daemon is done setting up we can tell the api to start
|
||||
// accepting connections
|
||||
if err := eng.Job("acceptconnections").Run(); err != nil {
|
||||
log.Error(err)
|
||||
daemonInitWait <- err
|
||||
return
|
||||
}
|
||||
|
||||
log.Debugf("daemon finished")
|
||||
daemonInitWait <- nil
|
||||
}()
|
||||
|
||||
// Serve api
|
||||
|
@ -150,16 +148,46 @@ func mainDaemon() {
|
|||
job.Setenv("TlsCert", *flCert)
|
||||
job.Setenv("TlsKey", *flKey)
|
||||
job.SetenvBool("BufferRequests", true)
|
||||
err := job.Run()
|
||||
|
||||
// The serve API job never exits unless an error occurs
|
||||
// We need to start it as a goroutine and wait on it so
|
||||
// daemon doesn't exit
|
||||
serveAPIWait := make(chan error)
|
||||
go func() {
|
||||
if err := job.Run(); err != nil {
|
||||
log.Errorf("ServeAPI error: %v", err)
|
||||
serveAPIWait <- err
|
||||
return
|
||||
}
|
||||
serveAPIWait <- nil
|
||||
}()
|
||||
|
||||
// Wait for the daemon startup goroutine to finish
|
||||
// This makes sure we can actually cleanly shutdown the daemon
|
||||
log.Infof("waiting for daemon to initialize")
|
||||
<-daemonWait
|
||||
eng.Shutdown()
|
||||
if err != nil {
|
||||
// log errors here so the log output looks more consistent
|
||||
log.Fatalf("shutting down daemon due to errors: %v", err)
|
||||
log.Debug("waiting for daemon to initialize")
|
||||
errDaemon := <-daemonInitWait
|
||||
if errDaemon != nil {
|
||||
eng.Shutdown()
|
||||
outStr := fmt.Sprintf("Shutting down daemon due to errors: %v", errDaemon)
|
||||
if strings.Contains(errDaemon.Error(), "engine is shutdown") {
|
||||
// if the error is "engine is shutdown", we've already reported (or
|
||||
// will report below in API server errors) the error
|
||||
outStr = "Shutting down daemon due to reported errors"
|
||||
}
|
||||
// we must "fatal" exit here as the API server may be happy to
|
||||
// continue listening forever if the error had no impact to API
|
||||
log.Fatal(outStr)
|
||||
} else {
|
||||
log.Info("Daemon has completed initialization")
|
||||
}
|
||||
|
||||
// Daemon is fully initialized and handling API traffic
|
||||
// Wait for serve API job to complete
|
||||
errAPI := <-serveAPIWait
|
||||
// If we have an error here it is unique to API (as daemonErr would have
|
||||
// exited the daemon process above)
|
||||
if errAPI != nil {
|
||||
log.Errorf("Shutting down due to ServeAPI error: %v", errAPI)
|
||||
}
|
||||
eng.Shutdown()
|
||||
}
|
||||
|
|
|
@ -481,6 +481,33 @@ func TestDaemonUpgradeWithVolumes(t *testing.T) {
|
|||
logDone("daemon - volumes from old(pre 1.3) daemon work")
|
||||
}
|
||||
|
||||
// GH#11320 - verify that the daemon exits on failure properly
|
||||
// Note that this explicitly tests the conflict of {-b,--bridge} and {--bip} options as the means
|
||||
// to get a daemon init failure; no other tests for -b/--bip conflict are therefore required
|
||||
func TestDaemonExitOnFailure(t *testing.T) {
|
||||
d := NewDaemon(t)
|
||||
defer d.Stop()
|
||||
|
||||
//attempt to start daemon with incorrect flags (we know -b and --bip conflict)
|
||||
if err := d.Start("--bridge", "nosuchbridge", "--bip", "1.1.1.1"); err != nil {
|
||||
//verify we got the right error
|
||||
if !strings.Contains(err.Error(), "Daemon exited and never started") {
|
||||
t.Fatalf("Expected daemon not to start, got %v", err)
|
||||
}
|
||||
// look in the log and make sure we got the message that daemon is shutting down
|
||||
runCmd := exec.Command("grep", "Shutting down daemon due to", d.LogfileName())
|
||||
if out, _, err := runCommandWithOutput(runCmd); err != nil {
|
||||
t.Fatalf("Expected 'shutting down daemon due to error' message; but doesn't exist in log: %q, err: %v", out, err)
|
||||
}
|
||||
} else {
|
||||
//if we didn't get an error and the daemon is running, this is a failure
|
||||
d.Stop()
|
||||
t.Fatal("Conflicting options should cause the daemon to error out with a failure")
|
||||
}
|
||||
|
||||
logDone("daemon - verify no start on daemon init errors")
|
||||
}
|
||||
|
||||
func TestDaemonUlimitDefaults(t *testing.T) {
|
||||
testRequires(t, NativeExecDriver)
|
||||
d := NewDaemon(t)
|
||||
|
|
|
@ -267,6 +267,10 @@ func (d *Daemon) Cmd(name string, arg ...string) (string, error) {
|
|||
return string(b), err
|
||||
}
|
||||
|
||||
func (d *Daemon) LogfileName() string {
|
||||
return d.logFile.Name()
|
||||
}
|
||||
|
||||
func daemonHost() string {
|
||||
daemonUrlStr := "unix://" + api.DEFAULTUNIXSOCKET
|
||||
if daemonHostVar := os.Getenv("DOCKER_HOST"); daemonHostVar != "" {
|
||||
|
|
Loading…
Reference in a new issue