소스 검색

Add SIGUSR1 handler for dumping stack/goroutine traces

Add a SIGUSR1 handler that dumps goroutine stack traces to the daemon log
without stopping the daemon, based on feedback regarding when to dump
goroutine stacks.  SIGQUIT now also dumps goroutine stack traces, and is
then followed by a hard exit from the daemon (no cleanup).

Docker-DCO-1.1-Signed-off-by: Phil Estes <estesp@linux.vnet.ibm.com> (github: estesp)
Phil Estes 10 년 전
부모
커밋
95fcf76cc6
4개의 변경된 파일, 52개의 추가 그리고 11개의 삭제
  1. 3 0
      daemon/daemon.go
  2. 21 0
      daemon/debugtrap.go
  3. 7 0
      daemon/debugtrap_unsupported.go
  4. 21 11
      pkg/signal/trap.go

+ 3 - 0
daemon/daemon.go

@@ -747,6 +747,9 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 		return nil, err
 		return nil, err
 	}
 	}
 
 
+	// set up SIGUSR1 handler to dump goroutine stacks
+	setupSigusr1Trap()
+
 	// set up the tmpDir to use a canonical path
 	// set up the tmpDir to use a canonical path
 	tmp, err := tempDir(config.Root)
 	tmp, err := tempDir(config.Root)
 	if err != nil {
 	if err != nil {

+ 21 - 0
daemon/debugtrap.go

@@ -0,0 +1,21 @@
+// +build !windows
+
+package daemon
+
+import (
+	"os"
+	"os/signal"
+	"syscall"
+
+	psignal "github.com/docker/docker/pkg/signal"
+)
+
+// setupSigusr1Trap registers a handler for SIGUSR1. Each time the signal is
+// delivered, the stacks of all goroutines are written to the log through
+// psignal.DumpStacks; the handler itself never exits the process.
+func setupSigusr1Trap() {
+	usr1 := make(chan os.Signal, 1)
+	signal.Notify(usr1, syscall.SIGUSR1)
+
+	// Drain the channel on a dedicated goroutine so the caller returns
+	// immediately and every delivered SIGUSR1 produces one dump.
+	dumpOnSignal := func() {
+		for range usr1 {
+			psignal.DumpStacks()
+		}
+	}
+	go dumpOnSignal()
+}

+ 7 - 0
daemon/debugtrap_unsupported.go

@@ -0,0 +1,7 @@
+// +build windows
+
+package daemon
+
+// setupSigusr1Trap is a no-op on platforms without SIGUSR1. The build
+// constraint is the exact complement of the "!windows" constraint on
+// debugtrap.go, so exactly one definition of this function is compiled for
+// any target. The file must be in package daemon because daemon.go calls
+// setupSigusr1Trap without a package qualifier.
+func setupSigusr1Trap() {
+}

+ 21 - 11
pkg/signal/trap.go

@@ -3,6 +3,7 @@ package signal
 import (
 import (
 	"os"
 	"os"
 	gosignal "os/signal"
 	gosignal "os/signal"
+	"runtime"
 	"sync/atomic"
 	"sync/atomic"
 	"syscall"
 	"syscall"
 
 
@@ -14,41 +15,50 @@ import (
 // (and the Docker engine in particular).
 // (and the Docker engine in particular).
 //
 //
 // * If SIGINT or SIGTERM are received, `cleanup` is called, then the process is terminated.
 // * If SIGINT or SIGTERM are received, `cleanup` is called, then the process is terminated.
-// * If SIGINT or SIGTERM are repeated 3 times before cleanup is complete, then cleanup is
-// skipped and the process terminated directly.
-// * If "DEBUG" is set in the environment, SIGQUIT causes an exit without cleanup.
+// * If SIGINT or SIGTERM are received 3 times before cleanup is complete, then cleanup is
+//   skipped and the process is terminated immediately (allows force quit of stuck daemon)
+// * A SIGQUIT always causes an exit without cleanup, with a goroutine dump preceding exit.
 //
 //
 func Trap(cleanup func()) {
 func Trap(cleanup func()) {
 	c := make(chan os.Signal, 1)
 	c := make(chan os.Signal, 1)
-	signals := []os.Signal{os.Interrupt, syscall.SIGTERM}
-	if os.Getenv("DEBUG") == "" {
-		signals = append(signals, syscall.SIGQUIT)
-	}
+	// we will handle INT, TERM, QUIT here
+	signals := []os.Signal{os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT}
 	gosignal.Notify(c, signals...)
 	gosignal.Notify(c, signals...)
 	go func() {
 	go func() {
 		interruptCount := uint32(0)
 		interruptCount := uint32(0)
 		for sig := range c {
 		for sig := range c {
 			go func(sig os.Signal) {
 			go func(sig os.Signal) {
-				logrus.Infof("Received signal '%v', starting shutdown of docker...", sig)
+				logrus.Infof("Processing signal '%v'", sig)
 				switch sig {
 				switch sig {
 				case os.Interrupt, syscall.SIGTERM:
 				case os.Interrupt, syscall.SIGTERM:
-					// If the user really wants to interrupt, let him do so.
 					if atomic.LoadUint32(&interruptCount) < 3 {
 					if atomic.LoadUint32(&interruptCount) < 3 {
 						// Initiate the cleanup only once
 						// Initiate the cleanup only once
 						if atomic.AddUint32(&interruptCount, 1) == 1 {
 						if atomic.AddUint32(&interruptCount, 1) == 1 {
-							// Call cleanup handler
+							// Call the provided cleanup handler
 							cleanup()
 							cleanup()
 							os.Exit(0)
 							os.Exit(0)
 						} else {
 						} else {
 							return
 							return
 						}
 						}
 					} else {
 					} else {
-						logrus.Infof("Force shutdown of docker, interrupting cleanup")
+						// 3 SIGTERM/INT signals received; force exit without cleanup
+						logrus.Infof("Forcing docker daemon shutdown without cleanup; 3 interrupts received")
 					}
 					}
 				case syscall.SIGQUIT:
 				case syscall.SIGQUIT:
+					DumpStacks()
+					logrus.Infof("Forcing docker daemon shutdown without cleanup on SIGQUIT")
 				}
 				}
+				// for the forced SIGINT/SIGTERM and the SIGQUIT non-clean shutdown cases, exit with 128 + signal number
 				os.Exit(128 + int(sig.(syscall.Signal)))
 				os.Exit(128 + int(sig.(syscall.Signal)))
 			}(sig)
 			}(sig)
 		}
 		}
 	}()
 	}()
 }
 }
+
+// DumpStacks writes the stack traces of all goroutines to the log at info
+// level, framed by BEGIN/END marker lines.
+func DumpStacks() {
+	// runtime.Stack truncates its output to the size of the buffer it is
+	// given, so start at 16 KiB and keep doubling until the whole trace
+	// fits; a daemon with many goroutines easily exceeds a fixed buffer.
+	buf := make([]byte, 16384)
+	for {
+		n := runtime.Stack(buf, true)
+		if n < len(buf) {
+			buf = buf[:n]
+			break
+		}
+		buf = make([]byte, 2*len(buf))
+	}
+	// Note that if the daemon is started with a less-verbose log-level than "info" (the default), the goroutine
+	// traces won't show up in the log.
+	logrus.Infof("=== BEGIN goroutine stack dump ===\n%s\n=== END goroutine stack dump ===", buf)
+}