Merge pull request #25143 from cpuguy83/update_go-check

Add stack dumps from the daemon(s) on test timeout
2016-08-15 22:09:20 -07:00 · 2016-08-15 22:09:20 -07:00 · 3c6f4cd5a6
commit 3c6f4cd5a6
parent f5ef498289 82dd2c3159
7 changed files with 160 additions and 4 deletions
--- a/hack/vendor.sh
+++ b/hack/vendor.sh
@ -47,7 +47,7 @@ clone git github.com/Microsoft/hcsshim v0.3.6
 clone git github.com/Microsoft/go-winio v0.3.4
 clone git github.com/Sirupsen/logrus v0.10.0 # logrus is a common dependency among multiple deps
 clone git github.com/docker/libtrust 9cbd2a1374f46905c68a4eb3694a130610adc62a
-clone git github.com/go-check/check 03a4d9dcf2f92eae8e90ed42aa2656f63fdd0b14 https://github.com/cpuguy83/check.git
+clone git github.com/go-check/check 4ed411733c5785b40214c70bce814c3a3a689609 https://github.com/cpuguy83/check.git
 clone git github.com/gorilla/context v1.1
 clone git github.com/gorilla/mux v1.1
 clone git github.com/kr/pty 5cf931ef8f
--- a/integration-cli/check_test.go
+++ b/integration-cli/check_test.go
@ -33,6 +33,12 @@ func init() {
 type DockerSuite struct {
 }

+// OnTimeout is the hook the check runner calls when a test or fixture of this
+// suite exceeds the check timeout. It requests a stack dump from the main test
+// daemon, provided a pid was recorded and isLocalDaemon is set.
+func (s *DockerSuite) OnTimeout(c *check.C) {
+	if daemonPid <= 0 || !isLocalDaemon {
+		return
+	}
+	signalDaemonDump(daemonPid)
+}
+
 func (s *DockerSuite) TearDownTest(c *check.C) {
 	unpauseAllContainers()
 	deleteAllContainers()
@ -53,6 +59,10 @@ type DockerRegistrySuite struct {
 	d   *Daemon
 }

+// OnTimeout is the hook the check runner calls when a test or fixture of this
+// suite exceeds the check timeout; it asks the suite's daemon to dump its
+// stack via DumpStackAndQuit.
+func (s *DockerRegistrySuite) OnTimeout(c *check.C) {
+	s.d.DumpStackAndQuit()
+}
+
 func (s *DockerRegistrySuite) SetUpTest(c *check.C) {
 	testRequires(c, DaemonIsLinux, RegistryHosting)
 	s.reg = setupRegistry(c, false, "", "")
@ -81,6 +91,10 @@ type DockerSchema1RegistrySuite struct {
 	d   *Daemon
 }

+// OnTimeout is the hook the check runner calls when a test or fixture of this
+// suite exceeds the check timeout; it asks the suite's daemon to dump its
+// stack via DumpStackAndQuit.
+func (s *DockerSchema1RegistrySuite) OnTimeout(c *check.C) {
+	s.d.DumpStackAndQuit()
+}
+
 func (s *DockerSchema1RegistrySuite) SetUpTest(c *check.C) {
 	testRequires(c, DaemonIsLinux, RegistryHosting, NotArm64)
 	s.reg = setupRegistry(c, true, "", "")
@ -109,6 +123,10 @@ type DockerRegistryAuthHtpasswdSuite struct {
 	d   *Daemon
 }

+// OnTimeout is the hook the check runner calls when a test or fixture of this
+// suite exceeds the check timeout; it asks the suite's daemon to dump its
+// stack via DumpStackAndQuit.
+func (s *DockerRegistryAuthHtpasswdSuite) OnTimeout(c *check.C) {
+	s.d.DumpStackAndQuit()
+}
+
 func (s *DockerRegistryAuthHtpasswdSuite) SetUpTest(c *check.C) {
 	testRequires(c, DaemonIsLinux, RegistryHosting)
 	s.reg = setupRegistry(c, false, "htpasswd", "")
@ -139,6 +157,10 @@ type DockerRegistryAuthTokenSuite struct {
 	d   *Daemon
 }

+// OnTimeout is the hook the check runner calls when a test or fixture of this
+// suite exceeds the check timeout; it asks the suite's daemon to dump its
+// stack via DumpStackAndQuit.
+func (s *DockerRegistryAuthTokenSuite) OnTimeout(c *check.C) {
+	s.d.DumpStackAndQuit()
+}
+
 func (s *DockerRegistryAuthTokenSuite) SetUpTest(c *check.C) {
 	testRequires(c, DaemonIsLinux, RegistryHosting)
 	s.d = NewDaemon(c)
@ -174,6 +196,10 @@ type DockerDaemonSuite struct {
 	d  *Daemon
 }

+// OnTimeout is the hook the check runner calls when a test or fixture of this
+// suite exceeds the check timeout; it asks the suite's daemon to dump its
+// stack via DumpStackAndQuit.
+func (s *DockerDaemonSuite) OnTimeout(c *check.C) {
+	s.d.DumpStackAndQuit()
+}
+
 func (s *DockerDaemonSuite) SetUpTest(c *check.C) {
 	testRequires(c, DaemonIsLinux)
 	s.d = NewDaemon(c)
@ -217,6 +243,14 @@ type DockerSwarmSuite struct {
 	portIndex   int
 }

+// OnTimeout is the hook the check runner calls when a test or fixture of this
+// suite exceeds the check timeout. While holding daemonsLock it asks every
+// daemon tracked by the suite to dump its stack via DumpStackAndQuit.
+func (s *DockerSwarmSuite) OnTimeout(c *check.C) {
+	s.daemonsLock.Lock()
+	defer s.daemonsLock.Unlock()
+
+	for i := range s.daemons {
+		s.daemons[i].DumpStackAndQuit()
+	}
+}
+
 func (s *DockerSwarmSuite) SetUpTest(c *check.C) {
 	testRequires(c, DaemonIsLinux)
 }
--- a/integration-cli/daemon.go
+++ b/integration-cli/daemon.go
@ -273,6 +273,16 @@ func (d *Daemon) Kill() error {
 	return nil
 }

+// DumpStackAndQuit asks the daemon process to dump its stack by calling
+// signalDaemonDump with the daemon's pid (SIGQUIT on non-Windows platforms),
+// which triggers the daemon to dump its stack to its log file and exit.
+// This is used primarily for gathering debug information on test timeout.
+//
+// It is a no-op when d is nil or the daemon process was never started, so it
+// is safe to call from a timeout hook that fires before the suite has created
+// its daemon.
+func (d *Daemon) DumpStackAndQuit() {
+	// The timeout hook also runs for fixture timeouts, where the suite's
+	// daemon field may still be nil; dereferencing d.cmd would panic.
+	if d == nil || d.cmd == nil || d.cmd.Process == nil {
+		return
+	}
+	signalDaemonDump(d.cmd.Process.Pid)
+}
+
 // Stop will send a SIGINT every second and wait for the daemon to stop.
 // If it timeouts, a SIGKILL is sent.
 // Stop will not delete the daemon directory. If a purged daemon is needed,
--- a/integration-cli/daemon_unix.go
+++ b/integration-cli/daemon_unix.go
@ -0,0 +1,9 @@
+// +build !windows
+
+package main
+
+import "syscall"
+
+// signalDaemonDump sends SIGQUIT to the process with the given pid. It is a
+// best-effort debugging aid: a delivery failure is ignored.
+func signalDaemonDump(pid int) {
+	// kill(2) interprets pid 0 and negative pids as process-group or
+	// broadcast targets, which could signal the test binary itself; only
+	// ever signal a concrete process.
+	if pid <= 0 {
+		return
+	}
+	// Nothing useful can be done if the signal cannot be delivered.
+	_ = syscall.Kill(pid, syscall.SIGQUIT)
+}
--- a/integration-cli/daemon_windows.go
+++ b/integration-cli/daemon_windows.go
@ -0,0 +1,42 @@
+package main
+
+import (
+	"strconv"
+	"syscall"
+	"unsafe"
+)
+
+// openEvent opens an existing named event object by calling proc, which is
+// expected to be kernel32's OpenEventW.
+//
+// desiredAccess, inheritHandle and name are forwarded as the call's three
+// arguments. On failure the returned handle is 0 and err is non-nil.
+func openEvent(desiredAccess uint32, inheritHandle bool, name string, proc *syscall.LazyProc) (handle syscall.Handle, err error) {
+	namep, err := syscall.UTF16PtrFromString(name)
+	if err != nil {
+		// name cannot be converted (it contains a NUL byte)
+		return 0, err
+	}
+	var inherit uint32
+	if inheritHandle {
+		inherit = 1
+	}
+	r0, _, e1 := proc.Call(uintptr(desiredAccess), uintptr(inherit), uintptr(unsafe.Pointer(namep)))
+	if r0 == 0 {
+		// OpenEventW reports failure with a NULL handle, not
+		// INVALID_HANDLE_VALUE, so the zero return is what must be checked.
+		if errno, ok := e1.(syscall.Errno); ok && errno != 0 {
+			return 0, errno
+		}
+		return 0, syscall.EINVAL
+	}
+	return syscall.Handle(r0), nil
+}
+
+// pulseEvent signals the event identified by handle by calling proc, which is
+// expected to be kernel32's PulseEvent.
+//
+// PulseEvent returns a non-zero BOOL on success and zero on failure, so an
+// error is returned only for a zero result.
+func pulseEvent(handle syscall.Handle, proc *syscall.LazyProc) (err error) {
+	r0, _, e1 := proc.Call(uintptr(handle))
+	if r0 != 0 {
+		return nil
+	}
+	if errno, ok := e1.(syscall.Errno); ok && errno != 0 {
+		return errno
+	}
+	return syscall.EINVAL
+}
+
+// signalDaemonDump pulses the named event "Global\docker-daemon-<pid>",
+// presumably the event the daemon listens on for stack dump requests (the
+// daemon side is not visible here). It is best-effort: if the event cannot be
+// opened or pulsed, nothing happens.
+func signalDaemonDump(pid int) {
+	// EVENT_MODIFY_STATE, the access right required to pulse an event
+	const eventModifyState = 0x0002
+
+	modkernel32 := syscall.NewLazyDLL("kernel32.dll")
+	procOpenEvent := modkernel32.NewProc("OpenEventW")
+	procPulseEvent := modkernel32.NewProc("PulseEvent")
+
+	ev := "Global\\docker-daemon-" + strconv.Itoa(pid)
+	h, err := openEvent(eventModifyState, false, ev, procOpenEvent)
+	if err != nil || h == 0 {
+		return
+	}
+	// Release the event handle once it has been pulsed so it is not leaked.
+	defer syscall.CloseHandle(h)
+
+	// A failed pulse only means no dump is produced; there is nothing to recover.
+	_ = pulseEvent(h, procPulseEvent)
+}
--- a/integration-cli/docker_test_vars.go
+++ b/integration-cli/docker_test_vars.go
@ -3,8 +3,11 @@ package main
 import (
 	"encoding/json"
 	"fmt"
+	"io/ioutil"
 	"os"
 	"os/exec"
+	"path/filepath"
+	"strconv"

 	"github.com/docker/docker/pkg/reexec"
 )
@ -65,6 +68,9 @@ var (
 	// WindowsBaseImage is the name of the base image for Windows testing
 	// Environment variable WINDOWS_BASE_IMAGE can override this
 	WindowsBaseImage = "windowsservercore"
+
+	// daemonPid is the pid of the main test daemon
+	daemonPid int
 )

 const (
@ -134,4 +140,12 @@ func init() {
 		WindowsBaseImage = os.Getenv("WINDOWS_BASE_IMAGE")
 		fmt.Println("INFO: Windows Base image is ", WindowsBaseImage)
 	}
+
+	dest := os.Getenv("DEST")
+	b, err = ioutil.ReadFile(filepath.Join(dest, "docker.pid"))
+	if err == nil {
+		if p, err := strconv.ParseInt(string(b), 10, 32); err == nil {
+			daemonPid = int(p)
+		}
+	}
 }
--- a/vendor/src/github.com/go-check/check/check.go
+++ b/vendor/src/github.com/go-check/check/check.go
@ -514,6 +514,7 @@ type suiteRunner struct {
 	suite                     interface{}
 	setUpSuite, tearDownSuite *methodType
 	setUpTest, tearDownTest   *methodType
+	onTimeout                 *methodType
 	tests                     []*methodType
 	tracker                   *resultTracker
 	tempDir                   *tempDir
@ -591,6 +592,8 @@ func newSuiteRunner(suite interface{}, runConf *RunConf) *suiteRunner {
 			runner.setUpTest = method
 		case "TearDownTest":
 			runner.tearDownTest = method
+		case "OnTimeout":
+			runner.onTimeout = method
 		default:
 			prefix := "Test"
 			if conf.Benchmark {
@ -671,6 +674,23 @@ func (runner *suiteRunner) forkCall(method *methodType, kind funcKind, testName
 	return c
 }

+// timeoutErr is the value the runner panics with when a call does not finish
+// within the check timeout.
+type timeoutErr struct {
+	method *methodType   // method that was running when the timeout fired
+	t      time.Duration // timeout that was exceeded
+}

+// Error implements the error interface, naming the method and the timeout.
+func (e timeoutErr) Error() string {
+	name := e.method.String()
+	return fmt.Sprintf("%s test timed out after %v", name, e.t)
+}
+
+// isTimeout reports whether e holds a timeoutErr value. A nil error is never
+// a timeout.
+func isTimeout(e error) bool {
+	switch e.(type) {
+	case timeoutErr:
+		return true
+	default:
+		return false
+	}
+}
+
 // Same as forkCall(), but wait for call to finish before returning.
 func (runner *suiteRunner) runFunc(method *methodType, kind funcKind, testName string, logb *logger, dispatcher func(c *C)) *C {
 	var timeout <-chan time.Time
@ -681,7 +701,19 @@ func (runner *suiteRunner) runFunc(method *methodType, kind funcKind, testName s
 	select {
 	case <-c.done:
 	case <-timeout:
-		panic(fmt.Sprintf("%s test timed out after %v", method.String(), runner.checkTimeout))
+		if runner.onTimeout != nil {
+			// run the OnTimeout callback, allowing the suite to collect any sort of debug information it can
+			// `runFixture` is synchronous, so run this in a separate goroutine with a timeout
+			cChan := make(chan *C)
+			go func() {
+				cChan <- runner.runFixture(runner.onTimeout, c.testName, c.logb)
+			}()
+			select {
+			case <-cChan:
+			case <-time.After(runner.checkTimeout):
+			}
+		}
+		panic(timeoutErr{method, runner.checkTimeout})
 	}
 	return c
 }
@ -777,12 +809,14 @@ func (runner *suiteRunner) forkTest(method *methodType) *C {
 				c.logArgPanic(c.method, "*check.C")
 				return
 			}
+
 			if strings.HasPrefix(c.method.Info.Name, "Test") {
 				c.ResetTimer()
 				c.StartTimer()
 				c.method.Call([]reflect.Value{reflect.ValueOf(c)})
 				return
 			}
+
 			if !strings.HasPrefix(c.method.Info.Name, "Benchmark") {
 				panic("unexpected method prefix: " + c.method.Info.Name)
 			}
@ -791,6 +825,7 @@ func (runner *suiteRunner) forkTest(method *methodType) *C {
 			c.N = benchN
 			c.ResetTimer()
 			c.StartTimer()
+
 			c.method.Call([]reflect.Value{reflect.ValueOf(c)})
 			c.StopTimer()
 			if c.status() != succeededSt || c.duration >= c.benchTime || benchN >= 1e9 {
@ -825,7 +860,19 @@ func (runner *suiteRunner) runTest(method *methodType) *C {
 	select {
 	case <-c.done:
 	case <-timeout:
-		panic(fmt.Sprintf("%s test timed out after %v", method.String(), runner.checkTimeout))
+		if runner.onTimeout != nil {
+			// run the OnTimeout callback, allowing the suite to collect any sort of debug information it can
+			// `runFixture` is synchronous, so run this in a separate goroutine with a timeout
+			cChan := make(chan *C)
+			go func() {
+				cChan <- runner.runFixture(runner.onTimeout, c.testName, c.logb)
+			}()
+			select {
+			case <-cChan:
+			case <-time.After(runner.checkTimeout):
+			}
+		}
+		panic(timeoutErr{method, runner.checkTimeout})
 	}
 	return c
 }
@ -846,7 +893,7 @@ func (runner *suiteRunner) skipTests(status funcStatus, methods []*methodType) {
 func (runner *suiteRunner) checkFixtureArgs() bool {
 	succeeded := true
 	argType := reflect.TypeOf(&C{})
-	for _, method := range []*methodType{runner.setUpSuite, runner.tearDownSuite, runner.setUpTest, runner.tearDownTest} {
+	for _, method := range []*methodType{runner.setUpSuite, runner.tearDownSuite, runner.setUpTest, runner.tearDownTest, runner.onTimeout} {
 		if method != nil {
 			mt := method.Type()
 			if mt.NumIn() != 1 || mt.In(0) != argType {