Browse Source

Merge pull request #35967 from Microsoft/jjh/32838-pass-container-shutdown-error-back

Windows: Pass back system errors on container exit
Yong Tang 7 years ago
parent
commit
66e6beeb24

+ 4 - 1
builder/dockerfile/containerbackend.go

@@ -93,7 +93,7 @@ func (c *containerManager) Run(ctx context.Context, cID string, stdout, stderr i
 		close(finished)
 		logCancellationError(cancelErrCh,
 			fmt.Sprintf("a non-zero code from ContainerWait: %d", status.ExitCode()))
-		return &statusCodeError{code: status.ExitCode(), err: err}
+		return &statusCodeError{code: status.ExitCode(), err: status.Err()}
 	}
 
 	close(finished)
@@ -112,6 +112,9 @@ type statusCodeError struct {
 }
 
 func (e *statusCodeError) Error() string {
+	if e.err == nil {
+		return ""
+	}
 	return e.err.Error()
 }
 

+ 8 - 4
builder/dockerfile/dispatchers.go

@@ -348,11 +348,15 @@ func dispatchRun(d dispatchRequest, c *instructions.RunCommand) error {
 	if err := d.builder.containerManager.Run(d.builder.clientCtx, cID, d.builder.Stdout, d.builder.Stderr); err != nil {
 		if err, ok := err.(*statusCodeError); ok {
 			// TODO: change error type, because jsonmessage.JSONError assumes HTTP
+			msg := fmt.Sprintf(
+				"The command '%s' returned a non-zero code: %d",
+				strings.Join(runConfig.Cmd, " "), err.StatusCode())
+			if err.Error() != "" {
+				msg = fmt.Sprintf("%s: %s", msg, err.Error())
+			}
 			return &jsonmessage.JSONError{
-				Message: fmt.Sprintf(
-					"The command '%s' returned a non-zero code: %d",
-					strings.Join(runConfig.Cmd, " "), err.StatusCode()),
-				Code: err.StatusCode(),
+				Message: msg,
+				Code:    err.StatusCode(),
 			}
 		}
 		return err

+ 1 - 1
container/state.go

@@ -29,7 +29,7 @@ type State struct {
 	Dead              bool
 	Pid               int
 	ExitCodeValue     int    `json:"ExitCode"`
-	ErrorMsg          string `json:"Error"` // contains last known error during container start or remove
+	ErrorMsg          string `json:"Error"` // contains last known error during container start, stop, or remove
 	StartedAt         time.Time
 	FinishedAt        time.Time
 	Health            *Health

+ 3 - 0
daemon/monitor.go

@@ -69,6 +69,9 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerd.EventType, ei libc
 				c.RestartCount++
 				c.SetRestarting(&exitStatus)
 			} else {
+				if ei.Error != nil {
+					c.SetError(ei.Error)
+				}
 				c.SetStopped(&exitStatus)
 				defer daemon.autoRemove(c)
 			}

+ 26 - 1
libcontainerd/client_local_windows.go

@@ -1203,7 +1203,13 @@ func (c *client) shutdownContainer(ctr *container) error {
 	if err != nil {
 		c.logger.WithError(err).WithField("container", ctr.id).
 			Debug("failed to shutdown container, terminating it")
-		return c.terminateContainer(ctr)
+		terminateErr := c.terminateContainer(ctr)
+		if terminateErr != nil {
+			c.logger.WithError(terminateErr).WithField("container", ctr.id).
+				Error("failed to shutdown container, and subsequent terminate also failed")
+			return fmt.Errorf("%s: subsequent terminate failed %s", err, terminateErr)
+		}
+		return err
 	}
 
 	return nil
@@ -1234,6 +1240,8 @@ func (c *client) reapProcess(ctr *container, p *process) int {
 		"process":   p.id,
 	})
 
+	var eventErr error
+
 	// Block indefinitely for the process to exit.
 	if err := p.hcsProcess.Wait(); err != nil {
 		if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE {
@@ -1263,6 +1271,8 @@ func (c *client) reapProcess(ctr *container, p *process) int {
 
 	if err := p.hcsProcess.Close(); err != nil {
 		logger.WithError(err).Warnf("failed to cleanup hcs process resources")
+		exitCode = -1
+		eventErr = fmt.Errorf("hcsProcess.Close() failed %s", err)
 	}
 
 	var pendingUpdates bool
@@ -1286,13 +1296,27 @@ func (c *client) reapProcess(ctr *container, p *process) int {
 		}
 
 		if err := c.shutdownContainer(ctr); err != nil {
+			exitCode = -1
 			logger.WithError(err).Warn("failed to shutdown container")
+			thisErr := fmt.Errorf("failed to shutdown container: %s", err)
+			if eventErr != nil {
+				eventErr = fmt.Errorf("%s: %s", eventErr, thisErr)
+			} else {
+				eventErr = thisErr
+			}
 		} else {
 			logger.Debug("completed container shutdown")
 		}
 
 		if err := ctr.hcsContainer.Close(); err != nil {
+			exitCode = -1
 			logger.WithError(err).Error("failed to clean hcs container resources")
+			thisErr := fmt.Errorf("failed to terminate container: %s", err)
+			if eventErr != nil {
+				eventErr = fmt.Errorf("%s: %s", eventErr, thisErr)
+			} else {
+				eventErr = thisErr
+			}
 		}
 	}
 
@@ -1305,6 +1329,7 @@ func (c *client) reapProcess(ctr *container, p *process) int {
 				ExitCode:      uint32(exitCode),
 				ExitedAt:      exitedAt,
 				UpdatePending: pendingUpdates,
+				Error:         eventErr,
 			}
 			c.logger.WithFields(logrus.Fields{
 				"container":  ctr.id,

+ 1 - 0
libcontainerd/types.go

@@ -73,6 +73,7 @@ type EventInfo struct {
 	OOMKilled   bool
 	// Windows Only field
 	UpdatePending bool
+	Error         error
 }
 
 // Backend defines callbacks that the client of the library needs to implement.