Windows: Pass back system errors on container exit
Signed-off-by: John Howard <jhoward@microsoft.com> While debugging #32838, it was found (https://github.com/moby/moby/issues/32838#issuecomment-356005845) that the utility VM in some circumstances was crashing. Unfortunately, this was silently thrown away, and as far as the build step (also applies to docker run) was concerned, the exit code was zero and the error was thrown away. Windows containers operate differently to containers on Linux, and there can be legitimate system errors during container shutdown after the init process exits. This PR handles this and passes the error all the way back to the client, and correctly causes a build step running a container which hits a system error to fail, rather than blindly trying to keep going, assuming all is good, and getting a subsequent failure on a commit. With this change, assuming an error occurs, here's an example of a failure which was previously reported as a commit error: ``` The command 'powershell -Command $ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue'; Install-WindowsFeature -Name Web-App-Dev ; Install-WindowsFeature -Name ADLDS; Install-WindowsFeature -Name Web-Mgmt-Compat; Install-WindowsFeature -Name Web-Mgmt-Service; Install-WindowsFeature -Name Web-Metabase; Install-WindowsFeature -Name Web-Lgcy-Scripting; Install-WindowsFeature -Name Web-WMI; Install-WindowsFeature -Name Web-WHC; Install-WindowsFeature -Name Web-Scripting-Tools; Install-WindowsFeature -Name Web-Net-Ext45; Install-WindowsFeature -Name Web-ASP; Install-WindowsFeature -Name Web-ISAPI-Ext; Install-WindowsFeature -Name Web-ISAPI-Filter; Install-WindowsFeature -Name Web-Default-Doc; Install-WindowsFeature -Name Web-Dir-Browsing; Install-WindowsFeature -Name Web-Http-Errors; Install-WindowsFeature -Name Web-Static-Content; Install-WindowsFeature -Name Web-Http-Redirect; Install-WindowsFeature -Name Web-DAV-Publishing; Install-WindowsFeature -Name Web-Health; Install-WindowsFeature -Name Web-Http-Logging; 
Install-WindowsFeature -Name Web-Custom-Logging; Install-WindowsFeature -Name Web-Log-Libraries; Install-WindowsFeature -Name Web-Request-Monitor; Install-WindowsFeature -Name Web-Http-Tracing; Install-WindowsFeature -Name Web-Stat-Compression; Install-WindowsFeature -Name Web-Dyn-Compression; Install-WindowsFeature -Name Web-Security; Install-WindowsFeature -Name Web-Windows-Auth; Install-WindowsFeature -Name Web-Basic-Auth; Install-WindowsFeature -Name Web-Url-Auth; Install-WindowsFeature -Name Web-WebSockets; Install-WindowsFeature -Name Web-AppInit; Install-WindowsFeature -Name NET-WCF-HTTP-Activation45; Install-WindowsFeature -Name NET-WCF-Pipe-Activation45; Install-WindowsFeature -Name NET-WCF-TCP-Activation45;' returned a non-zero code: 4294967295: container shutdown failed: container ba9c65054d42d4830fb25ef55e4ab3287550345aa1a2bb265df4e5bfcd79c78a encountered an error during WaitTimeout: failure in a Windows system call: The compute system exited unexpectedly. (0xc0370106) ``` Without this change, it would be incorrectly reported, such as in this comment: https://github.com/moby/moby/issues/32838#issuecomment-309621097 ``` Step 3/8 : ADD buildtools C:/buildtools re-exec error: exit status 1: output: time="2017-06-20T11:37:38+10:00" level=error msg="hcsshim::ImportLayer failed in Win32: The system cannot find the path specified. (0x3) layerId=\\\\?\\C:\\ProgramData\\docker\\windowsfilter\\b41d28c95f98368b73fc192cb9205700e216691495c1f9ac79b9b04ec4923ea2 flavour=1 folder=C:\\Windows\\TEMP\\hcs232661915" hcsshim::ImportLayer failed in Win32: The system cannot find the path specified. (0x3) layerId=\\?\C:\ProgramData\docker\windowsfilter\b41d28c95f98368b73fc192cb9205700e216691495c1f9ac79b9b04ec4923ea2 flavour=1 folder=C:\Windows\TEMP\hcs232661915 ```
This commit is contained in:
parent
15001f83bd
commit
8c52560ea4
6 changed files with 43 additions and 7 deletions
|
@ -93,7 +93,7 @@ func (c *containerManager) Run(ctx context.Context, cID string, stdout, stderr i
|
|||
close(finished)
|
||||
logCancellationError(cancelErrCh,
|
||||
fmt.Sprintf("a non-zero code from ContainerWait: %d", status.ExitCode()))
|
||||
return &statusCodeError{code: status.ExitCode(), err: err}
|
||||
return &statusCodeError{code: status.ExitCode(), err: status.Err()}
|
||||
}
|
||||
|
||||
close(finished)
|
||||
|
@ -112,6 +112,9 @@ type statusCodeError struct {
|
|||
}
|
||||
|
||||
func (e *statusCodeError) Error() string {
|
||||
if e.err == nil {
|
||||
return ""
|
||||
}
|
||||
return e.err.Error()
|
||||
}
|
||||
|
||||
|
|
|
@ -348,11 +348,15 @@ func dispatchRun(d dispatchRequest, c *instructions.RunCommand) error {
|
|||
if err := d.builder.containerManager.Run(d.builder.clientCtx, cID, d.builder.Stdout, d.builder.Stderr); err != nil {
|
||||
if err, ok := err.(*statusCodeError); ok {
|
||||
// TODO: change error type, because jsonmessage.JSONError assumes HTTP
|
||||
msg := fmt.Sprintf(
|
||||
"The command '%s' returned a non-zero code: %d",
|
||||
strings.Join(runConfig.Cmd, " "), err.StatusCode())
|
||||
if err.Error() != "" {
|
||||
msg = fmt.Sprintf("%s: %s", msg, err.Error())
|
||||
}
|
||||
return &jsonmessage.JSONError{
|
||||
Message: fmt.Sprintf(
|
||||
"The command '%s' returned a non-zero code: %d",
|
||||
strings.Join(runConfig.Cmd, " "), err.StatusCode()),
|
||||
Code: err.StatusCode(),
|
||||
Message: msg,
|
||||
Code: err.StatusCode(),
|
||||
}
|
||||
}
|
||||
return err
|
||||
|
|
|
@ -29,7 +29,7 @@ type State struct {
|
|||
Dead bool
|
||||
Pid int
|
||||
ExitCodeValue int `json:"ExitCode"`
|
||||
ErrorMsg string `json:"Error"` // contains last known error during container start or remove
|
||||
ErrorMsg string `json:"Error"` // contains last known error during container start, stop, or remove
|
||||
StartedAt time.Time
|
||||
FinishedAt time.Time
|
||||
Health *Health
|
||||
|
|
|
@ -69,6 +69,9 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerd.EventType, ei libc
|
|||
c.RestartCount++
|
||||
c.SetRestarting(&exitStatus)
|
||||
} else {
|
||||
if ei.Error != nil {
|
||||
c.SetError(ei.Error)
|
||||
}
|
||||
c.SetStopped(&exitStatus)
|
||||
defer daemon.autoRemove(c)
|
||||
}
|
||||
|
|
|
@ -1203,7 +1203,13 @@ func (c *client) shutdownContainer(ctr *container) error {
|
|||
if err != nil {
|
||||
c.logger.WithError(err).WithField("container", ctr.id).
|
||||
Debug("failed to shutdown container, terminating it")
|
||||
return c.terminateContainer(ctr)
|
||||
terminateErr := c.terminateContainer(ctr)
|
||||
if terminateErr != nil {
|
||||
c.logger.WithError(terminateErr).WithField("container", ctr.id).
|
||||
Error("failed to shutdown container, and subsequent terminate also failed")
|
||||
return fmt.Errorf("%s: subsequent terminate failed %s", err, terminateErr)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
|
@ -1234,6 +1240,8 @@ func (c *client) reapProcess(ctr *container, p *process) int {
|
|||
"process": p.id,
|
||||
})
|
||||
|
||||
var eventErr error
|
||||
|
||||
// Block indefinitely for the process to exit.
|
||||
if err := p.hcsProcess.Wait(); err != nil {
|
||||
if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE {
|
||||
|
@ -1263,6 +1271,8 @@ func (c *client) reapProcess(ctr *container, p *process) int {
|
|||
|
||||
if err := p.hcsProcess.Close(); err != nil {
|
||||
logger.WithError(err).Warnf("failed to cleanup hcs process resources")
|
||||
exitCode = -1
|
||||
eventErr = fmt.Errorf("hcsProcess.Close() failed %s", err)
|
||||
}
|
||||
|
||||
var pendingUpdates bool
|
||||
|
@ -1286,13 +1296,27 @@ func (c *client) reapProcess(ctr *container, p *process) int {
|
|||
}
|
||||
|
||||
if err := c.shutdownContainer(ctr); err != nil {
|
||||
exitCode = -1
|
||||
logger.WithError(err).Warn("failed to shutdown container")
|
||||
thisErr := fmt.Errorf("failed to shutdown container: %s", err)
|
||||
if eventErr != nil {
|
||||
eventErr = fmt.Errorf("%s: %s", eventErr, thisErr)
|
||||
} else {
|
||||
eventErr = thisErr
|
||||
}
|
||||
} else {
|
||||
logger.Debug("completed container shutdown")
|
||||
}
|
||||
|
||||
if err := ctr.hcsContainer.Close(); err != nil {
|
||||
exitCode = -1
|
||||
logger.WithError(err).Error("failed to clean hcs container resources")
|
||||
thisErr := fmt.Errorf("failed to terminate container: %s", err)
|
||||
if eventErr != nil {
|
||||
eventErr = fmt.Errorf("%s: %s", eventErr, thisErr)
|
||||
} else {
|
||||
eventErr = thisErr
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1305,6 +1329,7 @@ func (c *client) reapProcess(ctr *container, p *process) int {
|
|||
ExitCode: uint32(exitCode),
|
||||
ExitedAt: exitedAt,
|
||||
UpdatePending: pendingUpdates,
|
||||
Error: eventErr,
|
||||
}
|
||||
c.logger.WithFields(logrus.Fields{
|
||||
"container": ctr.id,
|
||||
|
|
|
@ -73,6 +73,7 @@ type EventInfo struct {
|
|||
OOMKilled bool
|
||||
// Windows Only field
|
||||
UpdatePending bool
|
||||
Error error
|
||||
}
|
||||
|
||||
// Backend defines callbacks that the client of the library needs to implement.
|
||||
|
|
Loading…
Reference in a new issue