Windows: Fix handle leaks/logging if init proc start fails

Signed-off-by: John Howard <jhoward@microsoft.com>

Fixes #38719

Fixes some subtle bugs on Windows

 - Fixes https://github.com/moby/moby/issues/38719. This one is the most important,
   as a failure to start the init process in a Windows container (i.e. when the
   `ctr.hcsContainer.CreateProcess(...)` call fails) causes leaked handles.
   The solution to the leak is to split out the `reapContainer` part of `reapProcess`
   into a separate function, which is now also called on that failure path. This
   ensures HCS resources are cleaned up correctly and not leaked.

 - Ensuring the reapProcess goroutine is started immediately after the process
   is actually started, so we don't leak in the case of failures such as
   from `newIOFromProcess` or `attachStdio`.

 - libcontainerd on Windows (local, not containerd) was not sending the EventCreate
   back to the monitor for Windows containers; it was sent only for LCOW. This was
   just an oversight from refactoring a couple of years ago by Mikael as far as I can
   tell. The event is technically not needed for functionality (only the logging was
   missing), but sending it is correct.
This commit is contained in:
John Howard 2019-02-12 12:19:50 -08:00
parent d4ceb61f2b
commit 32acc76b1a
2 changed files with 92 additions and 66 deletions

View file

@ -133,7 +133,8 @@ func translateContainerdStartErr(cmd string, setExitCode func(int), err error) e
if contains(errDesc, cmd) &&
(contains(errDesc, "executable file not found") ||
contains(errDesc, "no such file or directory") ||
contains(errDesc, "system cannot find the file specified")) {
contains(errDesc, "system cannot find the file specified") ||
contains(errDesc, "failed to run runc create/exec call")) {
setExitCode(127)
retErr = startInvalidConfigError(errDesc)
}

View file

@ -157,17 +157,32 @@ func (c *client) Create(_ context.Context, id string, spec *specs.Spec, runtimeO
return errors.WithStack(errdefs.Conflict(errors.New("id already in use")))
}
// spec.Linux must be nil for Windows containers, but spec.Windows
// will be filled in regardless of container platform. This is a
// temporary workaround due to LCOW requiring layer folder paths,
// which are stored under spec.Windows.
//
// TODO: @darrenstahlmsft fix this once the OCI spec is updated to
// support layer folder paths for LCOW
var err error
if spec.Linux == nil {
return c.createWindows(id, spec, runtimeOptions)
err = c.createWindows(id, spec, runtimeOptions)
} else {
err = c.createLinux(id, spec, runtimeOptions)
}
return c.createLinux(id, spec, runtimeOptions)
if err == nil {
c.eventQ.Append(id, func() {
ei := libcontainerdtypes.EventInfo{
ContainerID: id,
}
c.logger.WithFields(logrus.Fields{
"container": id,
"event": libcontainerdtypes.EventCreate,
}).Info("sending event")
err := c.backend.ProcessEvent(id, libcontainerdtypes.EventCreate, ei)
if err != nil {
c.logger.WithError(err).WithFields(logrus.Fields{
"container": id,
"event": libcontainerdtypes.EventCreate,
}).Error("failed to process event")
}
})
}
return err
}
func (c *client) createWindows(id string, spec *specs.Spec, runtimeOptions interface{}) error {
@ -561,23 +576,6 @@ func (c *client) createLinux(id string, spec *specs.Spec, runtimeOptions interfa
c.containers[id] = ctr
c.Unlock()
c.eventQ.Append(id, func() {
ei := libcontainerdtypes.EventInfo{
ContainerID: id,
}
c.logger.WithFields(logrus.Fields{
"container": ctr.id,
"event": libcontainerdtypes.EventCreate,
}).Info("sending event")
err := c.backend.ProcessEvent(id, libcontainerdtypes.EventCreate, ei)
if err != nil {
c.logger.WithError(err).WithFields(logrus.Fields{
"container": id,
"event": libcontainerdtypes.EventCreate,
}).Error("failed to process event")
}
})
logger.Debug("createLinux() completed successfully")
return nil
}
@ -655,7 +653,9 @@ func (c *client) Start(_ context.Context, id, _ string, withStdin bool, attachSt
// Configure the CommandLine/CommandArgs
setCommandLineAndArgs(ctr.isWindows, ctr.ociSpec.Process, createProcessParms)
logger.Debugf("start commandLine: %s", createProcessParms.CommandLine)
if ctr.isWindows {
logger.Debugf("start commandLine: %s", createProcessParms.CommandLine)
}
createProcessParms.User = ctr.ociSpec.Process.User.Username
@ -671,14 +671,31 @@ func (c *client) Start(_ context.Context, id, _ string, withStdin bool, attachSt
}
ctr.Lock()
defer ctr.Unlock()
// Start the command running in the container.
newProcess, err := ctr.hcsContainer.CreateProcess(createProcessParms)
if err != nil {
logger.WithError(err).Error("CreateProcess() failed")
// Fix for https://github.com/moby/moby/issues/38719.
// If the init process failed to launch, we still need to reap the
// container to avoid leaking it.
//
// Note we use the explicit exit code of 127 which is the
// Linux shell equivalent of "command not found". Windows cannot
// know ahead of time whether or not the command exists, especially
// in the case of Hyper-V containers.
ctr.Unlock()
exitedAt := time.Now()
p := &process{
id: libcontainerdtypes.InitProcessName,
pid: 0,
}
c.reapContainer(ctr, p, 127, exitedAt, nil, logger)
return -1, err
}
defer ctr.Unlock()
defer func() {
if err != nil {
if err := newProcess.Kill(); err != nil {
@ -701,6 +718,12 @@ func (c *client) Start(_ context.Context, id, _ string, withStdin bool, attachSt
}
logger.WithField("pid", p.pid).Debug("init process started")
ctr.status = libcontainerdtypes.StatusRunning
ctr.init = p
// Spin up a go routine waiting for exit to handle cleanup
go c.reapProcess(ctr, p)
dio, err := newIOFromProcess(newProcess, ctr.ociSpec.Process.Terminal)
if err != nil {
logger.WithError(err).Error("failed to get stdio pipes")
@ -708,14 +731,9 @@ func (c *client) Start(_ context.Context, id, _ string, withStdin bool, attachSt
}
_, err = attachStdio(dio)
if err != nil {
logger.WithError(err).Error("failed to attache stdio")
logger.WithError(err).Error("failed to attach stdio")
return -1, err
}
ctr.status = libcontainerdtypes.StatusRunning
ctr.init = p
// Spin up a go routine waiting for exit to handle cleanup
go c.reapProcess(ctr, p)
// Generate the associated event
c.eventQ.Append(id, func() {
@ -1326,37 +1344,7 @@ func (c *client) reapProcess(ctr *container, p *process) int {
}
if p.id == libcontainerdtypes.InitProcessName {
// Update container status
ctr.Lock()
ctr.status = libcontainerdtypes.StatusStopped
ctr.exitedAt = exitedAt
ctr.exitCode = uint32(exitCode)
close(ctr.waitCh)
if err := c.shutdownContainer(ctr); err != nil {
exitCode = -1
logger.WithError(err).Warn("failed to shutdown container")
thisErr := fmt.Errorf("failed to shutdown container: %s", err)
if eventErr != nil {
eventErr = fmt.Errorf("%s: %s", eventErr, thisErr)
} else {
eventErr = thisErr
}
} else {
logger.Debug("completed container shutdown")
}
ctr.Unlock()
if err := ctr.hcsContainer.Close(); err != nil {
exitCode = -1
logger.WithError(err).Error("failed to clean hcs container resources")
thisErr := fmt.Errorf("failed to terminate container: %s", err)
if eventErr != nil {
eventErr = fmt.Errorf("%s: %s", eventErr, thisErr)
} else {
eventErr = thisErr
}
}
exitCode, eventErr = c.reapContainer(ctr, p, exitCode, exitedAt, eventErr, logger)
}
c.eventQ.Append(ctr.id, func() {
@ -1390,3 +1378,40 @@ func (c *client) reapProcess(ctr *container, p *process) int {
return exitCode
}
// reapContainer marks the container as stopped, shuts it down and closes
// ctr.hcsContainer. A failure in either step forces the returned exit code to
// -1 and is folded into eventErr; the returned error is the one to be logged
// in the eventInfo sent back to the monitor.
func (c *client) reapContainer(ctr *container, p *process, exitCode int, exitedAt time.Time, eventErr error, logger *logrus.Entry) (int, error) {
	// Record the final state and shut the container down while holding the
	// container lock.
	ctr.Lock()
	ctr.exitCode = uint32(exitCode)
	ctr.exitedAt = exitedAt
	ctr.status = libcontainerdtypes.StatusStopped
	close(ctr.waitCh)

	shutdownErr := c.shutdownContainer(ctr)
	if shutdownErr == nil {
		logger.Debug("completed container shutdown")
	} else {
		exitCode = -1
		logger.WithError(shutdownErr).Warn("failed to shutdown container")
		wrapped := errors.Wrap(shutdownErr, "failed to shutdown container")
		if eventErr == nil {
			eventErr = wrapped
		} else {
			eventErr = errors.Wrap(eventErr, wrapped.Error())
		}
	}
	ctr.Unlock()

	// Closing the HCS container happens only after the lock has been released.
	closeErr := ctr.hcsContainer.Close()
	if closeErr == nil {
		return exitCode, eventErr
	}

	logger.WithError(closeErr).Error("failed to clean hcs container resources")
	wrapped := errors.Wrap(closeErr, "failed to terminate container")
	if eventErr == nil {
		eventErr = wrapped
	} else {
		eventErr = errors.Wrap(eventErr, wrapped.Error())
	}
	return -1, eventErr
}