Merge pull request #24984 from mlaventure/fix-restore-without-restore
Fix restoring behavior when live-restore is not set
This commit is contained in:
commit
e95d0b6bf1
5 changed files with 61 additions and 55 deletions
|
@ -343,7 +343,7 @@ func (clnt *client) newContainer(dir string, options ...CreateOption) *container
|
|||
}
|
||||
for _, option := range options {
|
||||
if err := option.Apply(container); err != nil {
|
||||
logrus.Error(err)
|
||||
logrus.Errorf("libcontainerd: newContainer(): %v", err)
|
||||
}
|
||||
}
|
||||
return container
|
||||
|
@ -391,7 +391,7 @@ func (clnt *client) restore(cont *containerd.Container, lastEvent *containerd.Ev
|
|||
clnt.lock(cont.Id)
|
||||
defer clnt.unlock(cont.Id)
|
||||
|
||||
logrus.Debugf("restore container %s state %s", cont.Id, cont.Status)
|
||||
logrus.Debugf("libcontainerd: restore container %s state %s", cont.Id, cont.Status)
|
||||
|
||||
containerID := cont.Id
|
||||
if _, err := clnt.getContainer(containerID); err == nil {
|
||||
|
@ -445,7 +445,7 @@ func (clnt *client) restore(cont *containerd.Container, lastEvent *containerd.Ev
|
|||
}})
|
||||
}
|
||||
|
||||
logrus.Warnf("unexpected backlog event: %#v", lastEvent)
|
||||
logrus.Warnf("libcontainerd: unexpected backlog event: %#v", lastEvent)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
@ -526,7 +526,7 @@ func (clnt *client) Restore(containerID string, options ...CreateOption) error {
|
|||
// container is still alive
|
||||
if clnt.liveRestore {
|
||||
if err := clnt.restore(cont, ev, options...); err != nil {
|
||||
logrus.Errorf("error restoring %s: %v", containerID, err)
|
||||
logrus.Errorf("libcontainerd: error restoring %s: %v", containerID, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -542,19 +542,25 @@ func (clnt *client) Restore(containerID string, options ...CreateOption) error {
|
|||
container.discardFifos()
|
||||
|
||||
if err := clnt.Signal(containerID, int(syscall.SIGTERM)); err != nil {
|
||||
logrus.Errorf("error sending sigterm to %v: %v", containerID, err)
|
||||
logrus.Errorf("libcontainerd: error sending sigterm to %v: %v", containerID, err)
|
||||
}
|
||||
// Let the main loop handle the exit event
|
||||
clnt.remote.Unlock()
|
||||
select {
|
||||
case <-time.After(10 * time.Second):
|
||||
if err := clnt.Signal(containerID, int(syscall.SIGKILL)); err != nil {
|
||||
logrus.Errorf("error sending sigkill to %v: %v", containerID, err)
|
||||
logrus.Errorf("libcontainerd: error sending sigkill to %v: %v", containerID, err)
|
||||
}
|
||||
select {
|
||||
case <-time.After(2 * time.Second):
|
||||
case <-w.wait():
|
||||
// relock because of the defer
|
||||
clnt.remote.Lock()
|
||||
return nil
|
||||
}
|
||||
case <-w.wait():
|
||||
// relock because of the defer
|
||||
clnt.remote.Lock()
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@ const defaultOwner = "docker"
|
|||
// Create is the entrypoint to create a container from a spec, and if successfully
|
||||
// created, start it too.
|
||||
func (clnt *client) Create(containerID string, spec Spec, options ...CreateOption) error {
|
||||
logrus.Debugln("LCD client.Create() with spec", spec)
|
||||
logrus.Debugln("libcontainerd: client.Create() with spec", spec)
|
||||
|
||||
configuration := &hcsshim.ContainerConfig{
|
||||
SystemType: "Container",
|
||||
|
@ -158,20 +158,20 @@ func (clnt *client) Create(containerID string, spec Spec, options ...CreateOptio
|
|||
container.options = options
|
||||
for _, option := range options {
|
||||
if err := option.Apply(container); err != nil {
|
||||
logrus.Error(err)
|
||||
logrus.Errorf("libcontainerd: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Call start, and if it fails, delete the container from our
|
||||
// internal structure, start will keep HCS in sync by deleting the
|
||||
// container there.
|
||||
logrus.Debugf("Create() id=%s, Calling start()", containerID)
|
||||
logrus.Debugf("libcontainerd: Create() id=%s, Calling start()", containerID)
|
||||
if err := container.start(); err != nil {
|
||||
clnt.deleteContainer(containerID)
|
||||
return err
|
||||
}
|
||||
|
||||
logrus.Debugf("Create() id=%s completed successfully", containerID)
|
||||
logrus.Debugf("libcontainerd: Create() id=%s completed successfully", containerID)
|
||||
return nil
|
||||
|
||||
}
|
||||
|
@ -210,20 +210,20 @@ func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendly
|
|||
createProcessParms.Environment = setupEnvironmentVariables(procToAdd.Env)
|
||||
createProcessParms.CommandLine = strings.Join(procToAdd.Args, " ")
|
||||
|
||||
logrus.Debugf("commandLine: %s", createProcessParms.CommandLine)
|
||||
logrus.Debugf("libcontainerd: commandLine: %s", createProcessParms.CommandLine)
|
||||
|
||||
// Start the command running in the container.
|
||||
var stdout, stderr io.ReadCloser
|
||||
var stdin io.WriteCloser
|
||||
newProcess, err := container.hcsContainer.CreateProcess(&createProcessParms)
|
||||
if err != nil {
|
||||
logrus.Errorf("AddProcess %s CreateProcess() failed %s", containerID, err)
|
||||
logrus.Errorf("libcontainerd: AddProcess(%s) CreateProcess() failed %s", containerID, err)
|
||||
return err
|
||||
}
|
||||
|
||||
stdin, stdout, stderr, err = newProcess.Stdio()
|
||||
if err != nil {
|
||||
logrus.Errorf("%s getting std pipes failed %s", containerID, err)
|
||||
logrus.Errorf("libcontainerd: %s getting std pipes failed %s", containerID, err)
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -292,20 +292,20 @@ func (clnt *client) Signal(containerID string, sig int) error {
|
|||
|
||||
cont.manualStopRequested = true
|
||||
|
||||
logrus.Debugf("lcd: Signal() containerID=%s sig=%d pid=%d", containerID, sig, cont.systemPid)
|
||||
logrus.Debugf("libcontainerd: Signal() containerID=%s sig=%d pid=%d", containerID, sig, cont.systemPid)
|
||||
|
||||
if syscall.Signal(sig) == syscall.SIGKILL {
|
||||
// Terminate the compute system
|
||||
if err := cont.hcsContainer.Terminate(); err != nil {
|
||||
if err != hcsshim.ErrVmcomputeOperationPending {
|
||||
logrus.Errorf("Failed to terminate %s - %q", containerID, err)
|
||||
logrus.Errorf("libcontainerd: failed to terminate %s - %q", containerID, err)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Terminate Process
|
||||
if err := cont.hcsProcess.Kill(); err != nil {
|
||||
// ignore errors
|
||||
logrus.Warnf("Failed to terminate pid %d in %s: %q", cont.systemPid, containerID, err)
|
||||
logrus.Warnf("libcontainerd: failed to terminate pid %d in %s: %q", cont.systemPid, containerID, err)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -345,13 +345,13 @@ func (clnt *client) Resize(containerID, processFriendlyName string, width, heigh
|
|||
h, w := uint16(height), uint16(width)
|
||||
|
||||
if processFriendlyName == InitFriendlyName {
|
||||
logrus.Debugln("Resizing systemPID in", containerID, cont.process.systemPid)
|
||||
logrus.Debugln("libcontainerd: resizing systemPID in", containerID, cont.process.systemPid)
|
||||
return cont.process.hcsProcess.ResizeConsole(w, h)
|
||||
}
|
||||
|
||||
for _, p := range cont.processes {
|
||||
if p.friendlyName == processFriendlyName {
|
||||
logrus.Debugln("Resizing exec'd process", containerID, p.systemPid)
|
||||
logrus.Debugln("libcontainerd: resizing exec'd process", containerID, p.systemPid)
|
||||
return p.hcsProcess.ResizeConsole(w, h)
|
||||
}
|
||||
}
|
||||
|
@ -378,7 +378,7 @@ func (clnt *client) Stats(containerID string) (*Stats, error) {
|
|||
// Restore is the handler for restoring a container
|
||||
func (clnt *client) Restore(containerID string, unusedOnWindows ...CreateOption) error {
|
||||
// TODO Windows: Implement this. For now, just tell the backend the container exited.
|
||||
logrus.Debugf("lcd Restore %s", containerID)
|
||||
logrus.Debugf("libcontainerd: Restore(%s)", containerID)
|
||||
return clnt.backend.StateChanged(containerID, StateInfo{
|
||||
CommonStateInfo: CommonStateInfo{
|
||||
State: StateExit,
|
||||
|
|
|
@ -67,7 +67,7 @@ func (ctr *container) cleanProcess(id string) {
|
|||
if p, ok := ctr.processes[id]; ok {
|
||||
for _, i := range []int{syscall.Stdin, syscall.Stdout, syscall.Stderr} {
|
||||
if err := os.Remove(p.fifo(i)); err != nil {
|
||||
logrus.Warnf("failed to remove %v for process %v: %v", p.fifo(i), id, err)
|
||||
logrus.Warnf("libcontainerd: failed to remove %v for process %v: %v", p.fifo(i), id, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -161,7 +161,7 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
|
|||
if st.State == StateExit && ctr.restartManager != nil {
|
||||
restart, wait, err := ctr.restartManager.ShouldRestart(e.Status, false, time.Since(ctr.startedAt))
|
||||
if err != nil {
|
||||
logrus.Warnf("container %s %v", ctr.containerID, err)
|
||||
logrus.Warnf("libcontainerd: container %s %v", ctr.containerID, err)
|
||||
} else if restart {
|
||||
st.State = StateRestart
|
||||
ctr.restarting = true
|
||||
|
@ -176,11 +176,11 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
|
|||
ctr.clean()
|
||||
ctr.client.q.append(e.Id, func() {
|
||||
if err := ctr.client.backend.StateChanged(e.Id, st); err != nil {
|
||||
logrus.Error(err)
|
||||
logrus.Errorf("libcontainerd: %v", err)
|
||||
}
|
||||
})
|
||||
if err != restartmanager.ErrRestartCanceled {
|
||||
logrus.Error(err)
|
||||
logrus.Errorf("libcontainerd: %v", err)
|
||||
}
|
||||
} else {
|
||||
ctr.start()
|
||||
|
@ -200,7 +200,7 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
|
|||
}
|
||||
ctr.client.q.append(e.Id, func() {
|
||||
if err := ctr.client.backend.StateChanged(e.Id, st); err != nil {
|
||||
logrus.Error(err)
|
||||
logrus.Errorf("libcontainerd: backend.StateChanged(): %v", err)
|
||||
}
|
||||
if e.Type == StatePause || e.Type == StateResume {
|
||||
ctr.pauseMonitor.handle(e.Type)
|
||||
|
@ -213,7 +213,7 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
|
|||
})
|
||||
|
||||
default:
|
||||
logrus.Debugf("event unhandled: %+v", e)
|
||||
logrus.Debugf("libcontainerd: event unhandled: %+v", e)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -47,13 +47,13 @@ func (ctr *container) start() error {
|
|||
|
||||
// Start the container. If this is a servicing container, this call will block
|
||||
// until the container is done with the servicing execution.
|
||||
logrus.Debugln("Starting container ", ctr.containerID)
|
||||
logrus.Debugln("libcontainerd: starting container ", ctr.containerID)
|
||||
if err = ctr.hcsContainer.Start(); err != nil {
|
||||
logrus.Errorf("Failed to start container: %s", err)
|
||||
logrus.Errorf("libcontainerd: failed to start container: %s", err)
|
||||
if err := ctr.terminate(); err != nil {
|
||||
logrus.Errorf("Failed to cleanup after a failed Start. %s", err)
|
||||
logrus.Errorf("libcontainerd: failed to cleanup after a failed Start. %s", err)
|
||||
} else {
|
||||
logrus.Debugln("Cleaned up after failed Start by calling Terminate")
|
||||
logrus.Debugln("libcontainerd: cleaned up after failed Start by calling Terminate")
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
@ -79,11 +79,11 @@ func (ctr *container) start() error {
|
|||
// Start the command running in the container.
|
||||
hcsProcess, err := ctr.hcsContainer.CreateProcess(createProcessParms)
|
||||
if err != nil {
|
||||
logrus.Errorf("CreateProcess() failed %s", err)
|
||||
logrus.Errorf("libcontainerd: CreateProcess() failed %s", err)
|
||||
if err := ctr.terminate(); err != nil {
|
||||
logrus.Errorf("Failed to cleanup after a failed CreateProcess. %s", err)
|
||||
logrus.Errorf("libcontainerd: failed to cleanup after a failed CreateProcess. %s", err)
|
||||
} else {
|
||||
logrus.Debugln("Cleaned up after failed CreateProcess by calling Terminate")
|
||||
logrus.Debugln("libcontainerd: cleaned up after failed CreateProcess by calling Terminate")
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
@ -100,7 +100,7 @@ func (ctr *container) start() error {
|
|||
exitCode := ctr.waitProcessExitCode(&ctr.process)
|
||||
|
||||
if exitCode != 0 {
|
||||
logrus.Warnf("Servicing container %s returned non-zero exit code %d", ctr.containerID, exitCode)
|
||||
logrus.Warnf("libcontainerd: servicing container %s returned non-zero exit code %d", ctr.containerID, exitCode)
|
||||
return ctr.terminate()
|
||||
}
|
||||
|
||||
|
@ -111,9 +111,9 @@ func (ctr *container) start() error {
|
|||
var stdin io.WriteCloser
|
||||
stdin, stdout, stderr, err = hcsProcess.Stdio()
|
||||
if err != nil {
|
||||
logrus.Errorf("failed to get stdio pipes: %s", err)
|
||||
logrus.Errorf("libcontainerd: failed to get stdio pipes: %s", err)
|
||||
if err := ctr.terminate(); err != nil {
|
||||
logrus.Errorf("Failed to cleanup after a failed Stdio. %s", err)
|
||||
logrus.Errorf("libcontainerd: failed to cleanup after a failed Stdio. %s", err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
@ -134,7 +134,7 @@ func (ctr *container) start() error {
|
|||
}
|
||||
|
||||
// Save the PID
|
||||
logrus.Debugf("Process started - PID %d", pid)
|
||||
logrus.Debugf("libcontainerd: process started - PID %d", pid)
|
||||
ctr.systemPid = uint32(pid)
|
||||
|
||||
// Spin up a go routine waiting for exit to handle cleanup
|
||||
|
@ -163,7 +163,7 @@ func (ctr *container) waitProcessExitCode(process *process) int {
|
|||
err := process.hcsProcess.Wait()
|
||||
if err != nil {
|
||||
if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != syscall.ERROR_BROKEN_PIPE {
|
||||
logrus.Warnf("Wait failed (container may have been killed): %s", err)
|
||||
logrus.Warnf("libcontainerd: Wait() failed (container may have been killed): %s", err)
|
||||
}
|
||||
// Fall through here, do not return. This ensures we attempt to continue the
|
||||
// shutdown in HCS and tell the docker engine that the process/container
|
||||
|
@ -173,7 +173,7 @@ func (ctr *container) waitProcessExitCode(process *process) int {
|
|||
exitCode, err := process.hcsProcess.ExitCode()
|
||||
if err != nil {
|
||||
if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != syscall.ERROR_BROKEN_PIPE {
|
||||
logrus.Warnf("Unable to get exit code from container %s", ctr.containerID)
|
||||
logrus.Warnf("libcontainerd: unable to get exit code from container %s", ctr.containerID)
|
||||
}
|
||||
// Since we got an error retrieving the exit code, make sure that the code we return
|
||||
// doesn't incorrectly indicate success.
|
||||
|
@ -185,7 +185,7 @@ func (ctr *container) waitProcessExitCode(process *process) int {
|
|||
}
|
||||
|
||||
if err := process.hcsProcess.Close(); err != nil {
|
||||
logrus.Error(err)
|
||||
logrus.Errorf("libcontainerd: hcsProcess.Close(): %v", err)
|
||||
}
|
||||
|
||||
return exitCode
|
||||
|
@ -195,7 +195,7 @@ func (ctr *container) waitProcessExitCode(process *process) int {
|
|||
// equivalent to (in the linux containerd world) where events come in for
|
||||
// state change notifications from containerd.
|
||||
func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) error {
|
||||
logrus.Debugln("waitExit on pid", process.systemPid)
|
||||
logrus.Debugln("libcontainerd: waitExit() on pid", process.systemPid)
|
||||
|
||||
exitCode := ctr.waitProcessExitCode(process)
|
||||
|
||||
|
@ -216,16 +216,16 @@ func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) err
|
|||
} else {
|
||||
updatePending, err := ctr.hcsContainer.HasPendingUpdates()
|
||||
if err != nil {
|
||||
logrus.Warnf("HasPendingUpdates failed (container may have been killed): %s", err)
|
||||
logrus.Warnf("libcontainerd: HasPendingUpdates() failed (container may have been killed): %s", err)
|
||||
} else {
|
||||
si.UpdatePending = updatePending
|
||||
}
|
||||
|
||||
logrus.Debugf("Shutting down container %s", ctr.containerID)
|
||||
logrus.Debugf("libcontainerd: shutting down container %s", ctr.containerID)
|
||||
if err := ctr.shutdown(); err != nil {
|
||||
logrus.Debugf("Failed to shutdown container %s", ctr.containerID)
|
||||
logrus.Debugf("libcontainerd: failed to shutdown container %s", ctr.containerID)
|
||||
} else {
|
||||
logrus.Debugf("Completed shutting down container %s", ctr.containerID)
|
||||
logrus.Debugf("libcontainerd: completed shutting down container %s", ctr.containerID)
|
||||
}
|
||||
if err := ctr.hcsContainer.Close(); err != nil {
|
||||
logrus.Error(err)
|
||||
|
@ -263,12 +263,12 @@ func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) err
|
|||
}
|
||||
|
||||
// Call into the backend to notify it of the state change.
|
||||
logrus.Debugf("waitExit() calling backend.StateChanged %+v", si)
|
||||
logrus.Debugf("libcontainerd: waitExit() calling backend.StateChanged %+v", si)
|
||||
if err := ctr.client.backend.StateChanged(ctr.containerID, si); err != nil {
|
||||
logrus.Error(err)
|
||||
}
|
||||
|
||||
logrus.Debugf("waitExit() completed OK, %+v", si)
|
||||
logrus.Debugf("libcontainerd: waitExit() completed OK, %+v", si)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -283,7 +283,7 @@ func (ctr *container) shutdown() error {
|
|||
}
|
||||
|
||||
if err != nil {
|
||||
logrus.Debugf("error shutting down container %s %v calling terminate", ctr.containerID, err)
|
||||
logrus.Debugf("libcontainerd: error shutting down container %s %v calling terminate", ctr.containerID, err)
|
||||
if err := ctr.terminate(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -304,7 +304,7 @@ func (ctr *container) terminate() error {
|
|||
}
|
||||
|
||||
if err != nil {
|
||||
logrus.Debugf("error terminating container %s %v", ctr.containerID, err)
|
||||
logrus.Debugf("libcontainerd: error terminating container %s %v", ctr.containerID, err)
|
||||
return err
|
||||
}
|
||||
|
||||
|
|
|
@ -141,7 +141,7 @@ func (r *remote) handleConnectionChange() {
|
|||
break
|
||||
}
|
||||
state = s
|
||||
logrus.Debugf("containerd connection state change: %v", s)
|
||||
logrus.Debugf("libcontainerd: containerd connection state change: %v", s)
|
||||
|
||||
if r.daemonPid != -1 {
|
||||
switch state {
|
||||
|
@ -155,7 +155,7 @@ func (r *remote) handleConnectionChange() {
|
|||
}
|
||||
<-r.daemonWaitCh
|
||||
if err := r.runContainerdDaemon(); err != nil { //FIXME: Handle error
|
||||
logrus.Errorf("error restarting containerd: %v", err)
|
||||
logrus.Errorf("libcontainerd: error restarting containerd: %v", err)
|
||||
}
|
||||
} else {
|
||||
state = grpc.Idle
|
||||
|
@ -290,12 +290,12 @@ func (r *remote) handleEventStream(events containerd.API_EventsClient) {
|
|||
// ignore error if grpc remote connection is closed manually
|
||||
return
|
||||
}
|
||||
logrus.Errorf("failed to receive event from containerd: %v", err)
|
||||
logrus.Errorf("libcontainerd: failed to receive event from containerd: %v", err)
|
||||
go r.startEventsMonitor()
|
||||
return
|
||||
}
|
||||
|
||||
logrus.Debugf("received containerd event: %#v", e)
|
||||
logrus.Debugf("libcontainerd: received containerd event: %#v", e)
|
||||
|
||||
var container *container
|
||||
var c *client
|
||||
|
@ -347,7 +347,7 @@ func (r *remote) runContainerdDaemon() error {
|
|||
return err
|
||||
}
|
||||
if utils.IsProcessAlive(int(pid)) {
|
||||
logrus.Infof("previous instance of containerd still alive (%d)", pid)
|
||||
logrus.Infof("libcontainerd: previous instance of containerd still alive (%d)", pid)
|
||||
r.daemonPid = int(pid)
|
||||
return nil
|
||||
}
|
||||
|
@ -385,7 +385,7 @@ func (r *remote) runContainerdDaemon() error {
|
|||
args = append(args, "--runtime-args")
|
||||
args = append(args, v)
|
||||
}
|
||||
logrus.Debugf("runContainerdDaemon: runtimeArgs: %s", args)
|
||||
logrus.Debugf("libcontainerd: runContainerdDaemon: runtimeArgs: %s", args)
|
||||
}
|
||||
|
||||
cmd := exec.Command(containerdBinary, args...)
|
||||
|
@ -403,7 +403,7 @@ func (r *remote) runContainerdDaemon() error {
|
|||
if err := cmd.Start(); err != nil {
|
||||
return err
|
||||
}
|
||||
logrus.Infof("New containerd process, pid: %d", cmd.Process.Pid)
|
||||
logrus.Infof("libcontainerd: new containerd process, pid: %d", cmd.Process.Pid)
|
||||
if err := setOOMScore(cmd.Process.Pid, r.oomScore); err != nil {
|
||||
utils.KillProcess(cmd.Process.Pid)
|
||||
return err
|
||||
|
|
Loading…
Reference in a new issue