daemon: remove v1 shim configuration for containerd

This removes the plugin section from the containerd configuration file
(`/var/run/docker/containerd/containerd.toml`) that is generated when
starting containerd as child process;

```toml
[plugins]
  [plugins.linux]
    shim = "containerd-shim"
    runtime = "runc"
    runtime_root = "/var/lib/docker/runc"
    no_shim = false
    shim_debug = true
```

This configuration doesn't appear to be used since commit:
0b14c2b67a, which switched the default runtime
to to io.containerd.runc.v2.

Note that containerd itself uses `containerd-shim` and `runc` as default
for `shim` and `runtime` v1, so omitting that configuration doesn't seem
to make a difference.

I'm slightly confused if any of the other options in this configuration were
actually used: for example, even though `runtime_root` was configured to be
`/var/lib/docker/runc`, when starting a container with that coniguration set
on docker 19.03, `/var/lib/docker/runc` doesn't appear to exist:

```console
$ docker ps
CONTAINER ID        IMAGE               COMMAND                  CREATED             STATUS              PORTS               NAMES
098baa4cb0e7        nginx:alpine        "/docker-entrypoint.…"   59 minutes ago      Up 59 minutes       80/tcp              foo

$ ls /var/lib/docker/runc
ls: /var/lib/docker/runc: No such file or directory

$ ps auxf
PID   USER     TIME  COMMAND
    1 root      0:00 sh
   16 root      0:11 dockerd --debug
   26 root      0:09 containerd --config /var/run/docker/containerd/containerd.toml --log-level debug
  234 root      0:00 containerd-shim -namespace moby -workdir /var/lib/docker/containerd/daemon/io.containerd.runtime.v1.linux/moby/09
  251 root      0:00 nginx: master process nginx -g daemon off;
  304 101       0:00 nginx: worker process
...

```

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
This commit is contained in:
Sebastiaan van Stijn 2020-11-16 13:31:03 +01:00
parent 011e1c71ff
commit 2c97295ad8
No known key found for this signature in database
GPG key ID: 76698F39D527CE8C
38 changed files with 0 additions and 11782 deletions

View file

@ -13,7 +13,6 @@ import (
"strconv"
"time"
"github.com/containerd/containerd/runtime/v1/linux"
"github.com/docker/docker/daemon"
"github.com/docker/docker/daemon/config"
"github.com/docker/docker/libcontainerd/supervisor"
@ -65,12 +64,6 @@ func getDaemonConfDir(_ string) (string, error) {
func (cli *DaemonCli) getPlatformContainerdDaemonOpts() ([]supervisor.DaemonOpt, error) {
opts := []supervisor.DaemonOpt{
supervisor.WithOOMScore(cli.Config.OOMScoreAdjust),
supervisor.WithPlugin("linux", &linux.Config{
Shim: config.DefaultShimBinary,
Runtime: config.DefaultRuntimeBinary,
RuntimeRoot: filepath.Join(cli.Config.Root, "runc"),
ShimDebug: cli.Config.Debug,
}),
}
return opts, nil

View file

@ -40,9 +40,6 @@ const (
DisableNetworkBridge = "none"
// DefaultInitBinary is the name of the default init binary
DefaultInitBinary = "docker-init"
// DefaultShimBinary is the default shim to be used by containerd if none
// is specified
DefaultShimBinary = "containerd-shim"
// DefaultRuntimeBinary is the default runtime to be used by
// containerd if none is specified
DefaultRuntimeBinary = "runc"

View file

@ -1,75 +0,0 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
"context"
"github.com/containerd/console"
"github.com/containerd/containerd/errdefs"
google_protobuf "github.com/gogo/protobuf/types"
"github.com/pkg/errors"
)
type deletedState struct {
}
func (s *deletedState) Pause(ctx context.Context) error {
return errors.Errorf("cannot pause a deleted process")
}
func (s *deletedState) Resume(ctx context.Context) error {
return errors.Errorf("cannot resume a deleted process")
}
func (s *deletedState) Update(context context.Context, r *google_protobuf.Any) error {
return errors.Errorf("cannot update a deleted process")
}
func (s *deletedState) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
return errors.Errorf("cannot checkpoint a deleted process")
}
func (s *deletedState) Resize(ws console.WinSize) error {
return errors.Errorf("cannot resize a deleted process")
}
func (s *deletedState) Start(ctx context.Context) error {
return errors.Errorf("cannot start a deleted process")
}
func (s *deletedState) Delete(ctx context.Context) error {
return errors.Wrap(errdefs.ErrNotFound, "cannot delete a deleted process")
}
func (s *deletedState) Kill(ctx context.Context, sig uint32, all bool) error {
return errors.Wrap(errdefs.ErrNotFound, "cannot kill a deleted process")
}
func (s *deletedState) SetExited(status int) {
// no op
}
func (s *deletedState) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
return nil, errors.Errorf("cannot exec in a deleted state")
}
func (s *deletedState) Status(ctx context.Context) (string, error) {
return "stopped", nil
}

View file

@ -1,264 +0,0 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"sync"
"syscall"
"time"
"golang.org/x/sys/unix"
"github.com/containerd/console"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/pkg/stdio"
"github.com/containerd/fifo"
runc "github.com/containerd/go-runc"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
)
type execProcess struct {
wg sync.WaitGroup
execState execState
mu sync.Mutex
id string
console console.Console
io *processIO
status int
exited time.Time
pid safePid
closers []io.Closer
stdin io.Closer
stdio stdio.Stdio
path string
spec specs.Process
parent *Init
waitBlock chan struct{}
}
func (e *execProcess) Wait() {
<-e.waitBlock
}
func (e *execProcess) ID() string {
return e.id
}
func (e *execProcess) Pid() int {
return e.pid.get()
}
func (e *execProcess) ExitStatus() int {
e.mu.Lock()
defer e.mu.Unlock()
return e.status
}
func (e *execProcess) ExitedAt() time.Time {
e.mu.Lock()
defer e.mu.Unlock()
return e.exited
}
func (e *execProcess) SetExited(status int) {
e.mu.Lock()
defer e.mu.Unlock()
e.execState.SetExited(status)
}
func (e *execProcess) setExited(status int) {
e.status = status
e.exited = time.Now()
e.parent.Platform.ShutdownConsole(context.Background(), e.console)
close(e.waitBlock)
}
func (e *execProcess) Delete(ctx context.Context) error {
e.mu.Lock()
defer e.mu.Unlock()
return e.execState.Delete(ctx)
}
func (e *execProcess) delete(ctx context.Context) error {
waitTimeout(ctx, &e.wg, 2*time.Second)
if e.io != nil {
for _, c := range e.closers {
c.Close()
}
e.io.Close()
}
pidfile := filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id))
// silently ignore error
os.Remove(pidfile)
return nil
}
func (e *execProcess) Resize(ws console.WinSize) error {
e.mu.Lock()
defer e.mu.Unlock()
return e.execState.Resize(ws)
}
func (e *execProcess) resize(ws console.WinSize) error {
if e.console == nil {
return nil
}
return e.console.Resize(ws)
}
func (e *execProcess) Kill(ctx context.Context, sig uint32, _ bool) error {
e.mu.Lock()
defer e.mu.Unlock()
return e.execState.Kill(ctx, sig, false)
}
func (e *execProcess) kill(ctx context.Context, sig uint32, _ bool) error {
pid := e.pid.get()
switch {
case pid == 0:
return errors.Wrap(errdefs.ErrFailedPrecondition, "process not created")
case !e.exited.IsZero():
return errors.Wrapf(errdefs.ErrNotFound, "process already finished")
default:
if err := unix.Kill(pid, syscall.Signal(sig)); err != nil {
return errors.Wrapf(checkKillError(err), "exec kill error")
}
}
return nil
}
func (e *execProcess) Stdin() io.Closer {
return e.stdin
}
func (e *execProcess) Stdio() stdio.Stdio {
return e.stdio
}
func (e *execProcess) Start(ctx context.Context) error {
e.mu.Lock()
defer e.mu.Unlock()
return e.execState.Start(ctx)
}
func (e *execProcess) start(ctx context.Context) (err error) {
// The reaper may receive exit signal right after
// the container is started, before the e.pid is updated.
// In that case, we want to block the signal handler to
// access e.pid until it is updated.
e.pid.Lock()
defer e.pid.Unlock()
var (
socket *runc.Socket
pio *processIO
pidFile = newExecPidFile(e.path, e.id)
)
if e.stdio.Terminal {
if socket, err = runc.NewTempConsoleSocket(); err != nil {
return errors.Wrap(err, "failed to create runc console socket")
}
defer socket.Close()
} else {
if pio, err = createIO(ctx, e.id, e.parent.IoUID, e.parent.IoGID, e.stdio); err != nil {
return errors.Wrap(err, "failed to create init process I/O")
}
e.io = pio
}
opts := &runc.ExecOpts{
PidFile: pidFile.Path(),
Detach: true,
}
if pio != nil {
opts.IO = pio.IO()
}
if socket != nil {
opts.ConsoleSocket = socket
}
if err := e.parent.runtime.Exec(ctx, e.parent.id, e.spec, opts); err != nil {
close(e.waitBlock)
return e.parent.runtimeError(err, "OCI runtime exec failed")
}
if e.stdio.Stdin != "" {
if err := e.openStdin(e.stdio.Stdin); err != nil {
return err
}
}
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
if socket != nil {
console, err := socket.ReceiveMaster()
if err != nil {
return errors.Wrap(err, "failed to retrieve console master")
}
if e.console, err = e.parent.Platform.CopyConsole(ctx, console, e.id, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg); err != nil {
return errors.Wrap(err, "failed to start console copy")
}
} else {
if err := pio.Copy(ctx, &e.wg); err != nil {
return errors.Wrap(err, "failed to start io pipe copy")
}
}
pid, err := pidFile.Read()
if err != nil {
return errors.Wrap(err, "failed to retrieve OCI runtime exec pid")
}
e.pid.pid = pid
return nil
}
func (e *execProcess) openStdin(path string) error {
sc, err := fifo.OpenFifo(context.Background(), path, syscall.O_WRONLY|syscall.O_NONBLOCK, 0)
if err != nil {
return errors.Wrapf(err, "failed to open stdin fifo %s", path)
}
e.stdin = sc
e.closers = append(e.closers, sc)
return nil
}
func (e *execProcess) Status(ctx context.Context) (string, error) {
s, err := e.parent.Status(ctx)
if err != nil {
return "", err
}
// if the container as a whole is in the pausing/paused state, so are all
// other processes inside the container, use container state here
switch s {
case "paused", "pausing":
return s, nil
}
e.mu.Lock()
defer e.mu.Unlock()
return e.execState.Status(ctx)
}

View file

@ -1,172 +0,0 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
"context"
"github.com/containerd/console"
"github.com/pkg/errors"
)
type execState interface {
Resize(console.WinSize) error
Start(context.Context) error
Delete(context.Context) error
Kill(context.Context, uint32, bool) error
SetExited(int)
Status(context.Context) (string, error)
}
type execCreatedState struct {
p *execProcess
}
func (s *execCreatedState) transition(name string) error {
switch name {
case "running":
s.p.execState = &execRunningState{p: s.p}
case "stopped":
s.p.execState = &execStoppedState{p: s.p}
case "deleted":
s.p.execState = &deletedState{}
default:
return errors.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *execCreatedState) Resize(ws console.WinSize) error {
return s.p.resize(ws)
}
func (s *execCreatedState) Start(ctx context.Context) error {
if err := s.p.start(ctx); err != nil {
return err
}
return s.transition("running")
}
func (s *execCreatedState) Delete(ctx context.Context) error {
if err := s.p.delete(ctx); err != nil {
return err
}
return s.transition("deleted")
}
func (s *execCreatedState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *execCreatedState) SetExited(status int) {
s.p.setExited(status)
if err := s.transition("stopped"); err != nil {
panic(err)
}
}
func (s *execCreatedState) Status(ctx context.Context) (string, error) {
return "created", nil
}
type execRunningState struct {
p *execProcess
}
func (s *execRunningState) transition(name string) error {
switch name {
case "stopped":
s.p.execState = &execStoppedState{p: s.p}
default:
return errors.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *execRunningState) Resize(ws console.WinSize) error {
return s.p.resize(ws)
}
func (s *execRunningState) Start(ctx context.Context) error {
return errors.Errorf("cannot start a running process")
}
func (s *execRunningState) Delete(ctx context.Context) error {
return errors.Errorf("cannot delete a running process")
}
func (s *execRunningState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *execRunningState) SetExited(status int) {
s.p.setExited(status)
if err := s.transition("stopped"); err != nil {
panic(err)
}
}
func (s *execRunningState) Status(ctx context.Context) (string, error) {
return "running", nil
}
type execStoppedState struct {
p *execProcess
}
func (s *execStoppedState) transition(name string) error {
switch name {
case "deleted":
s.p.execState = &deletedState{}
default:
return errors.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *execStoppedState) Resize(ws console.WinSize) error {
return errors.Errorf("cannot resize a stopped container")
}
func (s *execStoppedState) Start(ctx context.Context) error {
return errors.Errorf("cannot start a stopped process")
}
func (s *execStoppedState) Delete(ctx context.Context) error {
if err := s.p.delete(ctx); err != nil {
return err
}
return s.transition("deleted")
}
func (s *execStoppedState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *execStoppedState) SetExited(status int) {
// no op
}
func (s *execStoppedState) Status(ctx context.Context) (string, error) {
return "stopped", nil
}

View file

@ -1,498 +0,0 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
"context"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"sync"
"time"
"github.com/containerd/console"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/pkg/stdio"
"github.com/containerd/fifo"
runc "github.com/containerd/go-runc"
google_protobuf "github.com/gogo/protobuf/types"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
// Init represents an initial process for a container
type Init struct {
wg sync.WaitGroup
initState initState
// mu is used to ensure that `Start()` and `Exited()` calls return in
// the right order when invoked in separate go routines.
// This is the case within the shim implementation as it makes use of
// the reaper interface.
mu sync.Mutex
waitBlock chan struct{}
WorkDir string
id string
Bundle string
console console.Console
Platform stdio.Platform
io *processIO
runtime *runc.Runc
// pausing preserves the pausing state.
pausing *atomicBool
status int
exited time.Time
pid int
closers []io.Closer
stdin io.Closer
stdio stdio.Stdio
Rootfs string
IoUID int
IoGID int
NoPivotRoot bool
NoNewKeyring bool
CriuWorkPath string
}
// NewRunc returns a new runc instance for a process
func NewRunc(root, path, namespace, runtime, criu string, systemd bool) *runc.Runc {
if root == "" {
root = RuncRoot
}
return &runc.Runc{
Command: runtime,
Log: filepath.Join(path, "log.json"),
LogFormat: runc.JSON,
PdeathSignal: unix.SIGKILL,
Root: filepath.Join(root, namespace),
Criu: criu,
SystemdCgroup: systemd,
}
}
// New returns a new process
func New(id string, runtime *runc.Runc, stdio stdio.Stdio) *Init {
p := &Init{
id: id,
runtime: runtime,
pausing: new(atomicBool),
stdio: stdio,
status: 0,
waitBlock: make(chan struct{}),
}
p.initState = &createdState{p: p}
return p
}
// Create the process with the provided config
func (p *Init) Create(ctx context.Context, r *CreateConfig) error {
var (
err error
socket *runc.Socket
pio *processIO
pidFile = newPidFile(p.Bundle)
)
if r.Terminal {
if socket, err = runc.NewTempConsoleSocket(); err != nil {
return errors.Wrap(err, "failed to create OCI runtime console socket")
}
defer socket.Close()
} else {
if pio, err = createIO(ctx, p.id, p.IoUID, p.IoGID, p.stdio); err != nil {
return errors.Wrap(err, "failed to create init process I/O")
}
p.io = pio
}
if r.Checkpoint != "" {
return p.createCheckpointedState(r, pidFile)
}
opts := &runc.CreateOpts{
PidFile: pidFile.Path(),
NoPivot: p.NoPivotRoot,
NoNewKeyring: p.NoNewKeyring,
}
if p.io != nil {
opts.IO = p.io.IO()
}
if socket != nil {
opts.ConsoleSocket = socket
}
if err := p.runtime.Create(ctx, r.ID, r.Bundle, opts); err != nil {
return p.runtimeError(err, "OCI runtime create failed")
}
if r.Stdin != "" {
if err := p.openStdin(r.Stdin); err != nil {
return err
}
}
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
if socket != nil {
console, err := socket.ReceiveMaster()
if err != nil {
return errors.Wrap(err, "failed to retrieve console master")
}
console, err = p.Platform.CopyConsole(ctx, console, p.id, r.Stdin, r.Stdout, r.Stderr, &p.wg)
if err != nil {
return errors.Wrap(err, "failed to start console copy")
}
p.console = console
} else {
if err := pio.Copy(ctx, &p.wg); err != nil {
return errors.Wrap(err, "failed to start io pipe copy")
}
}
pid, err := pidFile.Read()
if err != nil {
return errors.Wrap(err, "failed to retrieve OCI runtime container pid")
}
p.pid = pid
return nil
}
func (p *Init) openStdin(path string) error {
sc, err := fifo.OpenFifo(context.Background(), path, unix.O_WRONLY|unix.O_NONBLOCK, 0)
if err != nil {
return errors.Wrapf(err, "failed to open stdin fifo %s", path)
}
p.stdin = sc
p.closers = append(p.closers, sc)
return nil
}
func (p *Init) createCheckpointedState(r *CreateConfig, pidFile *pidFile) error {
opts := &runc.RestoreOpts{
CheckpointOpts: runc.CheckpointOpts{
ImagePath: r.Checkpoint,
WorkDir: p.CriuWorkPath,
ParentPath: r.ParentCheckpoint,
},
PidFile: pidFile.Path(),
NoPivot: p.NoPivotRoot,
Detach: true,
NoSubreaper: true,
}
if p.io != nil {
opts.IO = p.io.IO()
}
p.initState = &createdCheckpointState{
p: p,
opts: opts,
}
return nil
}
// Wait for the process to exit
func (p *Init) Wait() {
<-p.waitBlock
}
// ID of the process
func (p *Init) ID() string {
return p.id
}
// Pid of the process
func (p *Init) Pid() int {
return p.pid
}
// ExitStatus of the process
func (p *Init) ExitStatus() int {
p.mu.Lock()
defer p.mu.Unlock()
return p.status
}
// ExitedAt at time when the process exited
func (p *Init) ExitedAt() time.Time {
p.mu.Lock()
defer p.mu.Unlock()
return p.exited
}
// Status of the process
func (p *Init) Status(ctx context.Context) (string, error) {
if p.pausing.get() {
return "pausing", nil
}
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Status(ctx)
}
// Start the init process
func (p *Init) Start(ctx context.Context) error {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Start(ctx)
}
func (p *Init) start(ctx context.Context) error {
err := p.runtime.Start(ctx, p.id)
return p.runtimeError(err, "OCI runtime start failed")
}
// SetExited of the init process with the next status
func (p *Init) SetExited(status int) {
p.mu.Lock()
defer p.mu.Unlock()
p.initState.SetExited(status)
}
func (p *Init) setExited(status int) {
p.exited = time.Now()
p.status = status
p.Platform.ShutdownConsole(context.Background(), p.console)
close(p.waitBlock)
}
// Delete the init process
func (p *Init) Delete(ctx context.Context) error {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Delete(ctx)
}
func (p *Init) delete(ctx context.Context) error {
waitTimeout(ctx, &p.wg, 2*time.Second)
err := p.runtime.Delete(ctx, p.id, nil)
// ignore errors if a runtime has already deleted the process
// but we still hold metadata and pipes
//
// this is common during a checkpoint, runc will delete the container state
// after a checkpoint and the container will no longer exist within runc
if err != nil {
if strings.Contains(err.Error(), "does not exist") {
err = nil
} else {
err = p.runtimeError(err, "failed to delete task")
}
}
if p.io != nil {
for _, c := range p.closers {
c.Close()
}
p.io.Close()
}
if err2 := mount.UnmountAll(p.Rootfs, 0); err2 != nil {
log.G(ctx).WithError(err2).Warn("failed to cleanup rootfs mount")
if err == nil {
err = errors.Wrap(err2, "failed rootfs umount")
}
}
return err
}
// Resize the init processes console
func (p *Init) Resize(ws console.WinSize) error {
p.mu.Lock()
defer p.mu.Unlock()
if p.console == nil {
return nil
}
return p.console.Resize(ws)
}
// Pause the init process and all its child processes
func (p *Init) Pause(ctx context.Context) error {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Pause(ctx)
}
// Resume the init process and all its child processes
func (p *Init) Resume(ctx context.Context) error {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Resume(ctx)
}
// Kill the init process
func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Kill(ctx, signal, all)
}
func (p *Init) kill(ctx context.Context, signal uint32, all bool) error {
err := p.runtime.Kill(ctx, p.id, int(signal), &runc.KillOpts{
All: all,
})
return checkKillError(err)
}
// KillAll processes belonging to the init process
func (p *Init) KillAll(ctx context.Context) error {
p.mu.Lock()
defer p.mu.Unlock()
err := p.runtime.Kill(ctx, p.id, int(unix.SIGKILL), &runc.KillOpts{
All: true,
})
return p.runtimeError(err, "OCI runtime killall failed")
}
// Stdin of the process
func (p *Init) Stdin() io.Closer {
return p.stdin
}
// Runtime returns the OCI runtime configured for the init process
func (p *Init) Runtime() *runc.Runc {
return p.runtime
}
// Exec returns a new child process
func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Exec(ctx, path, r)
}
// exec returns a new exec'd process
func (p *Init) exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
// process exec request
var spec specs.Process
if err := json.Unmarshal(r.Spec.Value, &spec); err != nil {
return nil, err
}
spec.Terminal = r.Terminal
e := &execProcess{
id: r.ID,
path: path,
parent: p,
spec: spec,
stdio: stdio.Stdio{
Stdin: r.Stdin,
Stdout: r.Stdout,
Stderr: r.Stderr,
Terminal: r.Terminal,
},
waitBlock: make(chan struct{}),
}
e.execState = &execCreatedState{p: e}
return e, nil
}
// Checkpoint the init process
func (p *Init) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Checkpoint(ctx, r)
}
func (p *Init) checkpoint(ctx context.Context, r *CheckpointConfig) error {
var actions []runc.CheckpointAction
if !r.Exit {
actions = append(actions, runc.LeaveRunning)
}
// keep criu work directory if criu work dir is set
work := r.WorkDir
if work == "" {
work = filepath.Join(p.WorkDir, "criu-work")
defer os.RemoveAll(work)
}
if err := p.runtime.Checkpoint(ctx, p.id, &runc.CheckpointOpts{
WorkDir: work,
ImagePath: r.Path,
AllowOpenTCP: r.AllowOpenTCP,
AllowExternalUnixSockets: r.AllowExternalUnixSockets,
AllowTerminal: r.AllowTerminal,
FileLocks: r.FileLocks,
EmptyNamespaces: r.EmptyNamespaces,
}, actions...); err != nil {
dumpLog := filepath.Join(p.Bundle, "criu-dump.log")
if cerr := copyFile(dumpLog, filepath.Join(work, "dump.log")); cerr != nil {
log.G(ctx).WithError(cerr).Error("failed to copy dump.log to criu-dump.log")
}
return fmt.Errorf("%s path= %s", criuError(err), dumpLog)
}
return nil
}
// Update the processes resource configuration
func (p *Init) Update(ctx context.Context, r *google_protobuf.Any) error {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Update(ctx, r)
}
func (p *Init) update(ctx context.Context, r *google_protobuf.Any) error {
var resources specs.LinuxResources
if err := json.Unmarshal(r.Value, &resources); err != nil {
return err
}
return p.runtime.Update(ctx, p.id, &resources)
}
// Stdio of the process
func (p *Init) Stdio() stdio.Stdio {
return p.stdio
}
func (p *Init) runtimeError(rErr error, msg string) error {
if rErr == nil {
return nil
}
rMsg, err := getLastRuntimeError(p.runtime)
switch {
case err != nil:
return errors.Wrapf(rErr, "%s: %s (%s)", msg, "unable to retrieve OCI runtime error", err.Error())
case rMsg == "":
return errors.Wrap(rErr, msg)
default:
return errors.Errorf("%s: %s", msg, rMsg)
}
}
func withConditionalIO(c stdio.Stdio) runc.IOOpt {
return func(o *runc.IOOption) {
o.OpenStdin = c.Stdin != ""
o.OpenStdout = c.Stdout != ""
o.OpenStderr = c.Stderr != ""
}
}

View file

@ -1,414 +0,0 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
"context"
runc "github.com/containerd/go-runc"
google_protobuf "github.com/gogo/protobuf/types"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
type initState interface {
Start(context.Context) error
Delete(context.Context) error
Pause(context.Context) error
Resume(context.Context) error
Update(context.Context, *google_protobuf.Any) error
Checkpoint(context.Context, *CheckpointConfig) error
Exec(context.Context, string, *ExecConfig) (Process, error)
Kill(context.Context, uint32, bool) error
SetExited(int)
Status(context.Context) (string, error)
}
type createdState struct {
p *Init
}
func (s *createdState) transition(name string) error {
switch name {
case "running":
s.p.initState = &runningState{p: s.p}
case "stopped":
s.p.initState = &stoppedState{p: s.p}
case "deleted":
s.p.initState = &deletedState{}
default:
return errors.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *createdState) Pause(ctx context.Context) error {
return errors.Errorf("cannot pause task in created state")
}
func (s *createdState) Resume(ctx context.Context) error {
return errors.Errorf("cannot resume task in created state")
}
func (s *createdState) Update(ctx context.Context, r *google_protobuf.Any) error {
return s.p.update(ctx, r)
}
func (s *createdState) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
return errors.Errorf("cannot checkpoint a task in created state")
}
func (s *createdState) Start(ctx context.Context) error {
if err := s.p.start(ctx); err != nil {
return err
}
return s.transition("running")
}
func (s *createdState) Delete(ctx context.Context) error {
if err := s.p.delete(ctx); err != nil {
return err
}
return s.transition("deleted")
}
func (s *createdState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *createdState) SetExited(status int) {
s.p.setExited(status)
if err := s.transition("stopped"); err != nil {
panic(err)
}
}
func (s *createdState) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
return s.p.exec(ctx, path, r)
}
func (s *createdState) Status(ctx context.Context) (string, error) {
return "created", nil
}
type createdCheckpointState struct {
p *Init
opts *runc.RestoreOpts
}
func (s *createdCheckpointState) transition(name string) error {
switch name {
case "running":
s.p.initState = &runningState{p: s.p}
case "stopped":
s.p.initState = &stoppedState{p: s.p}
case "deleted":
s.p.initState = &deletedState{}
default:
return errors.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *createdCheckpointState) Pause(ctx context.Context) error {
return errors.Errorf("cannot pause task in created state")
}
func (s *createdCheckpointState) Resume(ctx context.Context) error {
return errors.Errorf("cannot resume task in created state")
}
func (s *createdCheckpointState) Update(ctx context.Context, r *google_protobuf.Any) error {
return s.p.update(ctx, r)
}
func (s *createdCheckpointState) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
return errors.Errorf("cannot checkpoint a task in created state")
}
func (s *createdCheckpointState) Start(ctx context.Context) error {
p := s.p
sio := p.stdio
var (
err error
socket *runc.Socket
)
if sio.Terminal {
if socket, err = runc.NewTempConsoleSocket(); err != nil {
return errors.Wrap(err, "failed to create OCI runtime console socket")
}
defer socket.Close()
s.opts.ConsoleSocket = socket
}
if _, err := s.p.runtime.Restore(ctx, p.id, p.Bundle, s.opts); err != nil {
return p.runtimeError(err, "OCI runtime restore failed")
}
if sio.Stdin != "" {
if err := p.openStdin(sio.Stdin); err != nil {
return errors.Wrapf(err, "failed to open stdin fifo %s", sio.Stdin)
}
}
if socket != nil {
console, err := socket.ReceiveMaster()
if err != nil {
return errors.Wrap(err, "failed to retrieve console master")
}
console, err = p.Platform.CopyConsole(ctx, console, p.id, sio.Stdin, sio.Stdout, sio.Stderr, &p.wg)
if err != nil {
return errors.Wrap(err, "failed to start console copy")
}
p.console = console
} else {
if err := p.io.Copy(ctx, &p.wg); err != nil {
return errors.Wrap(err, "failed to start io pipe copy")
}
}
pid, err := runc.ReadPidFile(s.opts.PidFile)
if err != nil {
return errors.Wrap(err, "failed to retrieve OCI runtime container pid")
}
p.pid = pid
return s.transition("running")
}
func (s *createdCheckpointState) Delete(ctx context.Context) error {
if err := s.p.delete(ctx); err != nil {
return err
}
return s.transition("deleted")
}
func (s *createdCheckpointState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *createdCheckpointState) SetExited(status int) {
s.p.setExited(status)
if err := s.transition("stopped"); err != nil {
panic(err)
}
}
func (s *createdCheckpointState) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
return nil, errors.Errorf("cannot exec in a created state")
}
func (s *createdCheckpointState) Status(ctx context.Context) (string, error) {
return "created", nil
}
type runningState struct {
p *Init
}
func (s *runningState) transition(name string) error {
switch name {
case "stopped":
s.p.initState = &stoppedState{p: s.p}
case "paused":
s.p.initState = &pausedState{p: s.p}
default:
return errors.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *runningState) Pause(ctx context.Context) error {
s.p.pausing.set(true)
// NOTE "pausing" will be returned in the short window
// after `transition("paused")`, before `pausing` is reset
// to false. That doesn't break the state machine, just
// delays the "paused" state a little bit.
defer s.p.pausing.set(false)
if err := s.p.runtime.Pause(ctx, s.p.id); err != nil {
return s.p.runtimeError(err, "OCI runtime pause failed")
}
return s.transition("paused")
}
func (s *runningState) Resume(ctx context.Context) error {
return errors.Errorf("cannot resume a running process")
}
func (s *runningState) Update(ctx context.Context, r *google_protobuf.Any) error {
return s.p.update(ctx, r)
}
func (s *runningState) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
return s.p.checkpoint(ctx, r)
}
func (s *runningState) Start(ctx context.Context) error {
return errors.Errorf("cannot start a running process")
}
func (s *runningState) Delete(ctx context.Context) error {
return errors.Errorf("cannot delete a running process")
}
func (s *runningState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *runningState) SetExited(status int) {
s.p.setExited(status)
if err := s.transition("stopped"); err != nil {
panic(err)
}
}
func (s *runningState) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
return s.p.exec(ctx, path, r)
}
func (s *runningState) Status(ctx context.Context) (string, error) {
return "running", nil
}
type pausedState struct {
p *Init
}
func (s *pausedState) transition(name string) error {
switch name {
case "running":
s.p.initState = &runningState{p: s.p}
case "stopped":
s.p.initState = &stoppedState{p: s.p}
default:
return errors.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *pausedState) Pause(ctx context.Context) error {
return errors.Errorf("cannot pause a paused container")
}
func (s *pausedState) Resume(ctx context.Context) error {
if err := s.p.runtime.Resume(ctx, s.p.id); err != nil {
return s.p.runtimeError(err, "OCI runtime resume failed")
}
return s.transition("running")
}
func (s *pausedState) Update(ctx context.Context, r *google_protobuf.Any) error {
return s.p.update(ctx, r)
}
func (s *pausedState) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
return s.p.checkpoint(ctx, r)
}
func (s *pausedState) Start(ctx context.Context) error {
return errors.Errorf("cannot start a paused process")
}
func (s *pausedState) Delete(ctx context.Context) error {
return errors.Errorf("cannot delete a paused process")
}
func (s *pausedState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *pausedState) SetExited(status int) {
s.p.setExited(status)
if err := s.p.runtime.Resume(context.Background(), s.p.id); err != nil {
logrus.WithError(err).Error("resuming exited container from paused state")
}
if err := s.transition("stopped"); err != nil {
panic(err)
}
}
func (s *pausedState) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
return nil, errors.Errorf("cannot exec in a paused state")
}
func (s *pausedState) Status(ctx context.Context) (string, error) {
return "paused", nil
}
type stoppedState struct {
p *Init
}
func (s *stoppedState) transition(name string) error {
switch name {
case "deleted":
s.p.initState = &deletedState{}
default:
return errors.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *stoppedState) Pause(ctx context.Context) error {
return errors.Errorf("cannot pause a stopped container")
}
func (s *stoppedState) Resume(ctx context.Context) error {
return errors.Errorf("cannot resume a stopped container")
}
func (s *stoppedState) Update(ctx context.Context, r *google_protobuf.Any) error {
return errors.Errorf("cannot update a stopped container")
}
func (s *stoppedState) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
return errors.Errorf("cannot checkpoint a stopped container")
}
func (s *stoppedState) Start(ctx context.Context) error {
return errors.Errorf("cannot start a stopped process")
}
func (s *stoppedState) Delete(ctx context.Context) error {
if err := s.p.delete(ctx); err != nil {
return err
}
return s.transition("deleted")
}
func (s *stoppedState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *stoppedState) SetExited(status int) {
// no op
}
func (s *stoppedState) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
return nil, errors.Errorf("cannot exec in a stopped state")
}
func (s *stoppedState) Status(ctx context.Context) (string, error) {
return "stopped", nil
}

View file

@ -1,439 +0,0 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
"context"
"fmt"
"io"
"net/url"
"os"
"os/exec"
"path/filepath"
"sync"
"sync/atomic"
"syscall"
"time"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/namespaces"
"github.com/containerd/containerd/pkg/stdio"
"github.com/containerd/fifo"
runc "github.com/containerd/go-runc"
"github.com/hashicorp/go-multierror"
"github.com/pkg/errors"
)
const binaryIOProcTermTimeout = 12 * time.Second // Give logger process solid 10 seconds for cleanup
var bufPool = sync.Pool{
New: func() interface{} {
// setting to 4096 to align with PIPE_BUF
// http://man7.org/linux/man-pages/man7/pipe.7.html
buffer := make([]byte, 4096)
return &buffer
},
}
type processIO struct {
io runc.IO
uri *url.URL
copy bool
stdio stdio.Stdio
}
func (p *processIO) Close() error {
if p.io != nil {
return p.io.Close()
}
return nil
}
func (p *processIO) IO() runc.IO {
return p.io
}
func (p *processIO) Copy(ctx context.Context, wg *sync.WaitGroup) error {
if !p.copy {
return nil
}
var cwg sync.WaitGroup
if err := copyPipes(ctx, p.IO(), p.stdio.Stdin, p.stdio.Stdout, p.stdio.Stderr, wg, &cwg); err != nil {
return errors.Wrap(err, "unable to copy pipes")
}
cwg.Wait()
return nil
}
func createIO(ctx context.Context, id string, ioUID, ioGID int, stdio stdio.Stdio) (*processIO, error) {
pio := &processIO{
stdio: stdio,
}
if stdio.IsNull() {
i, err := runc.NewNullIO()
if err != nil {
return nil, err
}
pio.io = i
return pio, nil
}
u, err := url.Parse(stdio.Stdout)
if err != nil {
return nil, errors.Wrap(err, "unable to parse stdout uri")
}
if u.Scheme == "" {
u.Scheme = "fifo"
}
pio.uri = u
switch u.Scheme {
case "fifo":
pio.copy = true
pio.io, err = runc.NewPipeIO(ioUID, ioGID, withConditionalIO(stdio))
case "binary":
pio.io, err = NewBinaryIO(ctx, id, u)
case "file":
filePath := u.Path
if err := os.MkdirAll(filepath.Dir(filePath), 0755); err != nil {
return nil, err
}
var f *os.File
f, err = os.OpenFile(filePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
return nil, err
}
f.Close()
pio.stdio.Stdout = filePath
pio.stdio.Stderr = filePath
pio.copy = true
pio.io, err = runc.NewPipeIO(ioUID, ioGID, withConditionalIO(stdio))
default:
return nil, errors.Errorf("unknown STDIO scheme %s", u.Scheme)
}
if err != nil {
return nil, err
}
return pio, nil
}
func copyPipes(ctx context.Context, rio runc.IO, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) error {
var sameFile *countingWriteCloser
for _, i := range []struct {
name string
dest func(wc io.WriteCloser, rc io.Closer)
}{
{
name: stdout,
dest: func(wc io.WriteCloser, rc io.Closer) {
wg.Add(1)
cwg.Add(1)
go func() {
cwg.Done()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
if _, err := io.CopyBuffer(wc, rio.Stdout(), *p); err != nil {
log.G(ctx).Warn("error copying stdout")
}
wg.Done()
wc.Close()
if rc != nil {
rc.Close()
}
}()
},
}, {
name: stderr,
dest: func(wc io.WriteCloser, rc io.Closer) {
wg.Add(1)
cwg.Add(1)
go func() {
cwg.Done()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
if _, err := io.CopyBuffer(wc, rio.Stderr(), *p); err != nil {
log.G(ctx).Warn("error copying stderr")
}
wg.Done()
wc.Close()
if rc != nil {
rc.Close()
}
}()
},
},
} {
ok, err := fifo.IsFifo(i.name)
if err != nil {
return err
}
var (
fw io.WriteCloser
fr io.Closer
)
if ok {
if fw, err = fifo.OpenFifo(ctx, i.name, syscall.O_WRONLY, 0); err != nil {
return errors.Wrapf(err, "containerd-shim: opening w/o fifo %q failed", i.name)
}
if fr, err = fifo.OpenFifo(ctx, i.name, syscall.O_RDONLY, 0); err != nil {
return errors.Wrapf(err, "containerd-shim: opening r/o fifo %q failed", i.name)
}
} else {
if sameFile != nil {
sameFile.count++
i.dest(sameFile, nil)
continue
}
if fw, err = os.OpenFile(i.name, syscall.O_WRONLY|syscall.O_APPEND, 0); err != nil {
return errors.Wrapf(err, "containerd-shim: opening file %q failed", i.name)
}
if stdout == stderr {
sameFile = &countingWriteCloser{
WriteCloser: fw,
count: 1,
}
}
}
i.dest(fw, fr)
}
if stdin == "" {
return nil
}
f, err := fifo.OpenFifo(context.Background(), stdin, syscall.O_RDONLY|syscall.O_NONBLOCK, 0)
if err != nil {
return fmt.Errorf("containerd-shim: opening %s failed: %s", stdin, err)
}
cwg.Add(1)
go func() {
cwg.Done()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
io.CopyBuffer(rio.Stdin(), f, *p)
rio.Stdin().Close()
f.Close()
}()
return nil
}
// countingWriteCloser masks io.Closer() until close has been invoked a certain number of times.
type countingWriteCloser struct {
io.WriteCloser
count int64
}
func (c *countingWriteCloser) Close() error {
if atomic.AddInt64(&c.count, -1) > 0 {
return nil
}
return c.WriteCloser.Close()
}
// NewBinaryIO runs a custom binary process for pluggable shim logging
func NewBinaryIO(ctx context.Context, id string, uri *url.URL) (_ runc.IO, err error) {
ns, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return nil, err
}
var closers []func() error
defer func() {
if err == nil {
return
}
result := multierror.Append(err)
for _, fn := range closers {
result = multierror.Append(result, fn())
}
err = multierror.Flatten(result)
}()
out, err := newPipe()
if err != nil {
return nil, errors.Wrap(err, "failed to create stdout pipes")
}
closers = append(closers, out.Close)
serr, err := newPipe()
if err != nil {
return nil, errors.Wrap(err, "failed to create stderr pipes")
}
closers = append(closers, serr.Close)
r, w, err := os.Pipe()
if err != nil {
return nil, err
}
closers = append(closers, r.Close, w.Close)
cmd := NewBinaryCmd(uri, id, ns)
cmd.ExtraFiles = append(cmd.ExtraFiles, out.r, serr.r, w)
// don't need to register this with the reaper or wait when
// running inside a shim
if err := cmd.Start(); err != nil {
return nil, errors.Wrap(err, "failed to start binary process")
}
closers = append(closers, func() error { return cmd.Process.Kill() })
// close our side of the pipe after start
if err := w.Close(); err != nil {
return nil, errors.Wrap(err, "failed to close write pipe after start")
}
// wait for the logging binary to be ready
b := make([]byte, 1)
if _, err := r.Read(b); err != nil && err != io.EOF {
return nil, errors.Wrap(err, "failed to read from logging binary")
}
return &binaryIO{
cmd: cmd,
out: out,
err: serr,
}, nil
}
type binaryIO struct {
cmd *exec.Cmd
out, err *pipe
}
func (b *binaryIO) CloseAfterStart() error {
var (
result *multierror.Error
)
for _, v := range []*pipe{b.out, b.err} {
if v != nil {
if err := v.r.Close(); err != nil {
result = multierror.Append(result, err)
}
}
}
return result.ErrorOrNil()
}
func (b *binaryIO) Close() error {
var (
result *multierror.Error
)
for _, v := range []*pipe{b.out, b.err} {
if v != nil {
if err := v.Close(); err != nil {
result = multierror.Append(result, err)
}
}
}
if err := b.cancel(); err != nil {
result = multierror.Append(result, err)
}
return result.ErrorOrNil()
}
func (b *binaryIO) cancel() error {
if b.cmd == nil || b.cmd.Process == nil {
return nil
}
// Send SIGTERM first, so logger process has a chance to flush and exit properly
if err := b.cmd.Process.Signal(syscall.SIGTERM); err != nil {
result := multierror.Append(errors.Wrap(err, "failed to send SIGTERM"))
log.L.WithError(err).Warn("failed to send SIGTERM signal, killing logging shim")
if err := b.cmd.Process.Kill(); err != nil {
result = multierror.Append(result, errors.Wrap(err, "failed to kill process after faulty SIGTERM"))
}
return result.ErrorOrNil()
}
done := make(chan error, 1)
go func() {
done <- b.cmd.Wait()
}()
select {
case err := <-done:
return err
case <-time.After(binaryIOProcTermTimeout):
log.L.Warn("failed to wait for shim logger process to exit, killing")
err := b.cmd.Process.Kill()
if err != nil {
return errors.Wrap(err, "failed to kill shim logger process")
}
return nil
}
}
func (b *binaryIO) Stdin() io.WriteCloser {
return nil
}
func (b *binaryIO) Stdout() io.ReadCloser {
return nil
}
func (b *binaryIO) Stderr() io.ReadCloser {
return nil
}
func (b *binaryIO) Set(cmd *exec.Cmd) {
if b.out != nil {
cmd.Stdout = b.out.w
}
if b.err != nil {
cmd.Stderr = b.err.w
}
}
func newPipe() (*pipe, error) {
r, w, err := os.Pipe()
if err != nil {
return nil, err
}
return &pipe{
r: r,
w: w,
}, nil
}
type pipe struct {
r *os.File
w *os.File
}
func (p *pipe) Close() error {
var result *multierror.Error
if err := p.w.Close(); err != nil {
result = multierror.Append(result, errors.Wrap(err, "failed to close write pipe"))
}
if err := p.r.Close(); err != nil {
result = multierror.Append(result, errors.Wrap(err, "failed to close read pipe"))
}
return multierror.Prefix(result.ErrorOrNil(), "pipe:")
}

View file

@ -1,53 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
"net/url"
"os"
"os/exec"
)
// NewBinaryCmd returns a Cmd to be used to start a logging binary.
// The Cmd is generated from the provided uri, and the container ID and
// namespace are appended to the Cmd environment.
func NewBinaryCmd(binaryURI *url.URL, id, ns string) *exec.Cmd {
var args []string
for k, vs := range binaryURI.Query() {
args = append(args, k)
if len(vs) > 0 {
args = append(args, vs[0])
}
}
cmd := exec.Command(binaryURI.Path, args...)
cmd.Env = append(cmd.Env,
"CONTAINER_ID="+id,
"CONTAINER_NAMESPACE="+ns,
)
return cmd
}
// CloseFiles closes any files passed in.
// It it used for cleanup in the event of unexpected errors.
func CloseFiles(files ...*os.File) {
for _, file := range files {
file.Close()
}
}

View file

@ -1,56 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
"context"
"io"
"time"
"github.com/containerd/console"
"github.com/containerd/containerd/pkg/stdio"
)
// Process on a system
type Process interface {
// ID returns the id for the process
ID() string
// Pid returns the pid for the process
Pid() int
// ExitStatus returns the exit status
ExitStatus() int
// ExitedAt is the time the process exited
ExitedAt() time.Time
// Stdin returns the process STDIN
Stdin() io.Closer
// Stdio returns io information for the container
Stdio() stdio.Stdio
// Status returns the process status
Status(context.Context) (string, error)
// Wait blocks until the process has exited
Wait()
// Resize resizes the process console
Resize(ws console.WinSize) error
// Start execution of the process
Start(context.Context) error
// Delete deletes the process and its resourcess
Delete(context.Context) error
// Kill kills the process
Kill(context.Context, uint32, bool) error
// SetExited sets the exit status for the process
SetExited(status int)
}

View file

@ -1,66 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
google_protobuf "github.com/gogo/protobuf/types"
)
// Mount holds filesystem mount configuration
type Mount struct {
Type string
Source string
Target string
Options []string
}
// CreateConfig hold task creation configuration
type CreateConfig struct {
ID string
Bundle string
Runtime string
Rootfs []Mount
Terminal bool
Stdin string
Stdout string
Stderr string
Checkpoint string
ParentCheckpoint string
Options *google_protobuf.Any
}
// ExecConfig holds exec creation configuration
type ExecConfig struct {
ID string
Terminal bool
Stdin string
Stdout string
Stderr string
Spec *google_protobuf.Any
}
// CheckpointConfig holds task checkpoint configuration
type CheckpointConfig struct {
WorkDir string
Path string
Exit bool
AllowOpenTCP bool
AllowExternalUnixSockets bool
AllowTerminal bool
FileLocks bool
EmptyNamespaces []string
}

View file

@ -1,202 +0,0 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
"context"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/containerd/containerd/errdefs"
runc "github.com/containerd/go-runc"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
const (
// RuncRoot is the path to the root runc state directory
RuncRoot = "/run/containerd/runc"
// InitPidFile name of the file that contains the init pid
InitPidFile = "init.pid"
)
// safePid is a thread safe wrapper for pid.
type safePid struct {
sync.Mutex
pid int
}
func (s *safePid) get() int {
s.Lock()
defer s.Unlock()
return s.pid
}
type atomicBool int32
func (ab *atomicBool) set(b bool) {
if b {
atomic.StoreInt32((*int32)(ab), 1)
} else {
atomic.StoreInt32((*int32)(ab), 0)
}
}
func (ab *atomicBool) get() bool {
return atomic.LoadInt32((*int32)(ab)) == 1
}
// TODO(mlaventure): move to runc package?
func getLastRuntimeError(r *runc.Runc) (string, error) {
if r.Log == "" {
return "", nil
}
f, err := os.OpenFile(r.Log, os.O_RDONLY, 0400)
if err != nil {
return "", err
}
defer f.Close()
var (
errMsg string
log struct {
Level string
Msg string
Time time.Time
}
)
dec := json.NewDecoder(f)
for err = nil; err == nil; {
if err = dec.Decode(&log); err != nil && err != io.EOF {
return "", err
}
if log.Level == "error" {
errMsg = strings.TrimSpace(log.Msg)
}
}
return errMsg, nil
}
// criuError returns only the first line of the error message from criu
// it tries to add an invalid dump log location when returning the message
func criuError(err error) string {
parts := strings.Split(err.Error(), "\n")
return parts[0]
}
func copyFile(to, from string) error {
ff, err := os.Open(from)
if err != nil {
return err
}
defer ff.Close()
tt, err := os.Create(to)
if err != nil {
return err
}
defer tt.Close()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
_, err = io.CopyBuffer(tt, ff, *p)
return err
}
func checkKillError(err error) error {
if err == nil {
return nil
}
if strings.Contains(err.Error(), "os: process already finished") ||
strings.Contains(err.Error(), "container not running") ||
strings.Contains(strings.ToLower(err.Error()), "no such process") ||
err == unix.ESRCH {
return errors.Wrapf(errdefs.ErrNotFound, "process already finished")
} else if strings.Contains(err.Error(), "does not exist") {
return errors.Wrapf(errdefs.ErrNotFound, "no such container")
}
return errors.Wrapf(err, "unknown error after kill")
}
func newPidFile(bundle string) *pidFile {
return &pidFile{
path: filepath.Join(bundle, InitPidFile),
}
}
func newExecPidFile(bundle, id string) *pidFile {
return &pidFile{
path: filepath.Join(bundle, fmt.Sprintf("%s.pid", id)),
}
}
type pidFile struct {
path string
}
func (p *pidFile) Path() string {
return p.path
}
func (p *pidFile) Read() (int, error) {
return runc.ReadPidFile(p.path)
}
// waitTimeout handles waiting on a waitgroup with a specified timeout.
// this is commonly used for waiting on IO to finish after a process has exited
func waitTimeout(ctx context.Context, wg *sync.WaitGroup, timeout time.Duration) error {
ctx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
done := make(chan struct{})
go func() {
wg.Wait()
close(done)
}()
select {
case <-done:
return nil
case <-ctx.Done():
return ctx.Err()
}
}
func stateName(v interface{}) string {
switch v.(type) {
case *runningState, *execRunningState:
return "running"
case *createdState, *execCreatedState, *createdCheckpointState:
return "created"
case *pausedState:
return "paused"
case *deletedState:
return "deleted"
case *stoppedState:
return "stopped"
}
panic(errors.Errorf("invalid state %v", v))
}

View file

@ -1,33 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package stdio
import (
"context"
"sync"
"github.com/containerd/console"
)
// Platform handles platform-specific behavior that may differs across
// platform implementations
type Platform interface {
CopyConsole(ctx context.Context, console console.Console, id, stdin, stdout, stderr string,
wg *sync.WaitGroup) (console.Console, error)
ShutdownConsole(ctx context.Context, console console.Console) error
Close() error
}

View file

@ -1,30 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package stdio
// Stdio of a process
type Stdio struct {
Stdin string
Stdout string
Stderr string
Terminal bool
}
// IsNull returns true if the stdio is not defined
func (s Stdio) IsNull() bool {
return s.Stdin == "" && s.Stdout == "" && s.Stderr == ""
}

View file

@ -1,42 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package runtime
const (
// TaskCreateEventTopic for task create
TaskCreateEventTopic = "/tasks/create"
// TaskStartEventTopic for task start
TaskStartEventTopic = "/tasks/start"
// TaskOOMEventTopic for task oom
TaskOOMEventTopic = "/tasks/oom"
// TaskExitEventTopic for task exit
TaskExitEventTopic = "/tasks/exit"
// TaskDeleteEventTopic for task delete
TaskDeleteEventTopic = "/tasks/delete"
// TaskExecAddedEventTopic for task exec create
TaskExecAddedEventTopic = "/tasks/exec-added"
// TaskExecStartedEventTopic for task exec start
TaskExecStartedEventTopic = "/tasks/exec-started"
// TaskPausedEventTopic for task pause
TaskPausedEventTopic = "/tasks/paused"
// TaskResumedEventTopic for task resume
TaskResumedEventTopic = "/tasks/resumed"
// TaskCheckpointedEventTopic for task checkpoint
TaskCheckpointedEventTopic = "/tasks/checkpointed"
// TaskUnknownTopic for unknown task events
TaskUnknownTopic = "/tasks/?"
)

View file

@ -1,70 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package runtime
// TaskMonitor provides an interface for monitoring of containers within containerd
type TaskMonitor interface {
// Monitor adds the provided container to the monitor
Monitor(Task) error
// Stop stops and removes the provided container from the monitor
Stop(Task) error
}
// NewMultiTaskMonitor returns a new TaskMonitor broadcasting to the provided monitors
func NewMultiTaskMonitor(monitors ...TaskMonitor) TaskMonitor {
return &multiTaskMonitor{
monitors: monitors,
}
}
// NewNoopMonitor is a task monitor that does nothing
func NewNoopMonitor() TaskMonitor {
return &noopTaskMonitor{}
}
type noopTaskMonitor struct {
}
func (mm *noopTaskMonitor) Monitor(c Task) error {
return nil
}
func (mm *noopTaskMonitor) Stop(c Task) error {
return nil
}
type multiTaskMonitor struct {
monitors []TaskMonitor
}
func (mm *multiTaskMonitor) Monitor(c Task) error {
for _, m := range mm.monitors {
if err := m.Monitor(c); err != nil {
return err
}
}
return nil
}
func (mm *multiTaskMonitor) Stop(c Task) error {
for _, m := range mm.monitors {
if err := m.Stop(c); err != nil {
return err
}
}
return nil
}

View file

@ -1,76 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package runtime
import (
"context"
"time"
"github.com/containerd/containerd/mount"
"github.com/gogo/protobuf/types"
)
// IO holds process IO information
type IO struct {
Stdin string
Stdout string
Stderr string
Terminal bool
}
// CreateOpts contains task creation data
type CreateOpts struct {
// Spec is the OCI runtime spec
Spec *types.Any
// Rootfs mounts to perform to gain access to the container's filesystem
Rootfs []mount.Mount
// IO for the container's main process
IO IO
// Checkpoint digest to restore container state
Checkpoint string
// RuntimeOptions for the runtime
RuntimeOptions *types.Any
// TaskOptions received for the task
TaskOptions *types.Any
// Runtime to use
Runtime string
}
// Exit information for a process
type Exit struct {
Pid uint32
Status uint32
Timestamp time.Time
}
// PlatformRuntime is responsible for the creation and management of
// tasks and processes for a platform.
type PlatformRuntime interface {
// ID of the runtime
ID() string
// Create creates a task with the provided id and options.
Create(ctx context.Context, taskID string, opts CreateOpts) (Task, error)
// Get returns a task.
Get(ctx context.Context, taskID string) (Task, error)
// Tasks returns all the current tasks for the runtime.
// Any container runs at most one task at a time.
Tasks(ctx context.Context, all bool) ([]Task, error)
// Add adds a task into runtime.
Add(ctx context.Context, task Task) error
// Delete remove a task.
Delete(ctx context.Context, taskID string)
}

View file

@ -1,135 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package runtime
import (
"context"
"time"
"github.com/gogo/protobuf/types"
)
// TaskInfo provides task specific information
type TaskInfo struct {
ID string
Runtime string
Spec []byte
Namespace string
}
// Process is a runtime object for an executing process inside a container
type Process interface {
// ID of the process
ID() string
// State returns the process state
State(ctx context.Context) (State, error)
// Kill signals a container
Kill(ctx context.Context, signal uint32, all bool) error
// ResizePty resizes the processes pty/console
ResizePty(ctx context.Context, size ConsoleSize) error
// CloseIO closes the processes IO
CloseIO(ctx context.Context) error
// Start the container's user defined process
Start(ctx context.Context) error
// Wait for the process to exit
Wait(ctx context.Context) (*Exit, error)
// Delete deletes the process
Delete(ctx context.Context) (*Exit, error)
}
// Task is the runtime object for an executing container
type Task interface {
Process
// PID of the process
PID() uint32
// Namespace that the task exists in
Namespace() string
// Pause pauses the container process
Pause(ctx context.Context) error
// Resume unpauses the container process
Resume(ctx context.Context) error
// Exec adds a process into the container
Exec(ctx context.Context, id string, opts ExecOpts) (Process, error)
// Pids returns all pids
Pids(ctx context.Context) ([]ProcessInfo, error)
// Checkpoint checkpoints a container to an image with live system data
Checkpoint(ctx context.Context, path string, opts *types.Any) error
// Update sets the provided resources to a running task
Update(ctx context.Context, resources *types.Any, annotations map[string]string) error
// Process returns a process within the task for the provided id
Process(ctx context.Context, id string) (Process, error)
// Stats returns runtime specific metrics for a task
Stats(ctx context.Context) (*types.Any, error)
}
// ExecOpts provides additional options for additional processes running in a task
type ExecOpts struct {
Spec *types.Any
IO IO
}
// ConsoleSize of a pty or windows terminal
type ConsoleSize struct {
Width uint32
Height uint32
}
// Status is the runtime status of a task and/or process
type Status int
const (
// CreatedStatus when a process has been created
CreatedStatus Status = iota + 1
// RunningStatus when a process is running
RunningStatus
// StoppedStatus when a process has stopped
StoppedStatus
// DeletedStatus when a process has been deleted
DeletedStatus
// PausedStatus when a process is paused
PausedStatus
// PausingStatus when a process is currently pausing
PausingStatus
)
// State information for a process
type State struct {
// Status is the current status of the container
Status Status
// Pid is the main process id for the container
Pid uint32
// ExitStatus of the process
// Only valid if the Status is Stopped
ExitStatus uint32
// ExitedAt is the time at which the process exited
// Only valid if the Status is Stopped
ExitedAt time.Time
Stdin string
Stdout string
Stderr string
Terminal bool
}
// ProcessInfo holds platform specific process information
type ProcessInfo struct {
// Pid is the process ID
Pid uint32
// Info includes additional process information
// Info varies by platform
Info interface{}
}

View file

@ -1,130 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package runtime
import (
"context"
"sync"
"github.com/containerd/containerd/namespaces"
"github.com/pkg/errors"
)
var (
// ErrTaskNotExists is returned when a task does not exist
ErrTaskNotExists = errors.New("task does not exist")
// ErrTaskAlreadyExists is returned when a task already exists
ErrTaskAlreadyExists = errors.New("task already exists")
)
// NewTaskList returns a new TaskList
func NewTaskList() *TaskList {
return &TaskList{
tasks: make(map[string]map[string]Task),
}
}
// TaskList holds and provides locking around tasks
type TaskList struct {
mu sync.Mutex
tasks map[string]map[string]Task
}
// Get a task
func (l *TaskList) Get(ctx context.Context, id string) (Task, error) {
l.mu.Lock()
defer l.mu.Unlock()
namespace, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return nil, err
}
tasks, ok := l.tasks[namespace]
if !ok {
return nil, ErrTaskNotExists
}
t, ok := tasks[id]
if !ok {
return nil, ErrTaskNotExists
}
return t, nil
}
// GetAll tasks under a namespace
func (l *TaskList) GetAll(ctx context.Context, noNS bool) ([]Task, error) {
l.mu.Lock()
defer l.mu.Unlock()
var o []Task
if noNS {
for ns := range l.tasks {
for _, t := range l.tasks[ns] {
o = append(o, t)
}
}
return o, nil
}
namespace, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return nil, err
}
tasks, ok := l.tasks[namespace]
if !ok {
return o, nil
}
for _, t := range tasks {
o = append(o, t)
}
return o, nil
}
// Add a task
func (l *TaskList) Add(ctx context.Context, t Task) error {
namespace, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return err
}
return l.AddWithNamespace(namespace, t)
}
// AddWithNamespace adds a task with the provided namespace
func (l *TaskList) AddWithNamespace(namespace string, t Task) error {
l.mu.Lock()
defer l.mu.Unlock()
id := t.ID()
if _, ok := l.tasks[namespace]; !ok {
l.tasks[namespace] = make(map[string]Task)
}
if _, ok := l.tasks[namespace][id]; ok {
return errors.Wrap(ErrTaskAlreadyExists, id)
}
l.tasks[namespace][id] = t
return nil
}
// Delete a task
func (l *TaskList) Delete(ctx context.Context, id string) {
l.mu.Lock()
defer l.mu.Unlock()
namespace, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return
}
tasks, ok := l.tasks[namespace]
if ok {
delete(tasks, id)
}
}

View file

@ -1,34 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package runtime
import (
"strconv"
"github.com/containerd/typeurl"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
func init() {
const prefix = "types.containerd.io"
// register TypeUrls for commonly marshaled external types
major := strconv.Itoa(specs.VersionMajor)
typeurl.Register(&specs.Spec{}, prefix, "opencontainers/runtime-spec", major, "Spec")
typeurl.Register(&specs.Process{}, prefix, "opencontainers/runtime-spec", major, "Process")
typeurl.Register(&specs.LinuxResources{}, prefix, "opencontainers/runtime-spec", major, "LinuxResources")
typeurl.Register(&specs.WindowsResources{}, prefix, "opencontainers/runtime-spec", major, "WindowsResources")
}

View file

@ -1,253 +0,0 @@
// +build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package linux
import (
"context"
"crypto/sha256"
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"github.com/containerd/containerd/events/exchange"
"github.com/containerd/containerd/runtime/linux/runctypes"
"github.com/containerd/containerd/runtime/v1/shim"
"github.com/containerd/containerd/runtime/v1/shim/client"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
)
// loadBundle loads an existing bundle from disk
func loadBundle(id, path, workdir string) *bundle {
return &bundle{
id: id,
path: path,
workDir: workdir,
}
}
// newBundle creates a new bundle on disk at the provided path for the given id
func newBundle(id, path, workDir string, spec []byte) (b *bundle, err error) {
if err := os.MkdirAll(path, 0711); err != nil {
return nil, err
}
path = filepath.Join(path, id)
if err := os.Mkdir(path, 0700); err != nil {
return nil, err
}
defer func() {
if err != nil {
os.RemoveAll(path)
}
}()
if err := prepareBundleDirectoryPermissions(path, spec); err != nil {
return nil, err
}
workDir = filepath.Join(workDir, id)
if err := os.MkdirAll(workDir, 0711); err != nil {
return nil, err
}
defer func() {
if err != nil {
os.RemoveAll(workDir)
}
}()
rootfs := filepath.Join(path, "rootfs")
if err := os.MkdirAll(rootfs, 0711); err != nil {
return nil, err
}
err = ioutil.WriteFile(filepath.Join(path, configFilename), spec, 0666)
return &bundle{
id: id,
path: path,
workDir: workDir,
}, err
}
// prepareBundleDirectoryPermissions prepares the permissions of the bundle
// directory. When user namespaces are enabled, the permissions are modified
// to allow the remapped root GID to access the bundle.
func prepareBundleDirectoryPermissions(path string, spec []byte) error {
gid, err := remappedGID(spec)
if err != nil {
return err
}
if gid == 0 {
return nil
}
if err := os.Chown(path, -1, int(gid)); err != nil {
return err
}
return os.Chmod(path, 0710)
}
// ociSpecUserNS is a subset of specs.Spec used to reduce garbage during
// unmarshal.
type ociSpecUserNS struct {
Linux *linuxSpecUserNS
}
// linuxSpecUserNS is a subset of specs.Linux used to reduce garbage during
// unmarshal.
type linuxSpecUserNS struct {
GIDMappings []specs.LinuxIDMapping
}
// remappedGID reads the remapped GID 0 from the OCI spec, if it exists. If
// there is no remapping, remappedGID returns 0. If the spec cannot be parsed,
// remappedGID returns an error.
func remappedGID(spec []byte) (uint32, error) {
var ociSpec ociSpecUserNS
err := json.Unmarshal(spec, &ociSpec)
if err != nil {
return 0, err
}
if ociSpec.Linux == nil || len(ociSpec.Linux.GIDMappings) == 0 {
return 0, nil
}
for _, mapping := range ociSpec.Linux.GIDMappings {
if mapping.ContainerID == 0 {
return mapping.HostID, nil
}
}
return 0, nil
}
type bundle struct {
id string
path string
workDir string
}
// ShimOpt specifies shim options for initialization and connection
type ShimOpt func(*bundle, string, *runctypes.RuncOptions) (shim.Config, client.Opt)
// ShimRemote is a ShimOpt for connecting and starting a remote shim
func ShimRemote(c *Config, daemonAddress, cgroup string, exitHandler func()) ShimOpt {
return func(b *bundle, ns string, ropts *runctypes.RuncOptions) (shim.Config, client.Opt) {
config := b.shimConfig(ns, c, ropts)
return config,
client.WithStart(c.Shim, b.shimAddress(ns, daemonAddress), daemonAddress, cgroup, c.ShimDebug, exitHandler)
}
}
// ShimLocal is a ShimOpt for using an in process shim implementation
func ShimLocal(c *Config, exchange *exchange.Exchange) ShimOpt {
return func(b *bundle, ns string, ropts *runctypes.RuncOptions) (shim.Config, client.Opt) {
return b.shimConfig(ns, c, ropts), client.WithLocal(exchange)
}
}
// ShimConnect is a ShimOpt for connecting to an existing remote shim
func ShimConnect(c *Config, onClose func()) ShimOpt {
return func(b *bundle, ns string, ropts *runctypes.RuncOptions) (shim.Config, client.Opt) {
return b.shimConfig(ns, c, ropts), client.WithConnect(b.decideShimAddress(ns), onClose)
}
}
// NewShimClient connects to the shim managing the bundle and tasks creating it if needed
func (b *bundle) NewShimClient(ctx context.Context, namespace string, getClientOpts ShimOpt, runcOpts *runctypes.RuncOptions) (*client.Client, error) {
cfg, opt := getClientOpts(b, namespace, runcOpts)
return client.New(ctx, cfg, opt)
}
// Delete deletes the bundle from disk
func (b *bundle) Delete() error {
address, _ := b.loadAddress()
if address != "" {
// we don't care about errors here
client.RemoveSocket(address)
}
err := atomicDelete(b.path)
if err == nil {
return atomicDelete(b.workDir)
}
// error removing the bundle path; still attempt removing work dir
err2 := atomicDelete(b.workDir)
if err2 == nil {
return err
}
return errors.Wrapf(err, "Failed to remove both bundle and workdir locations: %v", err2)
}
func (b *bundle) legacyShimAddress(namespace string) string {
return filepath.Join(string(filepath.Separator), "containerd-shim", namespace, b.id, "shim.sock")
}
const socketRoot = "/run/containerd"
func (b *bundle) shimAddress(namespace, socketPath string) string {
d := sha256.Sum256([]byte(filepath.Join(socketPath, namespace, b.id)))
return fmt.Sprintf("unix://%s/%x", filepath.Join(socketRoot, "s"), d)
}
func (b *bundle) loadAddress() (string, error) {
addressPath := filepath.Join(b.path, "address")
data, err := ioutil.ReadFile(addressPath)
if err != nil {
return "", err
}
return string(data), nil
}
func (b *bundle) decideShimAddress(namespace string) string {
address, err := b.loadAddress()
if err != nil {
return b.legacyShimAddress(namespace)
}
return address
}
func (b *bundle) shimConfig(namespace string, c *Config, runcOptions *runctypes.RuncOptions) shim.Config {
var (
criuPath string
runtimeRoot = c.RuntimeRoot
systemdCgroup bool
)
if runcOptions != nil {
criuPath = runcOptions.CriuPath
systemdCgroup = runcOptions.SystemdCgroup
if runcOptions.RuntimeRoot != "" {
runtimeRoot = runcOptions.RuntimeRoot
}
}
return shim.Config{
Path: b.path,
WorkDir: b.workDir,
Namespace: namespace,
Criu: criuPath,
RuntimeRoot: runtimeRoot,
SystemdCgroup: systemdCgroup,
}
}
// atomicDelete renames the path to a hidden file before removal
func atomicDelete(path string) error {
// create a hidden dir for an atomic removal
atomicPath := filepath.Join(filepath.Dir(path), fmt.Sprintf(".%s", filepath.Base(path)))
if err := os.Rename(path, atomicPath); err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
return os.RemoveAll(atomicPath)
}

View file

@ -1,166 +0,0 @@
// +build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package linux
import (
"context"
eventstypes "github.com/containerd/containerd/api/events"
"github.com/containerd/containerd/api/types/task"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/runtime"
shim "github.com/containerd/containerd/runtime/v1/shim/v1"
"github.com/containerd/ttrpc"
"github.com/pkg/errors"
)
// Process implements a linux process
type Process struct {
id string
t *Task
}
// ID of the process
func (p *Process) ID() string {
return p.id
}
// Kill sends the provided signal to the underlying process
//
// Unable to kill all processes in the task using this method on a process
func (p *Process) Kill(ctx context.Context, signal uint32, _ bool) error {
_, err := p.t.shim.Kill(ctx, &shim.KillRequest{
Signal: signal,
ID: p.id,
})
if err != nil {
return errdefs.FromGRPC(err)
}
return err
}
// State of process
func (p *Process) State(ctx context.Context) (runtime.State, error) {
// use the container status for the status of the process
response, err := p.t.shim.State(ctx, &shim.StateRequest{
ID: p.id,
})
if err != nil {
if !errors.Is(err, ttrpc.ErrClosed) {
return runtime.State{}, errdefs.FromGRPC(err)
}
// We treat ttrpc.ErrClosed as the shim being closed, but really this
// likely means that the process no longer exists. We'll have to plumb
// the connection differently if this causes problems.
return runtime.State{}, errdefs.ErrNotFound
}
var status runtime.Status
switch response.Status {
case task.StatusCreated:
status = runtime.CreatedStatus
case task.StatusRunning:
status = runtime.RunningStatus
case task.StatusStopped:
status = runtime.StoppedStatus
case task.StatusPaused:
status = runtime.PausedStatus
case task.StatusPausing:
status = runtime.PausingStatus
}
return runtime.State{
Pid: response.Pid,
Status: status,
Stdin: response.Stdin,
Stdout: response.Stdout,
Stderr: response.Stderr,
Terminal: response.Terminal,
ExitStatus: response.ExitStatus,
}, nil
}
// ResizePty changes the side of the process's PTY to the provided width and height
func (p *Process) ResizePty(ctx context.Context, size runtime.ConsoleSize) error {
_, err := p.t.shim.ResizePty(ctx, &shim.ResizePtyRequest{
ID: p.id,
Width: size.Width,
Height: size.Height,
})
if err != nil {
err = errdefs.FromGRPC(err)
}
return err
}
// CloseIO closes the provided IO pipe for the process
func (p *Process) CloseIO(ctx context.Context) error {
_, err := p.t.shim.CloseIO(ctx, &shim.CloseIORequest{
ID: p.id,
Stdin: true,
})
if err != nil {
return errdefs.FromGRPC(err)
}
return nil
}
// Start the process
func (p *Process) Start(ctx context.Context) error {
r, err := p.t.shim.Start(ctx, &shim.StartRequest{
ID: p.id,
})
if err != nil {
return errdefs.FromGRPC(err)
}
p.t.events.Publish(ctx, runtime.TaskExecStartedEventTopic, &eventstypes.TaskExecStarted{
ContainerID: p.t.id,
Pid: r.Pid,
ExecID: p.id,
})
return nil
}
// Wait on the process to exit and return the exit status and timestamp
func (p *Process) Wait(ctx context.Context) (*runtime.Exit, error) {
r, err := p.t.shim.Wait(ctx, &shim.WaitRequest{
ID: p.id,
})
if err != nil {
return nil, err
}
return &runtime.Exit{
Timestamp: r.ExitedAt,
Status: r.ExitStatus,
}, nil
}
// Delete the process and return the exit status
func (p *Process) Delete(ctx context.Context) (*runtime.Exit, error) {
r, err := p.t.shim.DeleteProcess(ctx, &shim.DeleteProcessRequest{
ID: p.id,
})
if err != nil {
return nil, errdefs.FromGRPC(err)
}
return &runtime.Exit{
Status: r.ExitStatus,
Timestamp: r.ExitedAt,
Pid: r.Pid,
}, nil
}

View file

@ -1,534 +0,0 @@
// +build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package linux
import (
"context"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"time"
eventstypes "github.com/containerd/containerd/api/events"
"github.com/containerd/containerd/api/types"
"github.com/containerd/containerd/containers"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/events/exchange"
"github.com/containerd/containerd/identifiers"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/metadata"
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/namespaces"
"github.com/containerd/containerd/pkg/process"
"github.com/containerd/containerd/platforms"
"github.com/containerd/containerd/plugin"
"github.com/containerd/containerd/runtime"
"github.com/containerd/containerd/runtime/linux/runctypes"
v1 "github.com/containerd/containerd/runtime/v1"
shim "github.com/containerd/containerd/runtime/v1/shim/v1"
runc "github.com/containerd/go-runc"
"github.com/containerd/typeurl"
ptypes "github.com/gogo/protobuf/types"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
var (
pluginID = fmt.Sprintf("%s.%s", plugin.RuntimePlugin, "linux")
empty = &ptypes.Empty{}
)
const (
configFilename = "config.json"
defaultRuntime = "runc"
defaultShim = "containerd-shim"
// cleanupTimeout is default timeout for cleanup operations
cleanupTimeout = 1 * time.Minute
)
func init() {
plugin.Register(&plugin.Registration{
Type: plugin.RuntimePlugin,
ID: "linux",
InitFn: New,
Requires: []plugin.Type{
plugin.MetadataPlugin,
},
Config: &Config{
Shim: defaultShim,
Runtime: defaultRuntime,
},
})
}
var _ = (runtime.PlatformRuntime)(&Runtime{})
// Config options for the runtime
type Config struct {
// Shim is a path or name of binary implementing the Shim GRPC API
Shim string `toml:"shim"`
// Runtime is a path or name of an OCI runtime used by the shim
Runtime string `toml:"runtime"`
// RuntimeRoot is the path that shall be used by the OCI runtime for its data
RuntimeRoot string `toml:"runtime_root"`
// NoShim calls runc directly from within the pkg
NoShim bool `toml:"no_shim"`
// Debug enable debug on the shim
ShimDebug bool `toml:"shim_debug"`
}
// New returns a configured runtime
func New(ic *plugin.InitContext) (interface{}, error) {
ic.Meta.Platforms = []ocispec.Platform{platforms.DefaultSpec()}
if err := os.MkdirAll(ic.Root, 0711); err != nil {
return nil, err
}
if err := os.MkdirAll(ic.State, 0711); err != nil {
return nil, err
}
m, err := ic.Get(plugin.MetadataPlugin)
if err != nil {
return nil, err
}
cfg := ic.Config.(*Config)
r := &Runtime{
root: ic.Root,
state: ic.State,
tasks: runtime.NewTaskList(),
containers: metadata.NewContainerStore(m.(*metadata.DB)),
address: ic.Address,
events: ic.Events,
config: cfg,
}
tasks, err := r.restoreTasks(ic.Context)
if err != nil {
return nil, err
}
for _, t := range tasks {
if err := r.tasks.AddWithNamespace(t.namespace, t); err != nil {
return nil, err
}
}
return r, nil
}
// Runtime for a linux based system
type Runtime struct {
root string
state string
address string
tasks *runtime.TaskList
containers containers.Store
events *exchange.Exchange
config *Config
}
// ID of the runtime
func (r *Runtime) ID() string {
return pluginID
}
// Create a new task
func (r *Runtime) Create(ctx context.Context, id string, opts runtime.CreateOpts) (_ runtime.Task, err error) {
namespace, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return nil, err
}
if err := identifiers.Validate(id); err != nil {
return nil, errors.Wrapf(err, "invalid task id")
}
ropts, err := r.getRuncOptions(ctx, id)
if err != nil {
return nil, err
}
bundle, err := newBundle(id,
filepath.Join(r.state, namespace),
filepath.Join(r.root, namespace),
opts.Spec.Value)
if err != nil {
return nil, err
}
defer func() {
if err != nil {
bundle.Delete()
}
}()
shimopt := ShimLocal(r.config, r.events)
if !r.config.NoShim {
var cgroup string
if opts.TaskOptions != nil {
v, err := typeurl.UnmarshalAny(opts.TaskOptions)
if err != nil {
return nil, err
}
cgroup = v.(*runctypes.CreateOptions).ShimCgroup
}
exitHandler := func() {
log.G(ctx).WithField("id", id).Info("shim reaped")
if _, err := r.tasks.Get(ctx, id); err != nil {
// Task was never started or was already successfully deleted
return
}
if err = r.cleanupAfterDeadShim(context.Background(), bundle, namespace, id); err != nil {
log.G(ctx).WithError(err).WithFields(logrus.Fields{
"id": id,
"namespace": namespace,
}).Warn("failed to clean up after killed shim")
}
}
shimopt = ShimRemote(r.config, r.address, cgroup, exitHandler)
}
s, err := bundle.NewShimClient(ctx, namespace, shimopt, ropts)
if err != nil {
return nil, err
}
defer func() {
if err != nil {
deferCtx, deferCancel := context.WithTimeout(
namespaces.WithNamespace(context.TODO(), namespace), cleanupTimeout)
defer deferCancel()
if kerr := s.KillShim(deferCtx); kerr != nil {
log.G(ctx).WithError(kerr).Error("failed to kill shim")
}
}
}()
rt := r.config.Runtime
if ropts != nil && ropts.Runtime != "" {
rt = ropts.Runtime
}
sopts := &shim.CreateTaskRequest{
ID: id,
Bundle: bundle.path,
Runtime: rt,
Stdin: opts.IO.Stdin,
Stdout: opts.IO.Stdout,
Stderr: opts.IO.Stderr,
Terminal: opts.IO.Terminal,
Checkpoint: opts.Checkpoint,
Options: opts.TaskOptions,
}
for _, m := range opts.Rootfs {
sopts.Rootfs = append(sopts.Rootfs, &types.Mount{
Type: m.Type,
Source: m.Source,
Options: m.Options,
})
}
cr, err := s.Create(ctx, sopts)
if err != nil {
return nil, errdefs.FromGRPC(err)
}
t, err := newTask(id, namespace, int(cr.Pid), s, r.events, r.tasks, bundle)
if err != nil {
return nil, err
}
if err := r.tasks.Add(ctx, t); err != nil {
return nil, err
}
r.events.Publish(ctx, runtime.TaskCreateEventTopic, &eventstypes.TaskCreate{
ContainerID: sopts.ID,
Bundle: sopts.Bundle,
Rootfs: sopts.Rootfs,
IO: &eventstypes.TaskIO{
Stdin: sopts.Stdin,
Stdout: sopts.Stdout,
Stderr: sopts.Stderr,
Terminal: sopts.Terminal,
},
Checkpoint: sopts.Checkpoint,
Pid: uint32(t.pid),
})
return t, nil
}
// Tasks returns all tasks known to the runtime
func (r *Runtime) Tasks(ctx context.Context, all bool) ([]runtime.Task, error) {
return r.tasks.GetAll(ctx, all)
}
func (r *Runtime) restoreTasks(ctx context.Context) ([]*Task, error) {
dir, err := ioutil.ReadDir(r.state)
if err != nil {
return nil, err
}
var o []*Task
for _, namespace := range dir {
if !namespace.IsDir() {
continue
}
name := namespace.Name()
// skip hidden directories
if len(name) > 0 && name[0] == '.' {
continue
}
log.G(ctx).WithField("namespace", name).Debug("loading tasks in namespace")
tasks, err := r.loadTasks(ctx, name)
if err != nil {
return nil, err
}
o = append(o, tasks...)
}
return o, nil
}
// Get a specific task by task id
func (r *Runtime) Get(ctx context.Context, id string) (runtime.Task, error) {
return r.tasks.Get(ctx, id)
}
// Add a runtime task
func (r *Runtime) Add(ctx context.Context, task runtime.Task) error {
return r.tasks.Add(ctx, task)
}
// Delete a runtime task
func (r *Runtime) Delete(ctx context.Context, id string) {
r.tasks.Delete(ctx, id)
}
func (r *Runtime) loadTasks(ctx context.Context, ns string) ([]*Task, error) {
dir, err := ioutil.ReadDir(filepath.Join(r.state, ns))
if err != nil {
return nil, err
}
var o []*Task
for _, path := range dir {
if !path.IsDir() {
continue
}
id := path.Name()
// skip hidden directories
if len(id) > 0 && id[0] == '.' {
continue
}
bundle := loadBundle(
id,
filepath.Join(r.state, ns, id),
filepath.Join(r.root, ns, id),
)
ctx = namespaces.WithNamespace(ctx, ns)
pid, _ := runc.ReadPidFile(filepath.Join(bundle.path, process.InitPidFile))
shimExit := make(chan struct{})
s, err := bundle.NewShimClient(ctx, ns, ShimConnect(r.config, func() {
defer close(shimExit)
if _, err := r.tasks.Get(ctx, id); err != nil {
// Task was never started or was already successfully deleted
return
}
if err := r.cleanupAfterDeadShim(ctx, bundle, ns, id); err != nil {
log.G(ctx).WithError(err).WithField("bundle", bundle.path).
Error("cleaning up after dead shim")
}
}), nil)
if err != nil {
log.G(ctx).WithError(err).WithFields(logrus.Fields{
"id": id,
"namespace": ns,
}).Error("connecting to shim")
err := r.cleanupAfterDeadShim(ctx, bundle, ns, id)
if err != nil {
log.G(ctx).WithError(err).WithField("bundle", bundle.path).
Error("cleaning up after dead shim")
}
continue
}
logDirPath := filepath.Join(r.root, ns, id)
copyAndClose := func(dst io.Writer, src io.ReadWriteCloser) {
copyDone := make(chan struct{})
go func() {
io.Copy(dst, src)
close(copyDone)
}()
select {
case <-shimExit:
case <-copyDone:
}
src.Close()
}
shimStdoutLog, err := v1.OpenShimStdoutLog(ctx, logDirPath)
if err != nil {
log.G(ctx).WithError(err).WithFields(logrus.Fields{
"id": id,
"namespace": ns,
"logDirPath": logDirPath,
}).Error("opening shim stdout log pipe")
continue
}
if r.config.ShimDebug {
go copyAndClose(os.Stdout, shimStdoutLog)
} else {
go copyAndClose(ioutil.Discard, shimStdoutLog)
}
shimStderrLog, err := v1.OpenShimStderrLog(ctx, logDirPath)
if err != nil {
log.G(ctx).WithError(err).WithFields(logrus.Fields{
"id": id,
"namespace": ns,
"logDirPath": logDirPath,
}).Error("opening shim stderr log pipe")
continue
}
if r.config.ShimDebug {
go copyAndClose(os.Stderr, shimStderrLog)
} else {
go copyAndClose(ioutil.Discard, shimStderrLog)
}
t, err := newTask(id, ns, pid, s, r.events, r.tasks, bundle)
if err != nil {
log.G(ctx).WithError(err).Error("loading task type")
continue
}
o = append(o, t)
}
return o, nil
}
func (r *Runtime) cleanupAfterDeadShim(ctx context.Context, bundle *bundle, ns, id string) error {
log.G(ctx).WithFields(logrus.Fields{
"id": id,
"namespace": ns,
}).Warn("cleaning up after shim dead")
pid, _ := runc.ReadPidFile(filepath.Join(bundle.path, process.InitPidFile))
ctx = namespaces.WithNamespace(ctx, ns)
if err := r.terminate(ctx, bundle, ns, id); err != nil {
if r.config.ShimDebug {
return errors.Wrap(err, "failed to terminate task, leaving bundle for debugging")
}
log.G(ctx).WithError(err).Warn("failed to terminate task")
}
// Notify Client
exitedAt := time.Now().UTC()
r.events.Publish(ctx, runtime.TaskExitEventTopic, &eventstypes.TaskExit{
ContainerID: id,
ID: id,
Pid: uint32(pid),
ExitStatus: 128 + uint32(unix.SIGKILL),
ExitedAt: exitedAt,
})
r.tasks.Delete(ctx, id)
if err := bundle.Delete(); err != nil {
log.G(ctx).WithError(err).Error("delete bundle")
}
// kill shim
if shimPid, err := runc.ReadPidFile(filepath.Join(bundle.path, "shim.pid")); err == nil && shimPid > 0 {
unix.Kill(shimPid, unix.SIGKILL)
}
r.events.Publish(ctx, runtime.TaskDeleteEventTopic, &eventstypes.TaskDelete{
ContainerID: id,
Pid: uint32(pid),
ExitStatus: 128 + uint32(unix.SIGKILL),
ExitedAt: exitedAt,
})
return nil
}
func (r *Runtime) terminate(ctx context.Context, bundle *bundle, ns, id string) error {
rt, err := r.getRuntime(ctx, ns, id)
if err != nil {
return err
}
if err := rt.Delete(ctx, id, &runc.DeleteOpts{
Force: true,
}); err != nil {
log.G(ctx).WithError(err).Warnf("delete runtime state %s", id)
}
if err := mount.Unmount(filepath.Join(bundle.path, "rootfs"), 0); err != nil {
log.G(ctx).WithError(err).WithFields(logrus.Fields{
"path": bundle.path,
"id": id,
}).Warnf("unmount task rootfs")
}
return nil
}
func (r *Runtime) getRuntime(ctx context.Context, ns, id string) (*runc.Runc, error) {
ropts, err := r.getRuncOptions(ctx, id)
if err != nil {
return nil, err
}
var (
cmd = r.config.Runtime
root = process.RuncRoot
)
if ropts != nil {
if ropts.Runtime != "" {
cmd = ropts.Runtime
}
if ropts.RuntimeRoot != "" {
root = ropts.RuntimeRoot
}
}
return &runc.Runc{
Command: cmd,
LogFormat: runc.JSON,
PdeathSignal: unix.SIGKILL,
Root: filepath.Join(root, ns),
Debug: r.config.ShimDebug,
}, nil
}
func (r *Runtime) getRuncOptions(ctx context.Context, id string) (*runctypes.RuncOptions, error) {
container, err := r.containers.Get(ctx, id)
if err != nil {
return nil, err
}
if container.Runtime.Options != nil {
v, err := typeurl.UnmarshalAny(container.Runtime.Options)
if err != nil {
return nil, err
}
ropts, ok := v.(*runctypes.RuncOptions)
if !ok {
return nil, errors.New("invalid runtime options format")
}
return ropts, nil
}
return &runctypes.RuncOptions{}, nil
}

View file

@ -1,366 +0,0 @@
// +build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package linux
import (
"context"
"sync"
"github.com/containerd/cgroups"
eventstypes "github.com/containerd/containerd/api/events"
"github.com/containerd/containerd/api/types/task"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/events/exchange"
"github.com/containerd/containerd/identifiers"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/runtime"
"github.com/containerd/containerd/runtime/v1/shim/client"
"github.com/containerd/containerd/runtime/v1/shim/v1"
"github.com/containerd/ttrpc"
"github.com/containerd/typeurl"
"github.com/gogo/protobuf/types"
"github.com/pkg/errors"
)
// Task on a linux based system
type Task struct {
mu sync.Mutex
id string
pid int
shim *client.Client
namespace string
cg cgroups.Cgroup
events *exchange.Exchange
tasks *runtime.TaskList
bundle *bundle
}
func newTask(id, namespace string, pid int, shim *client.Client, events *exchange.Exchange, list *runtime.TaskList, bundle *bundle) (*Task, error) {
var (
err error
cg cgroups.Cgroup
)
if pid > 0 {
cg, err = cgroups.Load(cgroups.V1, cgroups.PidPath(pid))
if err != nil && err != cgroups.ErrCgroupDeleted {
return nil, err
}
}
return &Task{
id: id,
pid: pid,
shim: shim,
namespace: namespace,
cg: cg,
events: events,
tasks: list,
bundle: bundle,
}, nil
}
// ID of the task
func (t *Task) ID() string {
return t.id
}
// Namespace of the task
func (t *Task) Namespace() string {
return t.namespace
}
// PID of the task
func (t *Task) PID() uint32 {
return uint32(t.pid)
}
// Delete the task and return the exit status
func (t *Task) Delete(ctx context.Context) (*runtime.Exit, error) {
rsp, shimErr := t.shim.Delete(ctx, empty)
if shimErr != nil {
shimErr = errdefs.FromGRPC(shimErr)
if !errdefs.IsNotFound(shimErr) {
return nil, shimErr
}
}
t.tasks.Delete(ctx, t.id)
if err := t.shim.KillShim(ctx); err != nil {
log.G(ctx).WithError(err).Error("failed to kill shim")
}
if err := t.bundle.Delete(); err != nil {
log.G(ctx).WithError(err).Error("failed to delete bundle")
}
if shimErr != nil {
return nil, shimErr
}
t.events.Publish(ctx, runtime.TaskDeleteEventTopic, &eventstypes.TaskDelete{
ContainerID: t.id,
ExitStatus: rsp.ExitStatus,
ExitedAt: rsp.ExitedAt,
Pid: rsp.Pid,
})
return &runtime.Exit{
Status: rsp.ExitStatus,
Timestamp: rsp.ExitedAt,
Pid: rsp.Pid,
}, nil
}
// Start the task
func (t *Task) Start(ctx context.Context) error {
t.mu.Lock()
hasCgroup := t.cg != nil
t.mu.Unlock()
r, err := t.shim.Start(ctx, &shim.StartRequest{
ID: t.id,
})
if err != nil {
return errdefs.FromGRPC(err)
}
t.pid = int(r.Pid)
if !hasCgroup {
cg, err := cgroups.Load(cgroups.V1, cgroups.PidPath(t.pid))
if err != nil && err != cgroups.ErrCgroupDeleted {
return err
}
t.mu.Lock()
if err == cgroups.ErrCgroupDeleted {
t.cg = nil
} else {
t.cg = cg
}
t.mu.Unlock()
}
t.events.Publish(ctx, runtime.TaskStartEventTopic, &eventstypes.TaskStart{
ContainerID: t.id,
Pid: uint32(t.pid),
})
return nil
}
// State returns runtime information for the task
func (t *Task) State(ctx context.Context) (runtime.State, error) {
response, err := t.shim.State(ctx, &shim.StateRequest{
ID: t.id,
})
if err != nil {
if !errors.Is(err, ttrpc.ErrClosed) {
return runtime.State{}, errdefs.FromGRPC(err)
}
return runtime.State{}, errdefs.ErrNotFound
}
var status runtime.Status
switch response.Status {
case task.StatusCreated:
status = runtime.CreatedStatus
case task.StatusRunning:
status = runtime.RunningStatus
case task.StatusStopped:
status = runtime.StoppedStatus
case task.StatusPaused:
status = runtime.PausedStatus
case task.StatusPausing:
status = runtime.PausingStatus
}
return runtime.State{
Pid: response.Pid,
Status: status,
Stdin: response.Stdin,
Stdout: response.Stdout,
Stderr: response.Stderr,
Terminal: response.Terminal,
ExitStatus: response.ExitStatus,
ExitedAt: response.ExitedAt,
}, nil
}
// Pause the task and all processes
func (t *Task) Pause(ctx context.Context) error {
if _, err := t.shim.Pause(ctx, empty); err != nil {
return errdefs.FromGRPC(err)
}
t.events.Publish(ctx, runtime.TaskPausedEventTopic, &eventstypes.TaskPaused{
ContainerID: t.id,
})
return nil
}
// Resume the task and all processes
func (t *Task) Resume(ctx context.Context) error {
if _, err := t.shim.Resume(ctx, empty); err != nil {
return errdefs.FromGRPC(err)
}
t.events.Publish(ctx, runtime.TaskResumedEventTopic, &eventstypes.TaskResumed{
ContainerID: t.id,
})
return nil
}
// Kill the task using the provided signal
//
// Optionally send the signal to all processes that are a child of the task
func (t *Task) Kill(ctx context.Context, signal uint32, all bool) error {
if _, err := t.shim.Kill(ctx, &shim.KillRequest{
ID: t.id,
Signal: signal,
All: all,
}); err != nil {
return errdefs.FromGRPC(err)
}
return nil
}
// Exec creates a new process inside the task
func (t *Task) Exec(ctx context.Context, id string, opts runtime.ExecOpts) (runtime.Process, error) {
if err := identifiers.Validate(id); err != nil {
return nil, errors.Wrapf(err, "invalid exec id")
}
request := &shim.ExecProcessRequest{
ID: id,
Stdin: opts.IO.Stdin,
Stdout: opts.IO.Stdout,
Stderr: opts.IO.Stderr,
Terminal: opts.IO.Terminal,
Spec: opts.Spec,
}
if _, err := t.shim.Exec(ctx, request); err != nil {
return nil, errdefs.FromGRPC(err)
}
return &Process{
id: id,
t: t,
}, nil
}
// Pids returns all system level process ids running inside the task
func (t *Task) Pids(ctx context.Context) ([]runtime.ProcessInfo, error) {
resp, err := t.shim.ListPids(ctx, &shim.ListPidsRequest{
ID: t.id,
})
if err != nil {
return nil, errdefs.FromGRPC(err)
}
var processList []runtime.ProcessInfo
for _, p := range resp.Processes {
processList = append(processList, runtime.ProcessInfo{
Pid: p.Pid,
Info: p.Info,
})
}
return processList, nil
}
// ResizePty changes the side of the task's PTY to the provided width and height
func (t *Task) ResizePty(ctx context.Context, size runtime.ConsoleSize) error {
_, err := t.shim.ResizePty(ctx, &shim.ResizePtyRequest{
ID: t.id,
Width: size.Width,
Height: size.Height,
})
if err != nil {
err = errdefs.FromGRPC(err)
}
return err
}
// CloseIO closes the provided IO on the task
func (t *Task) CloseIO(ctx context.Context) error {
_, err := t.shim.CloseIO(ctx, &shim.CloseIORequest{
ID: t.id,
Stdin: true,
})
if err != nil {
err = errdefs.FromGRPC(err)
}
return err
}
// Checkpoint creates a system level dump of the task and process information that can be later restored
func (t *Task) Checkpoint(ctx context.Context, path string, options *types.Any) error {
r := &shim.CheckpointTaskRequest{
Path: path,
Options: options,
}
if _, err := t.shim.Checkpoint(ctx, r); err != nil {
return errdefs.FromGRPC(err)
}
t.events.Publish(ctx, runtime.TaskCheckpointedEventTopic, &eventstypes.TaskCheckpointed{
ContainerID: t.id,
})
return nil
}
// Update changes runtime information of a running task
func (t *Task) Update(ctx context.Context, resources *types.Any, _ map[string]string) error {
if _, err := t.shim.Update(ctx, &shim.UpdateTaskRequest{
Resources: resources,
}); err != nil {
return errdefs.FromGRPC(err)
}
return nil
}
// Process returns a specific process inside the task by the process id
func (t *Task) Process(ctx context.Context, id string) (runtime.Process, error) {
p := &Process{
id: id,
t: t,
}
if _, err := p.State(ctx); err != nil {
return nil, err
}
return p, nil
}
// Stats returns runtime specific system level metric information for the task
func (t *Task) Stats(ctx context.Context) (*types.Any, error) {
t.mu.Lock()
defer t.mu.Unlock()
if t.cg == nil {
return nil, errors.Wrap(errdefs.ErrNotFound, "cgroup does not exist")
}
stats, err := t.cg.Stat(cgroups.IgnoreNotExist)
if err != nil {
return nil, err
}
return typeurl.MarshalAny(stats)
}
// Cgroup returns the underlying cgroup for a linux task
func (t *Task) Cgroup() (cgroups.Cgroup, error) {
t.mu.Lock()
defer t.mu.Unlock()
if t.cg == nil {
return nil, errors.Wrap(errdefs.ErrNotFound, "cgroup does not exist")
}
return t.cg, nil
}
// Wait for the task to exit returning the status and timestamp
func (t *Task) Wait(ctx context.Context) (*runtime.Exit, error) {
r, err := t.shim.Wait(ctx, &shim.WaitRequest{
ID: t.id,
})
if err != nil {
return nil, err
}
return &runtime.Exit{
Timestamp: r.ExitedAt,
Status: r.ExitStatus,
}, nil
}

View file

@ -1,38 +0,0 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1
import (
"context"
"io"
"path/filepath"
"github.com/containerd/fifo"
"golang.org/x/sys/unix"
)
// OpenShimStdoutLog opens the shim log for reading
func OpenShimStdoutLog(ctx context.Context, logDirPath string) (io.ReadWriteCloser, error) {
return fifo.OpenFifo(ctx, filepath.Join(logDirPath, "shim.stdout.log"), unix.O_RDWR|unix.O_CREAT, 0700)
}
// OpenShimStderrLog opens the shim log
func OpenShimStderrLog(ctx context.Context, logDirPath string) (io.ReadWriteCloser, error) {
return fifo.OpenFifo(ctx, filepath.Join(logDirPath, "shim.stderr.log"), unix.O_RDWR|unix.O_CREAT, 0700)
}

View file

@ -1,431 +0,0 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package client
import (
"context"
"fmt"
"io"
"io/ioutil"
"net"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"sync"
"syscall"
"time"
"golang.org/x/sys/unix"
"github.com/containerd/ttrpc"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/containerd/containerd/events"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/pkg/dialer"
v1 "github.com/containerd/containerd/runtime/v1"
"github.com/containerd/containerd/runtime/v1/shim"
shimapi "github.com/containerd/containerd/runtime/v1/shim/v1"
"github.com/containerd/containerd/sys"
ptypes "github.com/gogo/protobuf/types"
)
var empty = &ptypes.Empty{}
// Opt is an option for a shim client configuration
type Opt func(context.Context, shim.Config) (shimapi.ShimService, io.Closer, error)
// WithStart executes a new shim process
func WithStart(binary, address, daemonAddress, cgroup string, debug bool, exitHandler func()) Opt {
return func(ctx context.Context, config shim.Config) (_ shimapi.ShimService, _ io.Closer, err error) {
socket, err := newSocket(address)
if err != nil {
if !eaddrinuse(err) {
return nil, nil, err
}
if err := RemoveSocket(address); err != nil {
return nil, nil, errors.Wrap(err, "remove already used socket")
}
if socket, err = newSocket(address); err != nil {
return nil, nil, err
}
}
f, err := socket.File()
if err != nil {
return nil, nil, errors.Wrapf(err, "failed to get fd for socket %s", address)
}
defer f.Close()
stdoutCopy := ioutil.Discard
stderrCopy := ioutil.Discard
stdoutLog, err := v1.OpenShimStdoutLog(ctx, config.WorkDir)
if err != nil {
return nil, nil, errors.Wrapf(err, "failed to create stdout log")
}
stderrLog, err := v1.OpenShimStderrLog(ctx, config.WorkDir)
if err != nil {
return nil, nil, errors.Wrapf(err, "failed to create stderr log")
}
if debug {
stdoutCopy = os.Stdout
stderrCopy = os.Stderr
}
go io.Copy(stdoutCopy, stdoutLog)
go io.Copy(stderrCopy, stderrLog)
cmd, err := newCommand(binary, daemonAddress, debug, config, f, stdoutLog, stderrLog)
if err != nil {
return nil, nil, err
}
if err := cmd.Start(); err != nil {
return nil, nil, errors.Wrapf(err, "failed to start shim")
}
defer func() {
if err != nil {
cmd.Process.Kill()
}
}()
go func() {
cmd.Wait()
exitHandler()
if stdoutLog != nil {
stdoutLog.Close()
}
if stderrLog != nil {
stderrLog.Close()
}
socket.Close()
RemoveSocket(address)
}()
log.G(ctx).WithFields(logrus.Fields{
"pid": cmd.Process.Pid,
"address": address,
"debug": debug,
}).Infof("shim %s started", binary)
if err := writeFile(filepath.Join(config.Path, "address"), address); err != nil {
return nil, nil, err
}
if err := writeFile(filepath.Join(config.Path, "shim.pid"), strconv.Itoa(cmd.Process.Pid)); err != nil {
return nil, nil, err
}
// set shim in cgroup if it is provided
if cgroup != "" {
if err := setCgroup(cgroup, cmd); err != nil {
return nil, nil, err
}
log.G(ctx).WithFields(logrus.Fields{
"pid": cmd.Process.Pid,
"address": address,
}).Infof("shim placed in cgroup %s", cgroup)
}
if err = setupOOMScore(cmd.Process.Pid); err != nil {
return nil, nil, err
}
c, clo, err := WithConnect(address, func() {})(ctx, config)
if err != nil {
return nil, nil, errors.Wrap(err, "failed to connect")
}
return c, clo, nil
}
}
func eaddrinuse(err error) bool {
cause := errors.Cause(err)
netErr, ok := cause.(*net.OpError)
if !ok {
return false
}
if netErr.Op != "listen" {
return false
}
syscallErr, ok := netErr.Err.(*os.SyscallError)
if !ok {
return false
}
errno, ok := syscallErr.Err.(syscall.Errno)
if !ok {
return false
}
return errno == syscall.EADDRINUSE
}
// setupOOMScore gets containerd's oom score and adds +1 to it
// to ensure a shim has a lower* score than the daemons
// if not already at the maximum OOM Score
func setupOOMScore(shimPid int) error {
pid := os.Getpid()
score, err := sys.GetOOMScoreAdj(pid)
if err != nil {
return errors.Wrap(err, "get daemon OOM score")
}
shimScore := score + 1
if err := sys.AdjustOOMScore(shimPid, shimScore); err != nil {
return errors.Wrap(err, "set shim OOM score")
}
return nil
}
func newCommand(binary, daemonAddress string, debug bool, config shim.Config, socket *os.File, stdout, stderr io.Writer) (*exec.Cmd, error) {
selfExe, err := os.Executable()
if err != nil {
return nil, err
}
args := []string{
"-namespace", config.Namespace,
"-workdir", config.WorkDir,
"-address", daemonAddress,
"-containerd-binary", selfExe,
}
if config.Criu != "" {
args = append(args, "-criu-path", config.Criu)
}
if config.RuntimeRoot != "" {
args = append(args, "-runtime-root", config.RuntimeRoot)
}
if config.SystemdCgroup {
args = append(args, "-systemd-cgroup")
}
if debug {
args = append(args, "-debug")
}
cmd := exec.Command(binary, args...)
cmd.Dir = config.Path
// make sure the shim can be re-parented to system init
// and is cloned in a new mount namespace because the overlay/filesystems
// will be mounted by the shim
cmd.SysProcAttr = getSysProcAttr()
cmd.ExtraFiles = append(cmd.ExtraFiles, socket)
cmd.Env = append(os.Environ(), "GOMAXPROCS=2")
cmd.Stdout = stdout
cmd.Stderr = stderr
return cmd, nil
}
// writeFile writes a address file atomically
func writeFile(path, address string) error {
path, err := filepath.Abs(path)
if err != nil {
return err
}
tempPath := filepath.Join(filepath.Dir(path), fmt.Sprintf(".%s", filepath.Base(path)))
f, err := os.OpenFile(tempPath, os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, 0666)
if err != nil {
return err
}
_, err = f.WriteString(address)
f.Close()
if err != nil {
return err
}
return os.Rename(tempPath, path)
}
const (
abstractSocketPrefix = "\x00"
socketPathLimit = 106
)
type socket string
func (s socket) isAbstract() bool {
return !strings.HasPrefix(string(s), "unix://")
}
func (s socket) path() string {
path := strings.TrimPrefix(string(s), "unix://")
// if there was no trim performed, we assume an abstract socket
if len(path) == len(s) {
path = abstractSocketPrefix + path
}
return path
}
func newSocket(address string) (*net.UnixListener, error) {
if len(address) > socketPathLimit {
return nil, errors.Errorf("%q: unix socket path too long (> %d)", address, socketPathLimit)
}
var (
sock = socket(address)
path = sock.path()
)
if !sock.isAbstract() {
if err := os.MkdirAll(filepath.Dir(path), 0600); err != nil {
return nil, errors.Wrapf(err, "%s", path)
}
}
l, err := net.Listen("unix", path)
if err != nil {
return nil, errors.Wrapf(err, "failed to listen to unix socket %q (abstract: %t)", address, sock.isAbstract())
}
if err := os.Chmod(path, 0600); err != nil {
l.Close()
return nil, err
}
return l.(*net.UnixListener), nil
}
// RemoveSocket removes the socket at the specified address if
// it exists on the filesystem
func RemoveSocket(address string) error {
sock := socket(address)
if !sock.isAbstract() {
return os.Remove(sock.path())
}
return nil
}
func connect(address string, d func(string, time.Duration) (net.Conn, error)) (net.Conn, error) {
return d(address, 100*time.Second)
}
func anonDialer(address string, timeout time.Duration) (net.Conn, error) {
return dialer.Dialer(socket(address).path(), timeout)
}
// WithConnect connects to an existing shim
func WithConnect(address string, onClose func()) Opt {
return func(ctx context.Context, config shim.Config) (shimapi.ShimService, io.Closer, error) {
conn, err := connect(address, anonDialer)
if err != nil {
return nil, nil, err
}
client := ttrpc.NewClient(conn, ttrpc.WithOnClose(onClose))
return shimapi.NewShimClient(client), conn, nil
}
}
// WithLocal uses an in process shim
func WithLocal(publisher events.Publisher) func(context.Context, shim.Config) (shimapi.ShimService, io.Closer, error) {
return func(ctx context.Context, config shim.Config) (shimapi.ShimService, io.Closer, error) {
service, err := shim.NewService(config, publisher)
if err != nil {
return nil, nil, err
}
return shim.NewLocal(service), nil, nil
}
}
// New returns a new shim client
func New(ctx context.Context, config shim.Config, opt Opt) (*Client, error) {
s, c, err := opt(ctx, config)
if err != nil {
return nil, err
}
return &Client{
ShimService: s,
c: c,
exitCh: make(chan struct{}),
}, nil
}
// Client is a shim client containing the connection to a shim
type Client struct {
shimapi.ShimService
c io.Closer
exitCh chan struct{}
exitOnce sync.Once
}
// IsAlive returns true if the shim can be contacted.
// NOTE: a negative answer doesn't mean that the process is gone.
func (c *Client) IsAlive(ctx context.Context) (bool, error) {
_, err := c.ShimInfo(ctx, empty)
if err != nil {
// TODO(stevvooe): There are some error conditions that need to be
// handle with unix sockets existence to give the right answer here.
return false, err
}
return true, nil
}
// StopShim signals the shim to exit and wait for the process to disappear
func (c *Client) StopShim(ctx context.Context) error {
return c.signalShim(ctx, unix.SIGTERM)
}
// KillShim kills the shim forcefully and wait for the process to disappear
func (c *Client) KillShim(ctx context.Context) error {
return c.signalShim(ctx, unix.SIGKILL)
}
// Close the client connection
func (c *Client) Close() error {
if c.c == nil {
return nil
}
return c.c.Close()
}
func (c *Client) signalShim(ctx context.Context, sig syscall.Signal) error {
info, err := c.ShimInfo(ctx, empty)
if err != nil {
return err
}
pid := int(info.ShimPid)
// make sure we don't kill ourselves if we are running a local shim
if os.Getpid() == pid {
return nil
}
if err := unix.Kill(pid, sig); err != nil && err != unix.ESRCH {
return err
}
// wait for shim to die after being signaled
select {
case <-ctx.Done():
return ctx.Err()
case <-c.waitForExit(ctx, pid):
return nil
}
}
func (c *Client) waitForExit(ctx context.Context, pid int) <-chan struct{} {
go c.exitOnce.Do(func() {
defer close(c.exitCh)
ticker := time.NewTicker(10 * time.Millisecond)
defer ticker.Stop()
for {
// use kill(pid, 0) here because the shim could have been reparented
// and we are no longer able to waitpid(pid, ...) on the shim
if err := unix.Kill(pid, 0); err == unix.ESRCH {
return
}
select {
case <-ticker.C:
case <-ctx.Done():
log.G(ctx).WithField("pid", pid).Warn("timed out while waiting for shim to exit")
return
}
}
})
return c.exitCh
}

View file

@ -1,46 +0,0 @@
// +build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package client
import (
"os/exec"
"syscall"
"github.com/containerd/cgroups"
"github.com/pkg/errors"
)
func getSysProcAttr() *syscall.SysProcAttr {
return &syscall.SysProcAttr{
Setpgid: true,
}
}
func setCgroup(cgroupPath string, cmd *exec.Cmd) error {
cg, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(cgroupPath))
if err != nil {
return errors.Wrapf(err, "failed to load cgroup %s", cgroupPath)
}
if err := cg.Add(cgroups.Process{
Pid: cmd.Process.Pid,
}); err != nil {
return errors.Wrapf(err, "failed to join cgroup %s", cgroupPath)
}
return nil
}

View file

@ -1,34 +0,0 @@
// +build !linux,!windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package client
import (
"os/exec"
"syscall"
)
func getSysProcAttr() *syscall.SysProcAttr {
return &syscall.SysProcAttr{
Setpgid: true,
}
}
func setCgroup(cgroupPath string, cmd *exec.Cmd) error {
return nil
}

View file

@ -1,107 +0,0 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package shim
import (
"context"
"path/filepath"
"github.com/containerd/containerd/mount"
shimapi "github.com/containerd/containerd/runtime/v1/shim/v1"
ptypes "github.com/gogo/protobuf/types"
)
// NewLocal returns a shim client implementation for issue commands to a shim
func NewLocal(s *Service) shimapi.ShimService {
return &local{
s: s,
}
}
type local struct {
s *Service
}
func (c *local) Create(ctx context.Context, in *shimapi.CreateTaskRequest) (*shimapi.CreateTaskResponse, error) {
return c.s.Create(ctx, in)
}
func (c *local) Start(ctx context.Context, in *shimapi.StartRequest) (*shimapi.StartResponse, error) {
return c.s.Start(ctx, in)
}
func (c *local) Delete(ctx context.Context, in *ptypes.Empty) (*shimapi.DeleteResponse, error) {
// make sure we unmount the containers rootfs for this local
if err := mount.Unmount(filepath.Join(c.s.config.Path, "rootfs"), 0); err != nil {
return nil, err
}
return c.s.Delete(ctx, in)
}
func (c *local) DeleteProcess(ctx context.Context, in *shimapi.DeleteProcessRequest) (*shimapi.DeleteResponse, error) {
return c.s.DeleteProcess(ctx, in)
}
func (c *local) Exec(ctx context.Context, in *shimapi.ExecProcessRequest) (*ptypes.Empty, error) {
return c.s.Exec(ctx, in)
}
func (c *local) ResizePty(ctx context.Context, in *shimapi.ResizePtyRequest) (*ptypes.Empty, error) {
return c.s.ResizePty(ctx, in)
}
func (c *local) State(ctx context.Context, in *shimapi.StateRequest) (*shimapi.StateResponse, error) {
return c.s.State(ctx, in)
}
func (c *local) Pause(ctx context.Context, in *ptypes.Empty) (*ptypes.Empty, error) {
return c.s.Pause(ctx, in)
}
func (c *local) Resume(ctx context.Context, in *ptypes.Empty) (*ptypes.Empty, error) {
return c.s.Resume(ctx, in)
}
func (c *local) Kill(ctx context.Context, in *shimapi.KillRequest) (*ptypes.Empty, error) {
return c.s.Kill(ctx, in)
}
func (c *local) ListPids(ctx context.Context, in *shimapi.ListPidsRequest) (*shimapi.ListPidsResponse, error) {
return c.s.ListPids(ctx, in)
}
func (c *local) CloseIO(ctx context.Context, in *shimapi.CloseIORequest) (*ptypes.Empty, error) {
return c.s.CloseIO(ctx, in)
}
func (c *local) Checkpoint(ctx context.Context, in *shimapi.CheckpointTaskRequest) (*ptypes.Empty, error) {
return c.s.Checkpoint(ctx, in)
}
func (c *local) ShimInfo(ctx context.Context, in *ptypes.Empty) (*shimapi.ShimInfoResponse, error) {
return c.s.ShimInfo(ctx, in)
}
func (c *local) Update(ctx context.Context, in *shimapi.UpdateTaskRequest) (*ptypes.Empty, error) {
return c.s.Update(ctx, in)
}
func (c *local) Wait(ctx context.Context, in *shimapi.WaitRequest) (*shimapi.WaitResponse, error) {
return c.s.Wait(ctx, in)
}

View file

@ -1,673 +0,0 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package shim
import (
"context"
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"sync"
"github.com/containerd/console"
eventstypes "github.com/containerd/containerd/api/events"
"github.com/containerd/containerd/api/types/task"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/events"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/namespaces"
"github.com/containerd/containerd/pkg/process"
"github.com/containerd/containerd/pkg/stdio"
"github.com/containerd/containerd/runtime"
"github.com/containerd/containerd/runtime/linux/runctypes"
shimapi "github.com/containerd/containerd/runtime/v1/shim/v1"
"github.com/containerd/containerd/sys/reaper"
runc "github.com/containerd/go-runc"
"github.com/containerd/typeurl"
ptypes "github.com/gogo/protobuf/types"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
var (
empty = &ptypes.Empty{}
bufPool = sync.Pool{
New: func() interface{} {
buffer := make([]byte, 4096)
return &buffer
},
}
)
// Config contains shim specific configuration
type Config struct {
Path string
Namespace string
WorkDir string
Criu string
RuntimeRoot string
SystemdCgroup bool
}
// NewService returns a new shim service that can be used via GRPC
func NewService(config Config, publisher events.Publisher) (*Service, error) {
if config.Namespace == "" {
return nil, fmt.Errorf("shim namespace cannot be empty")
}
ctx := namespaces.WithNamespace(context.Background(), config.Namespace)
ctx = log.WithLogger(ctx, logrus.WithFields(logrus.Fields{
"namespace": config.Namespace,
"path": config.Path,
"pid": os.Getpid(),
}))
s := &Service{
config: config,
context: ctx,
processes: make(map[string]process.Process),
events: make(chan interface{}, 128),
ec: reaper.Default.Subscribe(),
}
go s.processExits()
if err := s.initPlatform(); err != nil {
return nil, errors.Wrap(err, "failed to initialized platform behavior")
}
go s.forward(publisher)
return s, nil
}
// Service is the shim implementation of a remote shim over GRPC
type Service struct {
mu sync.Mutex
config Config
context context.Context
processes map[string]process.Process
events chan interface{}
platform stdio.Platform
ec chan runc.Exit
// Filled by Create()
id string
bundle string
}
// Create a new initial process and container with the underlying OCI runtime
func (s *Service) Create(ctx context.Context, r *shimapi.CreateTaskRequest) (_ *shimapi.CreateTaskResponse, err error) {
var mounts []process.Mount
for _, m := range r.Rootfs {
mounts = append(mounts, process.Mount{
Type: m.Type,
Source: m.Source,
Target: m.Target,
Options: m.Options,
})
}
rootfs := ""
if len(mounts) > 0 {
rootfs = filepath.Join(r.Bundle, "rootfs")
if err := os.Mkdir(rootfs, 0711); err != nil && !os.IsExist(err) {
return nil, err
}
}
config := &process.CreateConfig{
ID: r.ID,
Bundle: r.Bundle,
Runtime: r.Runtime,
Rootfs: mounts,
Terminal: r.Terminal,
Stdin: r.Stdin,
Stdout: r.Stdout,
Stderr: r.Stderr,
Checkpoint: r.Checkpoint,
ParentCheckpoint: r.ParentCheckpoint,
Options: r.Options,
}
defer func() {
if err != nil {
if err2 := mount.UnmountAll(rootfs, 0); err2 != nil {
log.G(ctx).WithError(err2).Warn("Failed to cleanup rootfs mount")
}
}
}()
for _, rm := range mounts {
m := &mount.Mount{
Type: rm.Type,
Source: rm.Source,
Options: rm.Options,
}
if err := m.Mount(rootfs); err != nil {
return nil, errors.Wrapf(err, "failed to mount rootfs component %v", m)
}
}
s.mu.Lock()
defer s.mu.Unlock()
process, err := newInit(
ctx,
s.config.Path,
s.config.WorkDir,
s.config.RuntimeRoot,
s.config.Namespace,
s.config.Criu,
s.config.SystemdCgroup,
s.platform,
config,
rootfs,
)
if err != nil {
return nil, errdefs.ToGRPC(err)
}
if err := process.Create(ctx, config); err != nil {
return nil, errdefs.ToGRPC(err)
}
// save the main task id and bundle to the shim for additional requests
s.id = r.ID
s.bundle = r.Bundle
pid := process.Pid()
s.processes[r.ID] = process
return &shimapi.CreateTaskResponse{
Pid: uint32(pid),
}, nil
}
// Start a process
func (s *Service) Start(ctx context.Context, r *shimapi.StartRequest) (*shimapi.StartResponse, error) {
p, err := s.getExecProcess(r.ID)
if err != nil {
return nil, err
}
if err := p.Start(ctx); err != nil {
return nil, err
}
return &shimapi.StartResponse{
ID: p.ID(),
Pid: uint32(p.Pid()),
}, nil
}
// Delete the initial process and container
func (s *Service) Delete(ctx context.Context, r *ptypes.Empty) (*shimapi.DeleteResponse, error) {
p, err := s.getInitProcess()
if err != nil {
return nil, err
}
if err := p.Delete(ctx); err != nil {
return nil, errdefs.ToGRPC(err)
}
s.mu.Lock()
delete(s.processes, s.id)
s.mu.Unlock()
s.platform.Close()
return &shimapi.DeleteResponse{
ExitStatus: uint32(p.ExitStatus()),
ExitedAt: p.ExitedAt(),
Pid: uint32(p.Pid()),
}, nil
}
// DeleteProcess deletes an exec'd process
func (s *Service) DeleteProcess(ctx context.Context, r *shimapi.DeleteProcessRequest) (*shimapi.DeleteResponse, error) {
if r.ID == s.id {
return nil, status.Errorf(codes.InvalidArgument, "cannot delete init process with DeleteProcess")
}
p, err := s.getExecProcess(r.ID)
if err != nil {
return nil, err
}
if err := p.Delete(ctx); err != nil {
return nil, errdefs.ToGRPC(err)
}
s.mu.Lock()
delete(s.processes, r.ID)
s.mu.Unlock()
return &shimapi.DeleteResponse{
ExitStatus: uint32(p.ExitStatus()),
ExitedAt: p.ExitedAt(),
Pid: uint32(p.Pid()),
}, nil
}
// Exec an additional process inside the container
func (s *Service) Exec(ctx context.Context, r *shimapi.ExecProcessRequest) (*ptypes.Empty, error) {
s.mu.Lock()
if p := s.processes[r.ID]; p != nil {
s.mu.Unlock()
return nil, errdefs.ToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ID)
}
p := s.processes[s.id]
s.mu.Unlock()
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
process, err := p.(*process.Init).Exec(ctx, s.config.Path, &process.ExecConfig{
ID: r.ID,
Terminal: r.Terminal,
Stdin: r.Stdin,
Stdout: r.Stdout,
Stderr: r.Stderr,
Spec: r.Spec,
})
if err != nil {
return nil, errdefs.ToGRPC(err)
}
s.mu.Lock()
s.processes[r.ID] = process
s.mu.Unlock()
return empty, nil
}
// ResizePty of a process
func (s *Service) ResizePty(ctx context.Context, r *shimapi.ResizePtyRequest) (*ptypes.Empty, error) {
if r.ID == "" {
return nil, errdefs.ToGRPCf(errdefs.ErrInvalidArgument, "id not provided")
}
ws := console.WinSize{
Width: uint16(r.Width),
Height: uint16(r.Height),
}
s.mu.Lock()
p := s.processes[r.ID]
s.mu.Unlock()
if p == nil {
return nil, errors.Errorf("process does not exist %s", r.ID)
}
if err := p.Resize(ws); err != nil {
return nil, errdefs.ToGRPC(err)
}
return empty, nil
}
// State returns runtime state information for a process
func (s *Service) State(ctx context.Context, r *shimapi.StateRequest) (*shimapi.StateResponse, error) {
p, err := s.getExecProcess(r.ID)
if err != nil {
return nil, err
}
st, err := p.Status(ctx)
if err != nil {
return nil, err
}
status := task.StatusUnknown
switch st {
case "created":
status = task.StatusCreated
case "running":
status = task.StatusRunning
case "stopped":
status = task.StatusStopped
case "paused":
status = task.StatusPaused
case "pausing":
status = task.StatusPausing
}
sio := p.Stdio()
return &shimapi.StateResponse{
ID: p.ID(),
Bundle: s.bundle,
Pid: uint32(p.Pid()),
Status: status,
Stdin: sio.Stdin,
Stdout: sio.Stdout,
Stderr: sio.Stderr,
Terminal: sio.Terminal,
ExitStatus: uint32(p.ExitStatus()),
ExitedAt: p.ExitedAt(),
}, nil
}
// Pause the container
func (s *Service) Pause(ctx context.Context, r *ptypes.Empty) (*ptypes.Empty, error) {
p, err := s.getInitProcess()
if err != nil {
return nil, err
}
if err := p.(*process.Init).Pause(ctx); err != nil {
return nil, err
}
return empty, nil
}
// Resume the container
func (s *Service) Resume(ctx context.Context, r *ptypes.Empty) (*ptypes.Empty, error) {
p, err := s.getInitProcess()
if err != nil {
return nil, err
}
if err := p.(*process.Init).Resume(ctx); err != nil {
return nil, err
}
return empty, nil
}
// Kill a process with the provided signal
func (s *Service) Kill(ctx context.Context, r *shimapi.KillRequest) (*ptypes.Empty, error) {
if r.ID == "" {
p, err := s.getInitProcess()
if err != nil {
return nil, err
}
if err := p.Kill(ctx, r.Signal, r.All); err != nil {
return nil, errdefs.ToGRPC(err)
}
return empty, nil
}
p, err := s.getExecProcess(r.ID)
if err != nil {
return nil, err
}
if err := p.Kill(ctx, r.Signal, r.All); err != nil {
return nil, errdefs.ToGRPC(err)
}
return empty, nil
}
// ListPids returns all pids inside the container
func (s *Service) ListPids(ctx context.Context, r *shimapi.ListPidsRequest) (*shimapi.ListPidsResponse, error) {
pids, err := s.getContainerPids(ctx, r.ID)
if err != nil {
return nil, errdefs.ToGRPC(err)
}
var processes []*task.ProcessInfo
s.mu.Lock()
defer s.mu.Unlock()
for _, pid := range pids {
pInfo := task.ProcessInfo{
Pid: pid,
}
for _, p := range s.processes {
if p.Pid() == int(pid) {
d := &runctypes.ProcessDetails{
ExecID: p.ID(),
}
a, err := typeurl.MarshalAny(d)
if err != nil {
return nil, errors.Wrapf(err, "failed to marshal process %d info", pid)
}
pInfo.Info = a
break
}
}
processes = append(processes, &pInfo)
}
return &shimapi.ListPidsResponse{
Processes: processes,
}, nil
}
// CloseIO of a process
func (s *Service) CloseIO(ctx context.Context, r *shimapi.CloseIORequest) (*ptypes.Empty, error) {
p, err := s.getExecProcess(r.ID)
if err != nil {
return nil, err
}
if stdin := p.Stdin(); stdin != nil {
if err := stdin.Close(); err != nil {
return nil, errors.Wrap(err, "close stdin")
}
}
return empty, nil
}
// Checkpoint the container
func (s *Service) Checkpoint(ctx context.Context, r *shimapi.CheckpointTaskRequest) (*ptypes.Empty, error) {
p, err := s.getInitProcess()
if err != nil {
return nil, err
}
var options runctypes.CheckpointOptions
if r.Options != nil {
v, err := typeurl.UnmarshalAny(r.Options)
if err != nil {
return nil, err
}
options = *v.(*runctypes.CheckpointOptions)
}
if err := p.(*process.Init).Checkpoint(ctx, &process.CheckpointConfig{
Path: r.Path,
Exit: options.Exit,
AllowOpenTCP: options.OpenTcp,
AllowExternalUnixSockets: options.ExternalUnixSockets,
AllowTerminal: options.Terminal,
FileLocks: options.FileLocks,
EmptyNamespaces: options.EmptyNamespaces,
WorkDir: options.WorkPath,
}); err != nil {
return nil, errdefs.ToGRPC(err)
}
return empty, nil
}
// ShimInfo returns shim information such as the shim's pid
func (s *Service) ShimInfo(ctx context.Context, r *ptypes.Empty) (*shimapi.ShimInfoResponse, error) {
return &shimapi.ShimInfoResponse{
ShimPid: uint32(os.Getpid()),
}, nil
}
// Update a running container
func (s *Service) Update(ctx context.Context, r *shimapi.UpdateTaskRequest) (*ptypes.Empty, error) {
p, err := s.getInitProcess()
if err != nil {
return nil, err
}
if err := p.(*process.Init).Update(ctx, r.Resources); err != nil {
return nil, errdefs.ToGRPC(err)
}
return empty, nil
}
// Wait for a process to exit
func (s *Service) Wait(ctx context.Context, r *shimapi.WaitRequest) (*shimapi.WaitResponse, error) {
p, err := s.getExecProcess(r.ID)
if err != nil {
return nil, err
}
p.Wait()
return &shimapi.WaitResponse{
ExitStatus: uint32(p.ExitStatus()),
ExitedAt: p.ExitedAt(),
}, nil
}
func (s *Service) processExits() {
for e := range s.ec {
s.checkProcesses(e)
}
}
func (s *Service) checkProcesses(e runc.Exit) {
var p process.Process
s.mu.Lock()
for _, proc := range s.processes {
if proc.Pid() == e.Pid {
p = proc
break
}
}
s.mu.Unlock()
if p == nil {
log.G(s.context).Debugf("process with id:%d wasn't found", e.Pid)
return
}
if ip, ok := p.(*process.Init); ok {
// Ensure all children are killed
if shouldKillAllOnExit(s.context, s.bundle) {
if err := ip.KillAll(s.context); err != nil {
log.G(s.context).WithError(err).WithField("id", ip.ID()).
Error("failed to kill init's children")
}
}
}
p.SetExited(e.Status)
s.events <- &eventstypes.TaskExit{
ContainerID: s.id,
ID: p.ID(),
Pid: uint32(e.Pid),
ExitStatus: uint32(e.Status),
ExitedAt: p.ExitedAt(),
}
}
func shouldKillAllOnExit(ctx context.Context, bundlePath string) bool {
var bundleSpec specs.Spec
bundleConfigContents, err := ioutil.ReadFile(filepath.Join(bundlePath, "config.json"))
if err != nil {
log.G(ctx).WithError(err).Error("shouldKillAllOnExit: failed to read config.json")
return true
}
if err := json.Unmarshal(bundleConfigContents, &bundleSpec); err != nil {
log.G(ctx).WithError(err).Error("shouldKillAllOnExit: failed to unmarshal bundle json")
return true
}
if bundleSpec.Linux != nil {
for _, ns := range bundleSpec.Linux.Namespaces {
if ns.Type == specs.PIDNamespace && ns.Path == "" {
return false
}
}
}
return true
}
func (s *Service) getContainerPids(ctx context.Context, id string) ([]uint32, error) {
p, err := s.getInitProcess()
if err != nil {
return nil, err
}
ps, err := p.(*process.Init).Runtime().Ps(ctx, id)
if err != nil {
return nil, err
}
pids := make([]uint32, 0, len(ps))
for _, pid := range ps {
pids = append(pids, uint32(pid))
}
return pids, nil
}
func (s *Service) forward(publisher events.Publisher) {
for e := range s.events {
if err := publisher.Publish(s.context, getTopic(s.context, e), e); err != nil {
log.G(s.context).WithError(err).Error("post event")
}
}
}
// getInitProcess returns initial process
func (s *Service) getInitProcess() (process.Process, error) {
s.mu.Lock()
defer s.mu.Unlock()
p := s.processes[s.id]
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
return p, nil
}
// getExecProcess returns exec process
func (s *Service) getExecProcess(id string) (process.Process, error) {
s.mu.Lock()
defer s.mu.Unlock()
p := s.processes[id]
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process %s does not exist", id)
}
return p, nil
}
func getTopic(ctx context.Context, e interface{}) string {
switch e.(type) {
case *eventstypes.TaskCreate:
return runtime.TaskCreateEventTopic
case *eventstypes.TaskStart:
return runtime.TaskStartEventTopic
case *eventstypes.TaskOOM:
return runtime.TaskOOMEventTopic
case *eventstypes.TaskExit:
return runtime.TaskExitEventTopic
case *eventstypes.TaskDelete:
return runtime.TaskDeleteEventTopic
case *eventstypes.TaskExecAdded:
return runtime.TaskExecAddedEventTopic
case *eventstypes.TaskExecStarted:
return runtime.TaskExecStartedEventTopic
case *eventstypes.TaskPaused:
return runtime.TaskPausedEventTopic
case *eventstypes.TaskResumed:
return runtime.TaskResumedEventTopic
case *eventstypes.TaskCheckpointed:
return runtime.TaskCheckpointedEventTopic
default:
logrus.Warnf("no topic for type %#v", e)
}
return runtime.TaskUnknownTopic
}
func newInit(ctx context.Context, path, workDir, runtimeRoot, namespace, criu string, systemdCgroup bool, platform stdio.Platform, r *process.CreateConfig, rootfs string) (*process.Init, error) {
var options runctypes.CreateOptions
if r.Options != nil {
v, err := typeurl.UnmarshalAny(r.Options)
if err != nil {
return nil, err
}
options = *v.(*runctypes.CreateOptions)
}
runtime := process.NewRunc(runtimeRoot, path, namespace, r.Runtime, criu, systemdCgroup)
p := process.New(r.ID, runtime, stdio.Stdio{
Stdin: r.Stdin,
Stdout: r.Stdout,
Stderr: r.Stderr,
Terminal: r.Terminal,
})
p.Bundle = r.Bundle
p.Platform = platform
p.Rootfs = rootfs
p.WorkDir = workDir
p.IoUID = int(options.IoUid)
p.IoGID = int(options.IoGid)
p.NoPivotRoot = options.NoPivotRoot
p.NoNewKeyring = options.NoNewKeyring
p.CriuWorkPath = options.CriuWorkPath
if p.CriuWorkPath == "" {
// if criu work path not set, use container WorkDir
p.CriuWorkPath = p.WorkDir
}
return p, nil
}

View file

@ -1,191 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package shim
import (
"context"
"io"
"net/url"
"os"
"sync"
"syscall"
"github.com/containerd/console"
"github.com/containerd/containerd/namespaces"
"github.com/containerd/containerd/pkg/process"
"github.com/containerd/fifo"
"github.com/pkg/errors"
)
type linuxPlatform struct {
epoller *console.Epoller
}
func (p *linuxPlatform) CopyConsole(ctx context.Context, console console.Console, id, stdin, stdout, stderr string, wg *sync.WaitGroup) (cons console.Console, retErr error) {
if p.epoller == nil {
return nil, errors.New("uninitialized epoller")
}
epollConsole, err := p.epoller.Add(console)
if err != nil {
return nil, err
}
var cwg sync.WaitGroup
if stdin != "" {
in, err := fifo.OpenFifo(ctx, stdin, syscall.O_RDONLY, 0)
if err != nil {
return nil, err
}
cwg.Add(1)
go func() {
cwg.Done()
bp := bufPool.Get().(*[]byte)
defer bufPool.Put(bp)
io.CopyBuffer(epollConsole, in, *bp)
// we need to shutdown epollConsole when pipe broken
epollConsole.Shutdown(p.epoller.CloseConsole)
epollConsole.Close()
}()
}
uri, err := url.Parse(stdout)
if err != nil {
return nil, errors.Wrap(err, "unable to parse stdout uri")
}
switch uri.Scheme {
case "binary":
ns, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return nil, err
}
cmd := process.NewBinaryCmd(uri, id, ns)
// In case of unexpected errors during logging binary start, close open pipes
var filesToClose []*os.File
defer func() {
if retErr != nil {
process.CloseFiles(filesToClose...)
}
}()
// Create pipe to be used by logging binary for Stdout
outR, outW, err := os.Pipe()
if err != nil {
return nil, errors.Wrap(err, "failed to create stdout pipes")
}
filesToClose = append(filesToClose, outR)
// Stderr is created for logging binary but unused when terminal is true
serrR, _, err := os.Pipe()
if err != nil {
return nil, errors.Wrap(err, "failed to create stderr pipes")
}
filesToClose = append(filesToClose, serrR)
r, w, err := os.Pipe()
if err != nil {
return nil, err
}
filesToClose = append(filesToClose, r)
cmd.ExtraFiles = append(cmd.ExtraFiles, outR, serrR, w)
wg.Add(1)
cwg.Add(1)
go func() {
cwg.Done()
io.Copy(outW, epollConsole)
outW.Close()
wg.Done()
}()
if err := cmd.Start(); err != nil {
return nil, errors.Wrap(err, "failed to start logging binary process")
}
// Close our side of the pipe after start
if err := w.Close(); err != nil {
return nil, errors.Wrap(err, "failed to close write pipe after start")
}
// Wait for the logging binary to be ready
b := make([]byte, 1)
if _, err := r.Read(b); err != nil && err != io.EOF {
return nil, errors.Wrap(err, "failed to read from logging binary")
}
cwg.Wait()
default:
outw, err := fifo.OpenFifo(ctx, stdout, syscall.O_WRONLY, 0)
if err != nil {
return nil, err
}
outr, err := fifo.OpenFifo(ctx, stdout, syscall.O_RDONLY, 0)
if err != nil {
return nil, err
}
wg.Add(1)
cwg.Add(1)
go func() {
cwg.Done()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
io.CopyBuffer(outw, epollConsole, *p)
outw.Close()
outr.Close()
wg.Done()
}()
cwg.Wait()
}
return epollConsole, nil
}
func (p *linuxPlatform) ShutdownConsole(ctx context.Context, cons console.Console) error {
if p.epoller == nil {
return errors.New("uninitialized epoller")
}
epollConsole, ok := cons.(*console.EpollConsole)
if !ok {
return errors.Errorf("expected EpollConsole, got %#v", cons)
}
return epollConsole.Shutdown(p.epoller.CloseConsole)
}
func (p *linuxPlatform) Close() error {
return p.epoller.Close()
}
// initialize a single epoll fd to manage our consoles. `initPlatform` should
// only be called once.
func (s *Service) initPlatform() error {
if s.platform != nil {
return nil
}
epoller, err := console.NewEpoller()
if err != nil {
return errors.Wrap(err, "failed to initialize epoller")
}
s.platform = &linuxPlatform{
epoller: epoller,
}
go epoller.Wait()
return nil
}

View file

@ -1,160 +0,0 @@
// +build !windows,!linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package shim
import (
"context"
"io"
"net/url"
"os"
"sync"
"syscall"
"github.com/containerd/console"
"github.com/containerd/containerd/namespaces"
"github.com/containerd/containerd/pkg/process"
"github.com/containerd/fifo"
"github.com/pkg/errors"
)
type unixPlatform struct {
}
func (p *unixPlatform) CopyConsole(ctx context.Context, console console.Console, id, stdin, stdout, stderr string, wg *sync.WaitGroup) (cons console.Console, retErr error) {
var cwg sync.WaitGroup
if stdin != "" {
in, err := fifo.OpenFifo(ctx, stdin, syscall.O_RDONLY, 0)
if err != nil {
return nil, err
}
cwg.Add(1)
go func() {
cwg.Done()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
io.CopyBuffer(console, in, *p)
}()
}
uri, err := url.Parse(stdout)
if err != nil {
return nil, errors.Wrap(err, "unable to parse stdout uri")
}
switch uri.Scheme {
case "binary":
ns, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return nil, err
}
cmd := process.NewBinaryCmd(uri, id, ns)
// In case of unexpected errors during logging binary start, close open pipes
var filesToClose []*os.File
defer func() {
if retErr != nil {
process.CloseFiles(filesToClose...)
}
}()
// Create pipe to be used by logging binary for Stdout
outR, outW, err := os.Pipe()
if err != nil {
return nil, errors.Wrap(err, "failed to create stdout pipes")
}
filesToClose = append(filesToClose, outR)
// Stderr is created for logging binary but unused when terminal is true
serrR, _, err := os.Pipe()
if err != nil {
return nil, errors.Wrap(err, "failed to create stderr pipes")
}
filesToClose = append(filesToClose, serrR)
r, w, err := os.Pipe()
if err != nil {
return nil, err
}
filesToClose = append(filesToClose, r)
cmd.ExtraFiles = append(cmd.ExtraFiles, outR, serrR, w)
wg.Add(1)
cwg.Add(1)
go func() {
cwg.Done()
io.Copy(outW, console)
outW.Close()
wg.Done()
}()
if err := cmd.Start(); err != nil {
return nil, errors.Wrap(err, "failed to start logging binary process")
}
// Close our side of the pipe after start
if err := w.Close(); err != nil {
return nil, errors.Wrap(err, "failed to close write pipe after start")
}
// Wait for the logging binary to be ready
b := make([]byte, 1)
if _, err := r.Read(b); err != nil && err != io.EOF {
return nil, errors.Wrap(err, "failed to read from logging binary")
}
cwg.Wait()
default:
outw, err := fifo.OpenFifo(ctx, stdout, syscall.O_WRONLY, 0)
if err != nil {
return nil, err
}
outr, err := fifo.OpenFifo(ctx, stdout, syscall.O_RDONLY, 0)
if err != nil {
return nil, err
}
wg.Add(1)
cwg.Add(1)
go func() {
cwg.Done()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
io.CopyBuffer(outw, console, *p)
outw.Close()
outr.Close()
wg.Done()
}()
cwg.Wait()
}
return console, nil
}
func (p *unixPlatform) ShutdownConsole(ctx context.Context, cons console.Console) error {
return nil
}
func (p *unixPlatform) Close() error {
return nil
}
func (s *Service) initPlatform() error {
s.platform = &unixPlatform{}
return nil
}

View file

@ -1,17 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package shim

File diff suppressed because it is too large Load diff

View file

@ -1,181 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
syntax = "proto3";
package containerd.runtime.linux.shim.v1;
import "google/protobuf/any.proto";
import "google/protobuf/empty.proto";
import weak "gogoproto/gogo.proto";
import "google/protobuf/timestamp.proto";
import "github.com/containerd/containerd/api/types/mount.proto";
import "github.com/containerd/containerd/api/types/task/task.proto";
option go_package = "github.com/containerd/containerd/runtime/v1/shim/v1;shim";
// Shim service is launched for each container and is responsible for owning the IO
// for the container and its additional processes. The shim is also the parent of
// each container and allows reattaching to the IO and receiving the exit status
// for the container processes.
service Shim {
// State returns shim and task state information.
rpc State(StateRequest) returns (StateResponse);
rpc Create(CreateTaskRequest) returns (CreateTaskResponse);
rpc Start(StartRequest) returns (StartResponse);
rpc Delete(google.protobuf.Empty) returns (DeleteResponse);
rpc DeleteProcess(DeleteProcessRequest) returns (DeleteResponse);
rpc ListPids(ListPidsRequest) returns (ListPidsResponse);
rpc Pause(google.protobuf.Empty) returns (google.protobuf.Empty);
rpc Resume(google.protobuf.Empty) returns (google.protobuf.Empty);
rpc Checkpoint(CheckpointTaskRequest) returns (google.protobuf.Empty);
rpc Kill(KillRequest) returns (google.protobuf.Empty);
rpc Exec(ExecProcessRequest) returns (google.protobuf.Empty);
rpc ResizePty(ResizePtyRequest) returns (google.protobuf.Empty);
rpc CloseIO(CloseIORequest) returns (google.protobuf.Empty);
// ShimInfo returns information about the shim.
rpc ShimInfo(google.protobuf.Empty) returns (ShimInfoResponse);
rpc Update(UpdateTaskRequest) returns (google.protobuf.Empty);
rpc Wait(WaitRequest) returns (WaitResponse);
}
message CreateTaskRequest {
string id = 1;
string bundle = 2;
string runtime = 3;
repeated containerd.types.Mount rootfs = 4;
bool terminal = 5;
string stdin = 6;
string stdout = 7;
string stderr = 8;
string checkpoint = 9;
string parent_checkpoint = 10;
google.protobuf.Any options = 11;
}
message CreateTaskResponse {
uint32 pid = 1;
}
message DeleteResponse {
uint32 pid = 1;
uint32 exit_status = 2;
google.protobuf.Timestamp exited_at = 3 [(gogoproto.stdtime) = true, (gogoproto.nullable) = false];
}
message DeleteProcessRequest {
string id = 1;
}
message ExecProcessRequest {
string id = 1;
bool terminal = 2;
string stdin = 3;
string stdout = 4;
string stderr = 5;
google.protobuf.Any spec = 6;
}
message ExecProcessResponse {
}
message ResizePtyRequest {
string id = 1;
uint32 width = 2;
uint32 height = 3;
}
message StateRequest {
string id = 1;
}
message StateResponse {
string id = 1;
string bundle = 2;
uint32 pid = 3;
containerd.v1.types.Status status = 4;
string stdin = 5;
string stdout = 6;
string stderr = 7;
bool terminal = 8;
uint32 exit_status = 9;
google.protobuf.Timestamp exited_at = 10 [(gogoproto.stdtime) = true, (gogoproto.nullable) = false];
}
message KillRequest {
string id = 1;
uint32 signal = 2;
bool all = 3;
}
message CloseIORequest {
string id = 1;
bool stdin = 2;
}
message ListPidsRequest {
string id = 1;
}
message ListPidsResponse {
repeated containerd.v1.types.ProcessInfo processes = 1;
}
message CheckpointTaskRequest {
string path = 1;
google.protobuf.Any options = 2;
}
message ShimInfoResponse {
uint32 shim_pid = 1;
}
message UpdateTaskRequest {
google.protobuf.Any resources = 1;
}
message StartRequest {
string id = 1;
}
message StartResponse {
string id = 1;
uint32 pid = 2;
}
message WaitRequest {
string id = 1;
}
message WaitResponse {
uint32 exit_status = 1;
google.protobuf.Timestamp exited_at = 2 [(gogoproto.stdtime) = true, (gogoproto.nullable) = false];
}

View file

@ -1,248 +0,0 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package reaper
import (
"os/exec"
"sync"
"time"
runc "github.com/containerd/go-runc"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
// ErrNoSuchProcess is returned when the process no longer exists
var ErrNoSuchProcess = errors.New("no such process")
const bufferSize = 32
type subscriber struct {
sync.Mutex
c chan runc.Exit
closed bool
}
func (s *subscriber) close() {
s.Lock()
if s.closed {
s.Unlock()
return
}
close(s.c)
s.closed = true
s.Unlock()
}
func (s *subscriber) do(fn func()) {
s.Lock()
fn()
s.Unlock()
}
// Reap should be called when the process receives an SIGCHLD. Reap will reap
// all exited processes and close their wait channels
func Reap() error {
now := time.Now()
exits, err := reap(false)
for _, e := range exits {
done := Default.notify(runc.Exit{
Timestamp: now,
Pid: e.Pid,
Status: e.Status,
})
select {
case <-done:
case <-time.After(1 * time.Second):
}
}
return err
}
// Default is the default monitor initialized for the package
var Default = &Monitor{
subscribers: make(map[chan runc.Exit]*subscriber),
}
// Monitor monitors the underlying system for process status changes
type Monitor struct {
sync.Mutex
subscribers map[chan runc.Exit]*subscriber
}
// Start starts the command a registers the process with the reaper
func (m *Monitor) Start(c *exec.Cmd) (chan runc.Exit, error) {
ec := m.Subscribe()
if err := c.Start(); err != nil {
m.Unsubscribe(ec)
return nil, err
}
return ec, nil
}
// Wait blocks until a process is signal as dead.
// User should rely on the value of the exit status to determine if the
// command was successful or not.
func (m *Monitor) Wait(c *exec.Cmd, ec chan runc.Exit) (int, error) {
for e := range ec {
if e.Pid == c.Process.Pid {
// make sure we flush all IO
c.Wait()
m.Unsubscribe(ec)
return e.Status, nil
}
}
// return no such process if the ec channel is closed and no more exit
// events will be sent
return -1, ErrNoSuchProcess
}
// Subscribe to process exit changes
func (m *Monitor) Subscribe() chan runc.Exit {
c := make(chan runc.Exit, bufferSize)
m.Lock()
m.subscribers[c] = &subscriber{
c: c,
}
m.Unlock()
return c
}
// Unsubscribe to process exit changes
func (m *Monitor) Unsubscribe(c chan runc.Exit) {
m.Lock()
s, ok := m.subscribers[c]
if !ok {
m.Unlock()
return
}
s.close()
delete(m.subscribers, c)
m.Unlock()
}
func (m *Monitor) getSubscribers() map[chan runc.Exit]*subscriber {
out := make(map[chan runc.Exit]*subscriber)
m.Lock()
for k, v := range m.subscribers {
out[k] = v
}
m.Unlock()
return out
}
func (m *Monitor) notify(e runc.Exit) chan struct{} {
const timeout = 1 * time.Millisecond
var (
done = make(chan struct{}, 1)
timer = time.NewTimer(timeout)
success = make(map[chan runc.Exit]struct{})
)
stop(timer, true)
go func() {
defer close(done)
for {
var (
failed int
subscribers = m.getSubscribers()
)
for _, s := range subscribers {
s.do(func() {
if s.closed {
return
}
if _, ok := success[s.c]; ok {
return
}
timer.Reset(timeout)
recv := true
select {
case s.c <- e:
success[s.c] = struct{}{}
case <-timer.C:
recv = false
failed++
}
stop(timer, recv)
})
}
// all subscribers received the message
if failed == 0 {
return
}
}
}()
return done
}
func stop(timer *time.Timer, recv bool) {
if !timer.Stop() && recv {
<-timer.C
}
}
// exit is the wait4 information from an exited process
type exit struct {
Pid int
Status int
}
// reap reaps all child processes for the calling process and returns their
// exit information
func reap(wait bool) (exits []exit, err error) {
var (
ws unix.WaitStatus
rus unix.Rusage
)
flag := unix.WNOHANG
if wait {
flag = 0
}
for {
pid, err := unix.Wait4(-1, &ws, flag, &rus)
if err != nil {
if err == unix.ECHILD {
return exits, nil
}
return exits, err
}
if pid <= 0 {
return exits, nil
}
exits = append(exits, exit{
Pid: pid,
Status: exitStatus(ws),
})
}
}
const exitSignalOffset = 128
// exitStatus returns the correct exit status for a process based on if it
// was signaled or exited cleanly
func exitStatus(status unix.WaitStatus) int {
if status.Signaled() {
return exitSignalOffset + int(status.Signal())
}
return status.ExitStatus()
}

View file

@ -1,39 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package reaper
import (
"unsafe"
"golang.org/x/sys/unix"
)
// SetSubreaper sets the value i as the subreaper setting for the calling process
func SetSubreaper(i int) error {
return unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
}
// GetSubreaper returns the subreaper setting for the calling process
func GetSubreaper() (int, error) {
var i uintptr
if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil {
return -1, err
}
return int(i), nil
}

9
vendor/modules.txt vendored
View file

@ -189,9 +189,7 @@ github.com/containerd/containerd/oci
github.com/containerd/containerd/pkg/apparmor
github.com/containerd/containerd/pkg/cap
github.com/containerd/containerd/pkg/dialer
github.com/containerd/containerd/pkg/process
github.com/containerd/containerd/pkg/seccomp
github.com/containerd/containerd/pkg/stdio
github.com/containerd/containerd/pkg/userns
github.com/containerd/containerd/platforms
github.com/containerd/containerd/plugin
@ -203,13 +201,7 @@ github.com/containerd/containerd/remotes/docker/auth
github.com/containerd/containerd/remotes/docker/schema1
github.com/containerd/containerd/remotes/errors
github.com/containerd/containerd/rootfs
github.com/containerd/containerd/runtime
github.com/containerd/containerd/runtime/linux/runctypes
github.com/containerd/containerd/runtime/v1
github.com/containerd/containerd/runtime/v1/linux
github.com/containerd/containerd/runtime/v1/shim
github.com/containerd/containerd/runtime/v1/shim/client
github.com/containerd/containerd/runtime/v1/shim/v1
github.com/containerd/containerd/runtime/v2/runc/options
github.com/containerd/containerd/services
github.com/containerd/containerd/services/content/contentserver
@ -218,7 +210,6 @@ github.com/containerd/containerd/services/server/config
github.com/containerd/containerd/snapshots
github.com/containerd/containerd/snapshots/proxy
github.com/containerd/containerd/sys
github.com/containerd/containerd/sys/reaper
github.com/containerd/containerd/version
# github.com/containerd/continuity v0.2.2
## explicit; go 1.13