moby/libcontainerd/supervisor/remote_daemon.go

package supervisor // import "github.com/docker/docker/libcontainerd/supervisor"

import (
	"context"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"time"

	"github.com/containerd/containerd"
	"github.com/containerd/containerd/services/server/config"
"github.com/containerd/containerd/sys"
"github.com/docker/docker/pkg/process"
"github.com/docker/docker/pkg/system"
"github.com/pelletier/go-toml"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)

const (
	maxConnectionRetryCount = 3
	healthCheckTimeout      = 3 * time.Second
	shutdownTimeout         = 15 * time.Second
	startupTimeout          = 15 * time.Second
	configFile              = "containerd.toml"
	binaryName              = "containerd"
	pidFile                 = "containerd.pid"
)

type remote struct {
	config.Config

	// configFile is the location where the generated containerd configuration
	// file is saved.
	configFile string

	daemonPid int
	pidFile   string
	logger    *logrus.Entry

	daemonWaitCh  chan struct{}
	daemonStartCh chan error
	daemonStopCh  chan struct{}

	stateDir string

	// oomScore adjusts the OOM score for the containerd process.
	oomScore int

	// logLevel overrides the containerd logging-level through the --log-level
	// command-line option.
	logLevel string
}

// Daemon represents a running containerd daemon
type Daemon interface {
	WaitTimeout(time.Duration) error
	Address() string
}

// DaemonOpt allows configuring parameters of container daemons
type DaemonOpt func(c *remote) error

// Start starts a containerd daemon and monitors it
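//
// A typical caller does something like the following (sketch only; any
// option constructors are assumed to come from elsewhere in this package):
//
//	d, err := supervisor.Start(ctx, rootDir, stateDir, opts...)
//	if err != nil {
//		// handle startup failure
//	}
//	addr := d.Address() // gRPC address for a containerd client to dial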
func Start(ctx context.Context, rootDir, stateDir string, opts ...DaemonOpt) (Daemon, error) {
	r := &remote{
		stateDir: stateDir,
		Config: config.Config{
			Version: 2,
			Root:    filepath.Join(rootDir, "daemon"),
			State:   filepath.Join(stateDir, "daemon"),
		},
		configFile:    filepath.Join(stateDir, configFile),
		daemonPid:     -1,
		pidFile:       filepath.Join(stateDir, pidFile),
		logger:        logrus.WithField("module", "libcontainerd"),
		daemonStartCh: make(chan error, 1),
		daemonStopCh:  make(chan struct{}),
	}

	for _, opt := range opts {
		if err := opt(r); err != nil {
			return nil, err
		}
	}
	r.setDefaults()

	if err := system.MkdirAll(stateDir, 0700); err != nil {
		return nil, err
	}

	go r.monitorDaemon(ctx)

	timeout := time.NewTimer(startupTimeout)
	defer timeout.Stop()

	select {
	case <-timeout.C:
		return nil, errors.New("timeout waiting for containerd to start")
	case err := <-r.daemonStartCh:
		if err != nil {
			return nil, err
		}
	}

	return r, nil
}
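
// WaitTimeout blocks until the supervised containerd daemon has stopped,
// returning an error if it has not stopped within the given duration.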
func (r *remote) WaitTimeout(d time.Duration) error {
	timeout := time.NewTimer(d)
	defer timeout.Stop()

	select {
	case <-timeout.C:
		return errors.New("timeout waiting for containerd to stop")
	case <-r.daemonStopCh:
	}

	return nil
}
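
// Address returns the gRPC address on which the supervised containerd daemon
// is listening.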
func (r *remote) Address() string {
	return r.GRPC.Address
}
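
// getContainerdPid reads the pid-file left behind by a previous containerd
// process, and returns the recorded PID if that process is still alive, or
// -1 if there is no usable pid-file.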
func (r *remote) getContainerdPid() (int, error) {
	f, err := os.OpenFile(r.pidFile, os.O_RDWR, 0600)
	if err != nil {
		if os.IsNotExist(err) {
			return -1, nil
		}
		return -1, err
	}
	defer f.Close()

	b := make([]byte, 8)
	n, err := f.Read(b)
	if err != nil && err != io.EOF {
		return -1, err
	}

	if n > 0 {
		pid, err := strconv.ParseUint(string(b[:n]), 10, 64)
		if err != nil {
			return -1, err
		}
		if process.Alive(int(pid)) {
			return int(pid), nil
		}
	}

	return -1, nil
}
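
// getContainerdConfig encodes the supervisor's containerd configuration as
// TOML, writes it to the configured location, and returns the path of the
// written file.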
func (r *remote) getContainerdConfig() (string, error) {
	f, err := os.OpenFile(r.configFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
	if err != nil {
		return "", errors.Wrapf(err, "failed to open containerd config file (%s)", r.configFile)
	}
	defer f.Close()

	if err := toml.NewEncoder(f).Encode(r); err != nil {
		return "", errors.Wrapf(err, "failed to write containerd config file (%s)", r.configFile)
	}
	return r.configFile, nil
}
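
// startContainerd starts a new containerd process, unless the pid-file points
// at a containerd process that is still running, in which case that process
// is re-used. Output of the new process is redirected to the daemon's own
// streams, and its PID is written to the pid-file.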
func (r *remote) startContainerd() error {
	pid, err := r.getContainerdPid()
	if err != nil {
		return err
	}

	if pid != -1 {
		r.daemonPid = pid
		r.logger.WithField("pid", pid).Infof("%s is still running", binaryName)
		return nil
	}

	configFile, err := r.getContainerdConfig()
	if err != nil {
		return err
	}

	args := []string{"--config", configFile}
	if r.logLevel != "" {
		args = append(args, "--log-level", r.logLevel)
	}

	cmd := exec.Command(binaryName, args...)
	// redirect containerd logs to docker logs
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	cmd.SysProcAttr = containerdSysProcAttr()
	// clear the NOTIFY_SOCKET from the env when starting containerd
	cmd.Env = nil
	for _, e := range os.Environ() {
		if !strings.HasPrefix(e, "NOTIFY_SOCKET") {
			cmd.Env = append(cmd.Env, e)
		}
	}

	startedCh := make(chan error)
	go func() {
		// On Linux, when cmd.SysProcAttr.Pdeathsig is set,
		// the signal is sent to the subprocess when the creating thread
		// terminates. The runtime terminates a thread if a goroutine
		// exits while locked to it. Prevent the containerd process
		// from getting killed prematurely by ensuring that the thread
		// used to start it remains alive until it or the daemon process
		// exits. See https://go.dev/issue/27505 for more details.
		runtime.LockOSThread()
		defer runtime.UnlockOSThread()
		err := cmd.Start()
		startedCh <- err
		if err != nil {
			return
		}

		r.daemonWaitCh = make(chan struct{})
		// Reap our child when needed
		if err := cmd.Wait(); err != nil {
			r.logger.WithError(err).Errorf("containerd did not exit successfully")
		}
		close(r.daemonWaitCh)
	}()

	if err := <-startedCh; err != nil {
		return err
	}

	r.daemonPid = cmd.Process.Pid

	if err := r.adjustOOMScore(); err != nil {
		r.logger.WithError(err).Warn("failed to adjust OOM score")
	}

	err = os.WriteFile(r.pidFile, []byte(strconv.Itoa(r.daemonPid)), 0660)
	if err != nil {
		process.Kill(r.daemonPid)
		return errors.Wrap(err, "libcontainerd: failed to save daemon pid to disk")
	}

	r.logger.WithField("pid", r.daemonPid).WithField("address", r.Address()).Infof("started new %s process", binaryName)

	return nil
}
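
// adjustOOMScore sets the OOM score for the containerd process. The
// supervisor acts as process manager for the containerd child process, so
// it adjusts the score on containerd's behalf instead of containerd
// adjusting its own score.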
func (r *remote) adjustOOMScore() error {
	if r.oomScore == 0 || r.daemonPid <= 1 {
		// no score configured, or daemonPid contains an invalid PID (we don't
		// expect containerd to be running as PID 1 :)).
		return nil
	}
	if err := sys.SetOOMScore(r.daemonPid, r.oomScore); err != nil {
		return errors.Wrap(err, "failed to adjust OOM score for containerd process")
	}
	return nil
}
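
// monitorDaemon is the supervision loop: it (re)starts containerd when
// needed, periodically health-checks it over gRPC, and kills and restarts
// the process when it stops responding. The loop exits when ctx is
// cancelled, stopping the daemon and removing the pid-file on the way out.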
func (r *remote) monitorDaemon(ctx context.Context) {
	var (
		transientFailureCount = 0
		client                *containerd.Client
		err                   error
		delay                 time.Duration
		timer                 = time.NewTimer(0)
		started               bool
	)

	defer func() {
		if r.daemonPid != -1 {
			r.stopDaemon()
		}

		// cleanup some files
		_ = os.Remove(r.pidFile)

		r.platformCleanup()

		close(r.daemonStopCh)
		timer.Stop()
	}()

	// ensure no races on sending to timer.C even though there is a 0 duration.
	if !timer.Stop() {
		<-timer.C
	}

	for {
		timer.Reset(delay)

		select {
		case <-ctx.Done():
			r.logger.Info("stopping healthcheck following graceful shutdown")
			if client != nil {
				client.Close()
			}
			return
		case <-timer.C:
		}

		if r.daemonPid == -1 {
			if r.daemonWaitCh != nil {
				select {
				case <-ctx.Done():
					r.logger.Info("stopping containerd startup following graceful shutdown")
					return
				case <-r.daemonWaitCh:
				}
			}

			os.RemoveAll(r.GRPC.Address)
			if err := r.startContainerd(); err != nil {
				if !started {
					r.daemonStartCh <- err
					return
				}
				r.logger.WithError(err).Error("failed restarting containerd")
				delay = 50 * time.Millisecond
				continue
			}

			client, err = containerd.New(r.GRPC.Address, containerd.WithTimeout(60*time.Second))
			if err != nil {
				r.logger.WithError(err).Error("failed connecting to containerd")
				delay = 100 * time.Millisecond
				continue
			}
			r.logger.WithField("address", r.GRPC.Address).Debug("created containerd monitoring client")
		}

		if client != nil {
			tctx, cancel := context.WithTimeout(ctx, healthCheckTimeout)
			_, err := client.IsServing(tctx)
			cancel()
			if err == nil {
				if !started {
					close(r.daemonStartCh)
					started = true
				}

				transientFailureCount = 0
				select {
				case <-r.daemonWaitCh:
				case <-ctx.Done():
				}

				// Set a small delay in case there is a recurring failure (or bug in this code)
				// to ensure we don't end up in a super tight loop.
				delay = 500 * time.Millisecond
				continue
			}

			r.logger.WithError(err).WithField("binary", binaryName).Debug("daemon is not responding")

			transientFailureCount++
			if transientFailureCount < maxConnectionRetryCount || process.Alive(r.daemonPid) {
				delay = time.Duration(transientFailureCount) * 200 * time.Millisecond
				continue
			}
			client.Close()
			client = nil
		}

		if process.Alive(r.daemonPid) {
			r.logger.WithField("pid", r.daemonPid).Info("killing and restarting containerd")
			r.killDaemon()
		}

		r.daemonPid = -1
		delay = 0
		transientFailureCount = 0
	}
}