|
@@ -2,18 +2,18 @@ package supervisor // import "github.com/docker/docker/libcontainerd/supervisor"
|
|
|
|
|
|
import (
|
|
import (
|
|
"context"
|
|
"context"
|
|
- "io"
|
|
|
|
"os"
|
|
"os"
|
|
"os/exec"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"path/filepath"
|
|
"runtime"
|
|
"runtime"
|
|
- "strconv"
|
|
|
|
"strings"
|
|
"strings"
|
|
"time"
|
|
"time"
|
|
|
|
|
|
"github.com/containerd/containerd"
|
|
"github.com/containerd/containerd"
|
|
"github.com/containerd/containerd/services/server/config"
|
|
"github.com/containerd/containerd/services/server/config"
|
|
"github.com/containerd/containerd/sys"
|
|
"github.com/containerd/containerd/sys"
|
|
|
|
+ "github.com/docker/docker/pkg/pidfile"
|
|
|
|
+ "github.com/docker/docker/pkg/process"
|
|
"github.com/docker/docker/pkg/system"
|
|
"github.com/docker/docker/pkg/system"
|
|
"github.com/pelletier/go-toml"
|
|
"github.com/pelletier/go-toml"
|
|
"github.com/pkg/errors"
|
|
"github.com/pkg/errors"
|
|
@@ -124,34 +124,6 @@ func (r *remote) WaitTimeout(d time.Duration) error {
|
|
func (r *remote) Address() string {
|
|
func (r *remote) Address() string {
|
|
return r.GRPC.Address
|
|
return r.GRPC.Address
|
|
}
|
|
}
|
|
-func (r *remote) getContainerdPid() (int, error) {
|
|
|
|
- f, err := os.OpenFile(r.pidFile, os.O_RDWR, 0600)
|
|
|
|
- if err != nil {
|
|
|
|
- if os.IsNotExist(err) {
|
|
|
|
- return -1, nil
|
|
|
|
- }
|
|
|
|
- return -1, err
|
|
|
|
- }
|
|
|
|
- defer f.Close()
|
|
|
|
-
|
|
|
|
- b := make([]byte, 8)
|
|
|
|
- n, err := f.Read(b)
|
|
|
|
- if err != nil && err != io.EOF {
|
|
|
|
- return -1, err
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if n > 0 {
|
|
|
|
- pid, err := strconv.ParseUint(string(b[:n]), 10, 64)
|
|
|
|
- if err != nil {
|
|
|
|
- return -1, err
|
|
|
|
- }
|
|
|
|
- if system.IsProcessAlive(int(pid)) {
|
|
|
|
- return int(pid), nil
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- return -1, nil
|
|
|
|
-}
|
|
|
|
|
|
|
|
func (r *remote) getContainerdConfig() (string, error) {
|
|
func (r *remote) getContainerdConfig() (string, error) {
|
|
f, err := os.OpenFile(r.configFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
|
|
f, err := os.OpenFile(r.configFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
|
|
@@ -167,23 +139,22 @@ func (r *remote) getContainerdConfig() (string, error) {
|
|
}
|
|
}
|
|
|
|
|
|
func (r *remote) startContainerd() error {
|
|
func (r *remote) startContainerd() error {
|
|
- pid, err := r.getContainerdPid()
|
|
|
|
- if err != nil {
|
|
|
|
|
|
+ pid, err := pidfile.Read(r.pidFile)
|
|
|
|
+ if err != nil && !errors.Is(err, os.ErrNotExist) {
|
|
return err
|
|
return err
|
|
}
|
|
}
|
|
|
|
|
|
- if pid != -1 {
|
|
|
|
|
|
+ if pid > 0 {
|
|
r.daemonPid = pid
|
|
r.daemonPid = pid
|
|
r.logger.WithField("pid", pid).Infof("%s is still running", binaryName)
|
|
r.logger.WithField("pid", pid).Infof("%s is still running", binaryName)
|
|
return nil
|
|
return nil
|
|
}
|
|
}
|
|
|
|
|
|
- configFile, err := r.getContainerdConfig()
|
|
|
|
|
|
+ cfgFile, err := r.getContainerdConfig()
|
|
if err != nil {
|
|
if err != nil {
|
|
return err
|
|
return err
|
|
}
|
|
}
|
|
-
|
|
|
|
- args := []string{"--config", configFile}
|
|
|
|
|
|
+ args := []string{"--config", cfgFile}
|
|
|
|
|
|
if r.logLevel != "" {
|
|
if r.logLevel != "" {
|
|
args = append(args, "--log-level", r.logLevel)
|
|
args = append(args, "--log-level", r.logLevel)
|
|
@@ -236,9 +207,9 @@ func (r *remote) startContainerd() error {
|
|
r.logger.WithError(err).Warn("failed to adjust OOM score")
|
|
r.logger.WithError(err).Warn("failed to adjust OOM score")
|
|
}
|
|
}
|
|
|
|
|
|
- err = os.WriteFile(r.pidFile, []byte(strconv.Itoa(r.daemonPid)), 0660)
|
|
|
|
|
|
+ err = pidfile.Write(r.pidFile, r.daemonPid)
|
|
if err != nil {
|
|
if err != nil {
|
|
- system.KillProcess(r.daemonPid)
|
|
|
|
|
|
+ process.Kill(r.daemonPid)
|
|
return errors.Wrap(err, "libcontainerd: failed to save daemon pid to disk")
|
|
return errors.Wrap(err, "libcontainerd: failed to save daemon pid to disk")
|
|
}
|
|
}
|
|
|
|
|
|
@@ -357,7 +328,7 @@ func (r *remote) monitorDaemon(ctx context.Context) {
|
|
r.logger.WithError(err).WithField("binary", binaryName).Debug("daemon is not responding")
|
|
r.logger.WithError(err).WithField("binary", binaryName).Debug("daemon is not responding")
|
|
|
|
|
|
transientFailureCount++
|
|
transientFailureCount++
|
|
- if transientFailureCount < maxConnectionRetryCount || system.IsProcessAlive(r.daemonPid) {
|
|
|
|
|
|
+ if transientFailureCount < maxConnectionRetryCount || process.Alive(r.daemonPid) {
|
|
delay = time.Duration(transientFailureCount) * 200 * time.Millisecond
|
|
delay = time.Duration(transientFailureCount) * 200 * time.Millisecond
|
|
continue
|
|
continue
|
|
}
|
|
}
|
|
@@ -365,7 +336,7 @@ func (r *remote) monitorDaemon(ctx context.Context) {
|
|
client = nil
|
|
client = nil
|
|
}
|
|
}
|
|
|
|
|
|
- if system.IsProcessAlive(r.daemonPid) {
|
|
|
|
|
|
+ if process.Alive(r.daemonPid) {
|
|
r.logger.WithField("pid", r.daemonPid).Info("killing and restarting containerd")
|
|
r.logger.WithField("pid", r.daemonPid).Info("killing and restarting containerd")
|
|
r.killDaemon()
|
|
r.killDaemon()
|
|
}
|
|
}
|