daemon: improve log messages during startup / shutdown
Consistently set "container ID" as a field for log messages, so that logs can be associated with a container. With this logs look like; INFO[2020-12-15T12:30:46.239329903Z] Loading containers: start. DEBU[2020-12-15T12:30:46.239919357Z] processing event stream module=libcontainerd namespace=moby DEBU[2020-12-15T12:30:46.242061458Z] loaded container container=622dec5f737d532da347bc627655ebc351fa5887476e8b8c33e5fbc5d0e48b5c paused=false running=false DEBU[2020-12-15T12:30:46.242185251Z] loaded container container=47f348160645f46a17c758d120dec600967eed4adf08dd28b809725971d062cc paused=false running=false DEBU[2020-12-15T12:30:46.242912375Z] loaded container container=e29c34c14b84810bc1e6cb6978a81e863601bfbe9ffe076c07dd5f6a439289d6 paused=false running=false DEBU[2020-12-15T12:30:46.243165260Z] loaded container container=31d40ee3e591a50ebee790b08c2bec751610d2eca51ca1a371ea1ff66ea46c1d paused=false running=false DEBU[2020-12-15T12:30:46.243585164Z] loaded container container=03dd5b1dc251a12d2e74eb54cb3ace66c437db228238a8d4831a264c9313c192 paused=false running=false DEBU[2020-12-15T12:30:46.244870764Z] loaded container container=b774141975cc511cc61fc5f374793503bb2e8fa774d6580ac47111a089de1b9b paused=false running=false DEBU[2020-12-15T12:30:46.245140276Z] loaded container container=b8a7229824fb84ff6f5af537a8ba987d106bf9a24a9aad3b628605d26b3facc4 paused=false running=false DEBU[2020-12-15T12:30:46.245457025Z] loaded container container=b3256ff87fc6f243d9e044fb3d7988ef61c86bfb957d90c0227e8a9697ffa49c paused=false running=false DEBU[2020-12-15T12:30:46.292515417Z] restoring container container=b3256ff87fc6f243d9e044fb3d7988ef61c86bfb957d90c0227e8a9697ffa49c paused=false running=false DEBU[2020-12-15T12:30:46.292612379Z] restoring container container=31d40ee3e591a50ebee790b08c2bec751610d2eca51ca1a371ea1ff66ea46c1d paused=false running=false DEBU[2020-12-15T12:30:46.292573767Z] restoring container container=b8a7229824fb84ff6f5af537a8ba987d106bf9a24a9aad3b628605d26b3facc4 paused=false running=false DEBU[2020-12-15T12:30:46.292602437Z] restoring container container=b774141975cc511cc61fc5f374793503bb2e8fa774d6580ac47111a089de1b9b paused=false running=false DEBU[2020-12-15T12:30:46.305032730Z] restoring container container=47f348160645f46a17c758d120dec600967eed4adf08dd28b809725971d062cc paused=false running=false DEBU[2020-12-15T12:30:46.305421360Z] restoring container container=622dec5f737d532da347bc627655ebc351fa5887476e8b8c33e5fbc5d0e48b5c paused=false running=false DEBU[2020-12-15T12:30:46.305558773Z] restoring container container=03dd5b1dc251a12d2e74eb54cb3ace66c437db228238a8d4831a264c9313c192 paused=false running=false DEBU[2020-12-15T12:30:46.307662990Z] restoring container container=e29c34c14b84810bc1e6cb6978a81e863601bfbe9ffe076c07dd5f6a439289d6 paused=false running=false ... INFO[2020-12-15T12:30:46.536506204Z] Loading containers: done. Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
This commit is contained in:
parent
f0014860c1
commit
7e600eaae0
1 changed files with 58 additions and 49 deletions
107
daemon/daemon.go
107
daemon/daemon.go
|
@ -239,31 +239,36 @@ func (daemon *Daemon) restore() error {
|
|||
_ = sem.Acquire(context.Background(), 1)
|
||||
defer sem.Release(1)
|
||||
|
||||
container, err := daemon.load(id)
|
||||
log := logrus.WithField("container", id)
|
||||
|
||||
c, err := daemon.load(id)
|
||||
if err != nil {
|
||||
logrus.Errorf("Failed to load container %v: %v", id, err)
|
||||
log.WithError(err).Error("failed to load container")
|
||||
return
|
||||
}
|
||||
if !system.IsOSSupported(container.OS) {
|
||||
logrus.Errorf("Failed to load container %v: %s (%q)", id, system.ErrNotSupportedOperatingSystem, container.OS)
|
||||
if !system.IsOSSupported(c.OS) {
|
||||
log.Errorf("failed to load container: %s (%q)", system.ErrNotSupportedOperatingSystem, c.OS)
|
||||
return
|
||||
}
|
||||
// Ignore the container if it does not support the current driver being used by the graph
|
||||
currentDriverForContainerOS := daemon.graphDrivers[container.OS]
|
||||
if (container.Driver == "" && currentDriverForContainerOS == "aufs") || container.Driver == currentDriverForContainerOS {
|
||||
rwlayer, err := daemon.imageService.GetLayerByID(container.ID, container.OS)
|
||||
currentDriverForContainerOS := daemon.graphDrivers[c.OS]
|
||||
if (c.Driver == "" && currentDriverForContainerOS == "aufs") || c.Driver == currentDriverForContainerOS {
|
||||
rwlayer, err := daemon.imageService.GetLayerByID(c.ID, c.OS)
|
||||
if err != nil {
|
||||
logrus.Errorf("Failed to load container mount %v: %v", id, err)
|
||||
log.WithError(err).Error("failed to load container mount")
|
||||
return
|
||||
}
|
||||
container.RWLayer = rwlayer
|
||||
logrus.Debugf("Loaded container %v, isRunning: %v", container.ID, container.IsRunning())
|
||||
c.RWLayer = rwlayer
|
||||
log.WithFields(logrus.Fields{
|
||||
"running": c.IsRunning(),
|
||||
"paused": c.IsPaused(),
|
||||
}).Debug("loaded container")
|
||||
|
||||
mapLock.Lock()
|
||||
containers[container.ID] = container
|
||||
containers[c.ID] = c
|
||||
mapLock.Unlock()
|
||||
} else {
|
||||
logrus.Debugf("Cannot load container %s because it was created with another graph driver.", container.ID)
|
||||
log.Debugf("cannot load container because it was created with another storage driver")
|
||||
}
|
||||
}(v.Name())
|
||||
}
|
||||
|
@ -280,15 +285,17 @@ func (daemon *Daemon) restore() error {
|
|||
_ = sem.Acquire(context.Background(), 1)
|
||||
defer sem.Release(1)
|
||||
|
||||
log := logrus.WithField("container", c.ID)
|
||||
|
||||
if err := daemon.registerName(c); err != nil {
|
||||
logrus.Errorf("Failed to register container name %s: %s", c.ID, err)
|
||||
log.WithError(err).Errorf("failed to register container name: %s", c.Name)
|
||||
mapLock.Lock()
|
||||
delete(containers, c.ID)
|
||||
mapLock.Unlock()
|
||||
return
|
||||
}
|
||||
if err := daemon.Register(c); err != nil {
|
||||
logrus.Errorf("Failed to register container %s: %s", c.ID, err)
|
||||
log.WithError(err).Error("failed to register container")
|
||||
mapLock.Lock()
|
||||
delete(containers, c.ID)
|
||||
mapLock.Unlock()
|
||||
|
@ -300,7 +307,7 @@ func (daemon *Daemon) restore() error {
|
|||
// Fixes https://github.com/docker/docker/issues/22536
|
||||
if c.HostConfig.LogConfig.Type == "" {
|
||||
if err := daemon.mergeAndVerifyLogConfig(&c.HostConfig.LogConfig); err != nil {
|
||||
logrus.Errorf("Failed to verify log config for container %s: %q", c.ID, err)
|
||||
log.WithError(err).Error("failed to verify log config for container")
|
||||
}
|
||||
}
|
||||
}(c)
|
||||
|
@ -314,17 +321,18 @@ func (daemon *Daemon) restore() error {
|
|||
_ = sem.Acquire(context.Background(), 1)
|
||||
defer sem.Release(1)
|
||||
|
||||
log := logrus.WithField("container", c.ID)
|
||||
|
||||
daemon.backportMountSpec(c)
|
||||
if err := daemon.checkpointAndSave(c); err != nil {
|
||||
logrus.WithError(err).WithField("container", c.ID).Error("error saving backported mountspec to disk")
|
||||
log.WithError(err).Error("error saving backported mountspec to disk")
|
||||
}
|
||||
|
||||
daemon.setStateCounter(c)
|
||||
|
||||
logrus.WithFields(logrus.Fields{
|
||||
"container": c.ID,
|
||||
"running": c.IsRunning(),
|
||||
"paused": c.IsPaused(),
|
||||
log.WithFields(logrus.Fields{
|
||||
"running": c.IsRunning(),
|
||||
"paused": c.IsPaused(),
|
||||
}).Debug("restoring container")
|
||||
|
||||
var (
|
||||
|
@ -337,18 +345,18 @@ func (daemon *Daemon) restore() error {
|
|||
|
||||
alive, _, process, err = daemon.containerd.Restore(context.Background(), c.ID, c.InitializeStdio)
|
||||
if err != nil && !errdefs.IsNotFound(err) {
|
||||
logrus.Errorf("Failed to restore container %s with containerd: %s", c.ID, err)
|
||||
log.WithError(err).Error("failed to restore container with containerd")
|
||||
return
|
||||
}
|
||||
if !alive && process != nil {
|
||||
ec, exitedAt, err = process.Delete(context.Background())
|
||||
if err != nil && !errdefs.IsNotFound(err) {
|
||||
logrus.WithError(err).Errorf("Failed to delete container %s from containerd", c.ID)
|
||||
log.WithError(err).Error("failed to delete container from containerd")
|
||||
return
|
||||
}
|
||||
} else if !daemon.configStore.LiveRestoreEnabled {
|
||||
if err := daemon.shutdownContainer(c); err != nil && !errdefs.IsNotFound(err) {
|
||||
logrus.WithError(err).WithField("container", c.ID).Error("error shutting down container")
|
||||
log.WithError(err).Error("error shutting down container")
|
||||
return
|
||||
}
|
||||
c.ResetRestartManager(false)
|
||||
|
@ -360,27 +368,23 @@ func (daemon *Daemon) restore() error {
|
|||
if c.IsPaused() && alive {
|
||||
s, err := daemon.containerd.Status(context.Background(), c.ID)
|
||||
if err != nil {
|
||||
logrus.WithError(err).WithField("container", c.ID).
|
||||
Errorf("Failed to get container status")
|
||||
log.WithError(err).Error("failed to get container status")
|
||||
} else {
|
||||
logrus.WithField("container", c.ID).WithField("state", s).
|
||||
Info("restored container paused")
|
||||
log.WithField("state", s).Info("restored container paused")
|
||||
switch s {
|
||||
case containerd.Paused, containerd.Pausing:
|
||||
// nothing to do
|
||||
case containerd.Stopped:
|
||||
alive = false
|
||||
case containerd.Unknown:
|
||||
logrus.WithField("container", c.ID).
|
||||
Error("Unknown status for container during restore")
|
||||
log.Error("unknown status for paused container during restore")
|
||||
default:
|
||||
// running
|
||||
c.Lock()
|
||||
c.Paused = false
|
||||
daemon.setStateCounter(c)
|
||||
if err := c.CheckpointTo(daemon.containersReplica); err != nil {
|
||||
logrus.WithError(err).WithField("container", c.ID).
|
||||
Error("Failed to update stopped container state")
|
||||
log.WithError(err).Error("failed to update paused container state")
|
||||
}
|
||||
c.Unlock()
|
||||
}
|
||||
|
@ -392,7 +396,7 @@ func (daemon *Daemon) restore() error {
|
|||
c.SetStopped(&container.ExitStatus{ExitCode: int(ec), ExitedAt: exitedAt})
|
||||
daemon.Cleanup(c)
|
||||
if err := c.CheckpointTo(daemon.containersReplica); err != nil {
|
||||
logrus.Errorf("Failed to update stopped container %s state: %v", c.ID, err)
|
||||
log.WithError(err).Error("failed to update stopped container state")
|
||||
}
|
||||
c.Unlock()
|
||||
}
|
||||
|
@ -405,10 +409,10 @@ func (daemon *Daemon) restore() error {
|
|||
// stopped/restarted/removed.
|
||||
// See #29365 for related information.
|
||||
// The error is only logged here.
|
||||
logrus.Warnf("Failed to mount container on getting BaseFs path %v: %v", c.ID, err)
|
||||
log.WithError(err).Warn("failed to mount container to get BaseFs path")
|
||||
} else {
|
||||
if err := daemon.Unmount(c); err != nil {
|
||||
logrus.Warnf("Failed to umount container on getting BaseFs path %v: %v", c.ID, err)
|
||||
log.WithError(err).Warn("failed to umount container to get BaseFs path")
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -416,7 +420,7 @@ func (daemon *Daemon) restore() error {
|
|||
if !c.HostConfig.NetworkMode.IsContainer() && c.IsRunning() {
|
||||
options, err := daemon.buildSandboxOptions(c)
|
||||
if err != nil {
|
||||
logrus.Warnf("Failed build sandbox option to restore container %s: %v", c.ID, err)
|
||||
log.WithError(err).Warn("failed to build sandbox option to restore container")
|
||||
}
|
||||
mapLock.Lock()
|
||||
activeSandboxes[c.NetworkSettings.SandboxID] = options
|
||||
|
@ -452,11 +456,12 @@ func (daemon *Daemon) restore() error {
|
|||
// associated volumes, network links or both to also
|
||||
// be removed. So we put the container in the "dead"
|
||||
// state and leave further processing up to them.
|
||||
logrus.Debugf("Resetting RemovalInProgress flag from %v", c.ID)
|
||||
c.RemovalInProgress = false
|
||||
c.Dead = true
|
||||
if err := c.CheckpointTo(daemon.containersReplica); err != nil {
|
||||
logrus.Errorf("Failed to update RemovalInProgress container %s state: %v", c.ID, err)
|
||||
log.WithError(err).Error("failed to update RemovalInProgress container state")
|
||||
} else {
|
||||
log.Debugf("reset RemovalInProgress state for container")
|
||||
}
|
||||
}
|
||||
c.Unlock()
|
||||
|
@ -476,7 +481,7 @@ func (daemon *Daemon) restore() error {
|
|||
_ = sem.Acquire(context.Background(), 1)
|
||||
|
||||
if err := daemon.registerLinks(c, c.HostConfig); err != nil {
|
||||
logrus.Errorf("failed to register link for container %s: %v", c.ID, err)
|
||||
logrus.WithField("container", c.ID).WithError(err).Error("failed to register link for container")
|
||||
}
|
||||
|
||||
sem.Release(1)
|
||||
|
@ -489,7 +494,10 @@ func (daemon *Daemon) restore() error {
|
|||
group.Add(1)
|
||||
go func(c *container.Container, chNotify chan struct{}) {
|
||||
_ = sem.Acquire(context.Background(), 1)
|
||||
logrus.Debugf("Starting container %s", c.ID)
|
||||
|
||||
log := logrus.WithField("container", c.ID)
|
||||
|
||||
log.Debug("starting container")
|
||||
|
||||
// ignore errors here as this is a best effort to wait for children to be
|
||||
// running before we try to start the container
|
||||
|
@ -509,7 +517,7 @@ func (daemon *Daemon) restore() error {
|
|||
// Make sure networks are available before starting
|
||||
daemon.waitForNetworks(c)
|
||||
if err := daemon.containerStart(c, "", "", true); err != nil {
|
||||
logrus.Errorf("Failed to start container %s: %s", c.ID, err)
|
||||
log.WithError(err).Error("failed to start container")
|
||||
}
|
||||
close(chNotify)
|
||||
|
||||
|
@ -525,7 +533,7 @@ func (daemon *Daemon) restore() error {
|
|||
_ = sem.Acquire(context.Background(), 1)
|
||||
|
||||
if err := daemon.ContainerRm(cid, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err != nil {
|
||||
logrus.Errorf("Failed to remove container %s: %s", cid, err)
|
||||
logrus.WithField("container", cid).WithError(err).Error("failed to remove container")
|
||||
}
|
||||
|
||||
sem.Release(1)
|
||||
|
@ -556,7 +564,7 @@ func (daemon *Daemon) restore() error {
|
|||
_ = sem.Acquire(context.Background(), 1)
|
||||
|
||||
if err := daemon.prepareMountPoints(c); err != nil {
|
||||
logrus.Error(err)
|
||||
logrus.WithField("container", c.ID).WithError(err).Error("failed to prepare mountpoints for container")
|
||||
}
|
||||
|
||||
sem.Release(1)
|
||||
|
@ -600,7 +608,7 @@ func (daemon *Daemon) RestartSwarmContainers() {
|
|||
}
|
||||
|
||||
if err := daemon.containerStart(c, "", "", true); err != nil {
|
||||
logrus.Error(err)
|
||||
logrus.WithField("container", c.ID).WithError(err).Error("failed to start swarm container")
|
||||
}
|
||||
|
||||
sem.Release(1)
|
||||
|
@ -634,7 +642,7 @@ func (daemon *Daemon) waitForNetworks(c *container.Container) {
|
|||
dur := 60 * time.Second
|
||||
timer := time.NewTimer(dur)
|
||||
|
||||
logrus.Debugf("Container %s waiting for network to be ready", c.Name)
|
||||
logrus.WithField("container", c.ID).Debugf("Container %s waiting for network to be ready", c.Name)
|
||||
select {
|
||||
case <-daemon.discoveryWatcher.ReadyCh():
|
||||
case <-timer.C:
|
||||
|
@ -1228,15 +1236,16 @@ func (daemon *Daemon) Shutdown() error {
|
|||
if !c.IsRunning() {
|
||||
return
|
||||
}
|
||||
logrus.Debugf("stopping %s", c.ID)
|
||||
log := logrus.WithField("container", c.ID)
|
||||
log.Debug("shutting down container")
|
||||
if err := daemon.shutdownContainer(c); err != nil {
|
||||
logrus.Errorf("Stop container error: %v", err)
|
||||
log.WithError(err).Error("failed to shut down container")
|
||||
return
|
||||
}
|
||||
if mountid, err := daemon.imageService.GetLayerMountID(c.ID, c.OS); err == nil {
|
||||
daemon.cleanupMountsByID(mountid)
|
||||
}
|
||||
logrus.Debugf("container stopped %s", c.ID)
|
||||
log.Debugf("shut down container")
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -1287,7 +1296,7 @@ func (daemon *Daemon) Mount(container *container.Container) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
logrus.Debugf("container mounted via layerStore: %v", dir)
|
||||
logrus.WithField("container", container.ID).Debugf("container mounted via layerStore: %v", dir)
|
||||
|
||||
if container.BaseFS != nil && container.BaseFS.Path() != dir.Path() {
|
||||
// The mount path reported by the graph driver should always be trusted on Windows, since the
|
||||
|
@ -1309,7 +1318,7 @@ func (daemon *Daemon) Unmount(container *container.Container) error {
|
|||
return errors.New("RWLayer of container " + container.ID + " is unexpectedly nil")
|
||||
}
|
||||
if err := container.RWLayer.Unmount(); err != nil {
|
||||
logrus.Errorf("Error unmounting container %s: %s", container.ID, err)
|
||||
logrus.WithField("container", container.ID).WithError(err).Error("error unmounting container")
|
||||
return err
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue