Browse Source

daemon: improve log messages during startup / shutdown

Consistently set "container ID" as a field for log messages, so that
logs can be associated with a container.

With this logs look like;

    INFO[2020-12-15T12:30:46.239329903Z] Loading containers: start.
    DEBU[2020-12-15T12:30:46.239919357Z] processing event stream      module=libcontainerd namespace=moby
    DEBU[2020-12-15T12:30:46.242061458Z] loaded container             container=622dec5f737d532da347bc627655ebc351fa5887476e8b8c33e5fbc5d0e48b5c paused=false running=false
    DEBU[2020-12-15T12:30:46.242185251Z] loaded container             container=47f348160645f46a17c758d120dec600967eed4adf08dd28b809725971d062cc paused=false running=false
    DEBU[2020-12-15T12:30:46.242912375Z] loaded container             container=e29c34c14b84810bc1e6cb6978a81e863601bfbe9ffe076c07dd5f6a439289d6 paused=false running=false
    DEBU[2020-12-15T12:30:46.243165260Z] loaded container             container=31d40ee3e591a50ebee790b08c2bec751610d2eca51ca1a371ea1ff66ea46c1d paused=false running=false
    DEBU[2020-12-15T12:30:46.243585164Z] loaded container             container=03dd5b1dc251a12d2e74eb54cb3ace66c437db228238a8d4831a264c9313c192 paused=false running=false
    DEBU[2020-12-15T12:30:46.244870764Z] loaded container             container=b774141975cc511cc61fc5f374793503bb2e8fa774d6580ac47111a089de1b9b paused=false running=false
    DEBU[2020-12-15T12:30:46.245140276Z] loaded container             container=b8a7229824fb84ff6f5af537a8ba987d106bf9a24a9aad3b628605d26b3facc4 paused=false running=false
    DEBU[2020-12-15T12:30:46.245457025Z] loaded container             container=b3256ff87fc6f243d9e044fb3d7988ef61c86bfb957d90c0227e8a9697ffa49c paused=false running=false
    DEBU[2020-12-15T12:30:46.292515417Z] restoring container          container=b3256ff87fc6f243d9e044fb3d7988ef61c86bfb957d90c0227e8a9697ffa49c paused=false running=false
    DEBU[2020-12-15T12:30:46.292612379Z] restoring container          container=31d40ee3e591a50ebee790b08c2bec751610d2eca51ca1a371ea1ff66ea46c1d paused=false running=false
    DEBU[2020-12-15T12:30:46.292573767Z] restoring container          container=b8a7229824fb84ff6f5af537a8ba987d106bf9a24a9aad3b628605d26b3facc4 paused=false running=false
    DEBU[2020-12-15T12:30:46.292602437Z] restoring container          container=b774141975cc511cc61fc5f374793503bb2e8fa774d6580ac47111a089de1b9b paused=false running=false
    DEBU[2020-12-15T12:30:46.305032730Z] restoring container          container=47f348160645f46a17c758d120dec600967eed4adf08dd28b809725971d062cc paused=false running=false
    DEBU[2020-12-15T12:30:46.305421360Z] restoring container          container=622dec5f737d532da347bc627655ebc351fa5887476e8b8c33e5fbc5d0e48b5c paused=false running=false
    DEBU[2020-12-15T12:30:46.305558773Z] restoring container          container=03dd5b1dc251a12d2e74eb54cb3ace66c437db228238a8d4831a264c9313c192 paused=false running=false
    DEBU[2020-12-15T12:30:46.307662990Z] restoring container          container=e29c34c14b84810bc1e6cb6978a81e863601bfbe9ffe076c07dd5f6a439289d6 paused=false running=false
    ...
    INFO[2020-12-15T12:30:46.536506204Z] Loading containers: done.

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
Sebastiaan van Stijn 4 years ago
parent
commit
7e600eaae0
1 changed files with 58 additions and 49 deletions
  1. 58 49
      daemon/daemon.go

+ 58 - 49
daemon/daemon.go

@@ -239,31 +239,36 @@ func (daemon *Daemon) restore() error {
 			_ = sem.Acquire(context.Background(), 1)
 			defer sem.Release(1)
 
-			container, err := daemon.load(id)
+			log := logrus.WithField("container", id)
+
+			c, err := daemon.load(id)
 			if err != nil {
-				logrus.Errorf("Failed to load container %v: %v", id, err)
+				log.WithError(err).Error("failed to load container")
 				return
 			}
-			if !system.IsOSSupported(container.OS) {
-				logrus.Errorf("Failed to load container %v: %s (%q)", id, system.ErrNotSupportedOperatingSystem, container.OS)
+			if !system.IsOSSupported(c.OS) {
+				log.Errorf("failed to load container: %s (%q)", system.ErrNotSupportedOperatingSystem, c.OS)
 				return
 			}
 			// Ignore the container if it does not support the current driver being used by the graph
-			currentDriverForContainerOS := daemon.graphDrivers[container.OS]
-			if (container.Driver == "" && currentDriverForContainerOS == "aufs") || container.Driver == currentDriverForContainerOS {
-				rwlayer, err := daemon.imageService.GetLayerByID(container.ID, container.OS)
+			currentDriverForContainerOS := daemon.graphDrivers[c.OS]
+			if (c.Driver == "" && currentDriverForContainerOS == "aufs") || c.Driver == currentDriverForContainerOS {
+				rwlayer, err := daemon.imageService.GetLayerByID(c.ID, c.OS)
 				if err != nil {
-					logrus.Errorf("Failed to load container mount %v: %v", id, err)
+					log.WithError(err).Error("failed to load container mount")
 					return
 				}
-				container.RWLayer = rwlayer
-				logrus.Debugf("Loaded container %v, isRunning: %v", container.ID, container.IsRunning())
+				c.RWLayer = rwlayer
+				log.WithFields(logrus.Fields{
+					"running": c.IsRunning(),
+					"paused":  c.IsPaused(),
+				}).Debug("loaded container")
 
 				mapLock.Lock()
-				containers[container.ID] = container
+				containers[c.ID] = c
 				mapLock.Unlock()
 			} else {
-				logrus.Debugf("Cannot load container %s because it was created with another graph driver.", container.ID)
+				log.Debugf("cannot load container because it was created with another storage driver")
 			}
 		}(v.Name())
 	}
@@ -280,15 +285,17 @@ func (daemon *Daemon) restore() error {
 			_ = sem.Acquire(context.Background(), 1)
 			defer sem.Release(1)
 
+			log := logrus.WithField("container", c.ID)
+
 			if err := daemon.registerName(c); err != nil {
-				logrus.Errorf("Failed to register container name %s: %s", c.ID, err)
+				log.WithError(err).Errorf("failed to register container name: %s", c.Name)
 				mapLock.Lock()
 				delete(containers, c.ID)
 				mapLock.Unlock()
 				return
 			}
 			if err := daemon.Register(c); err != nil {
-				logrus.Errorf("Failed to register container %s: %s", c.ID, err)
+				log.WithError(err).Error("failed to register container")
 				mapLock.Lock()
 				delete(containers, c.ID)
 				mapLock.Unlock()
@@ -300,7 +307,7 @@ func (daemon *Daemon) restore() error {
 			// Fixes https://github.com/docker/docker/issues/22536
 			if c.HostConfig.LogConfig.Type == "" {
 				if err := daemon.mergeAndVerifyLogConfig(&c.HostConfig.LogConfig); err != nil {
-					logrus.Errorf("Failed to verify log config for container %s: %q", c.ID, err)
+					log.WithError(err).Error("failed to verify log config for container")
 				}
 			}
 		}(c)
@@ -314,17 +321,18 @@ func (daemon *Daemon) restore() error {
 			_ = sem.Acquire(context.Background(), 1)
 			defer sem.Release(1)
 
+			log := logrus.WithField("container", c.ID)
+
 			daemon.backportMountSpec(c)
 			if err := daemon.checkpointAndSave(c); err != nil {
-				logrus.WithError(err).WithField("container", c.ID).Error("error saving backported mountspec to disk")
+				log.WithError(err).Error("error saving backported mountspec to disk")
 			}
 
 			daemon.setStateCounter(c)
 
-			logrus.WithFields(logrus.Fields{
-				"container": c.ID,
-				"running":   c.IsRunning(),
-				"paused":    c.IsPaused(),
+			log.WithFields(logrus.Fields{
+				"running": c.IsRunning(),
+				"paused":  c.IsPaused(),
 			}).Debug("restoring container")
 
 			var (
@@ -337,18 +345,18 @@ func (daemon *Daemon) restore() error {
 
 			alive, _, process, err = daemon.containerd.Restore(context.Background(), c.ID, c.InitializeStdio)
 			if err != nil && !errdefs.IsNotFound(err) {
-				logrus.Errorf("Failed to restore container %s with containerd: %s", c.ID, err)
+				log.WithError(err).Error("failed to restore container with containerd")
 				return
 			}
 			if !alive && process != nil {
 				ec, exitedAt, err = process.Delete(context.Background())
 				if err != nil && !errdefs.IsNotFound(err) {
-					logrus.WithError(err).Errorf("Failed to delete container %s from containerd", c.ID)
+					log.WithError(err).Error("failed to delete container from containerd")
 					return
 				}
 			} else if !daemon.configStore.LiveRestoreEnabled {
 				if err := daemon.shutdownContainer(c); err != nil && !errdefs.IsNotFound(err) {
-					logrus.WithError(err).WithField("container", c.ID).Error("error shutting down container")
+					log.WithError(err).Error("error shutting down container")
 					return
 				}
 				c.ResetRestartManager(false)
@@ -360,27 +368,23 @@ func (daemon *Daemon) restore() error {
 				if c.IsPaused() && alive {
 					s, err := daemon.containerd.Status(context.Background(), c.ID)
 					if err != nil {
-						logrus.WithError(err).WithField("container", c.ID).
-							Errorf("Failed to get container status")
+						log.WithError(err).Error("failed to get container status")
 					} else {
-						logrus.WithField("container", c.ID).WithField("state", s).
-							Info("restored container paused")
+						log.WithField("state", s).Info("restored container paused")
 						switch s {
 						case containerd.Paused, containerd.Pausing:
 							// nothing to do
 						case containerd.Stopped:
 							alive = false
 						case containerd.Unknown:
-							logrus.WithField("container", c.ID).
-								Error("Unknown status for container during restore")
+							log.Error("unknown status for paused container during restore")
 						default:
 							// running
 							c.Lock()
 							c.Paused = false
 							daemon.setStateCounter(c)
 							if err := c.CheckpointTo(daemon.containersReplica); err != nil {
-								logrus.WithError(err).WithField("container", c.ID).
-									Error("Failed to update stopped container state")
+								log.WithError(err).Error("failed to update paused container state")
 							}
 							c.Unlock()
 						}
@@ -392,7 +396,7 @@ func (daemon *Daemon) restore() error {
 					c.SetStopped(&container.ExitStatus{ExitCode: int(ec), ExitedAt: exitedAt})
 					daemon.Cleanup(c)
 					if err := c.CheckpointTo(daemon.containersReplica); err != nil {
-						logrus.Errorf("Failed to update stopped container %s state: %v", c.ID, err)
+						log.WithError(err).Error("failed to update stopped container state")
 					}
 					c.Unlock()
 				}
@@ -405,10 +409,10 @@ func (daemon *Daemon) restore() error {
 					// stopped/restarted/removed.
 					// See #29365 for related information.
 					// The error is only logged here.
-					logrus.Warnf("Failed to mount container on getting BaseFs path %v: %v", c.ID, err)
+					log.WithError(err).Warn("failed to mount container to get BaseFs path")
 				} else {
 					if err := daemon.Unmount(c); err != nil {
-						logrus.Warnf("Failed to umount container on getting BaseFs path %v: %v", c.ID, err)
+						log.WithError(err).Warn("failed to umount container to get BaseFs path")
 					}
 				}
 
@@ -416,7 +420,7 @@ func (daemon *Daemon) restore() error {
 				if !c.HostConfig.NetworkMode.IsContainer() && c.IsRunning() {
 					options, err := daemon.buildSandboxOptions(c)
 					if err != nil {
-						logrus.Warnf("Failed build sandbox option to restore container %s: %v", c.ID, err)
+						log.WithError(err).Warn("failed to build sandbox option to restore container")
 					}
 					mapLock.Lock()
 					activeSandboxes[c.NetworkSettings.SandboxID] = options
@@ -452,11 +456,12 @@ func (daemon *Daemon) restore() error {
 				// associated volumes, network links or both to also
 				// be removed. So we put the container in the "dead"
 				// state and leave further processing up to them.
-				logrus.Debugf("Resetting RemovalInProgress flag from %v", c.ID)
 				c.RemovalInProgress = false
 				c.Dead = true
 				if err := c.CheckpointTo(daemon.containersReplica); err != nil {
-					logrus.Errorf("Failed to update RemovalInProgress container %s state: %v", c.ID, err)
+					log.WithError(err).Error("failed to update RemovalInProgress container state")
+				} else {
+					log.Debugf("reset RemovalInProgress state for container")
 				}
 			}
 			c.Unlock()
@@ -476,7 +481,7 @@ func (daemon *Daemon) restore() error {
 			_ = sem.Acquire(context.Background(), 1)
 
 			if err := daemon.registerLinks(c, c.HostConfig); err != nil {
-				logrus.Errorf("failed to register link for container %s: %v", c.ID, err)
+				logrus.WithField("container", c.ID).WithError(err).Error("failed to register link for container")
 			}
 
 			sem.Release(1)
@@ -489,7 +494,10 @@ func (daemon *Daemon) restore() error {
 		group.Add(1)
 		go func(c *container.Container, chNotify chan struct{}) {
 			_ = sem.Acquire(context.Background(), 1)
-			logrus.Debugf("Starting container %s", c.ID)
+
+			log := logrus.WithField("container", c.ID)
+
+			log.Debug("starting container")
 
 			// ignore errors here as this is a best effort to wait for children to be
 			//   running before we try to start the container
@@ -509,7 +517,7 @@ func (daemon *Daemon) restore() error {
 			// Make sure networks are available before starting
 			daemon.waitForNetworks(c)
 			if err := daemon.containerStart(c, "", "", true); err != nil {
-				logrus.Errorf("Failed to start container %s: %s", c.ID, err)
+				log.WithError(err).Error("failed to start container")
 			}
 			close(chNotify)
 
@@ -525,7 +533,7 @@ func (daemon *Daemon) restore() error {
 			_ = sem.Acquire(context.Background(), 1)
 
 			if err := daemon.ContainerRm(cid, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err != nil {
-				logrus.Errorf("Failed to remove container %s: %s", cid, err)
+				logrus.WithField("container", cid).WithError(err).Error("failed to remove container")
 			}
 
 			sem.Release(1)
@@ -556,7 +564,7 @@ func (daemon *Daemon) restore() error {
 			_ = sem.Acquire(context.Background(), 1)
 
 			if err := daemon.prepareMountPoints(c); err != nil {
-				logrus.Error(err)
+				logrus.WithField("container", c.ID).WithError(err).Error("failed to prepare mountpoints for container")
 			}
 
 			sem.Release(1)
@@ -600,7 +608,7 @@ func (daemon *Daemon) RestartSwarmContainers() {
 					}
 
 					if err := daemon.containerStart(c, "", "", true); err != nil {
-						logrus.Error(err)
+						logrus.WithField("container", c.ID).WithError(err).Error("failed to start swarm container")
 					}
 
 					sem.Release(1)
@@ -634,7 +642,7 @@ func (daemon *Daemon) waitForNetworks(c *container.Container) {
 			dur := 60 * time.Second
 			timer := time.NewTimer(dur)
 
-			logrus.Debugf("Container %s waiting for network to be ready", c.Name)
+			logrus.WithField("container", c.ID).Debugf("Container %s waiting for network to be ready", c.Name)
 			select {
 			case <-daemon.discoveryWatcher.ReadyCh():
 			case <-timer.C:
@@ -1228,15 +1236,16 @@ func (daemon *Daemon) Shutdown() error {
 			if !c.IsRunning() {
 				return
 			}
-			logrus.Debugf("stopping %s", c.ID)
+			log := logrus.WithField("container", c.ID)
+			log.Debug("shutting down container")
 			if err := daemon.shutdownContainer(c); err != nil {
-				logrus.Errorf("Stop container error: %v", err)
+				log.WithError(err).Error("failed to shut down container")
 				return
 			}
 			if mountid, err := daemon.imageService.GetLayerMountID(c.ID, c.OS); err == nil {
 				daemon.cleanupMountsByID(mountid)
 			}
-			logrus.Debugf("container stopped %s", c.ID)
+			log.Debugf("shut down container")
 		})
 	}
 
@@ -1287,7 +1296,7 @@ func (daemon *Daemon) Mount(container *container.Container) error {
 	if err != nil {
 		return err
 	}
-	logrus.Debugf("container mounted via layerStore: %v", dir)
+	logrus.WithField("container", container.ID).Debugf("container mounted via layerStore: %v", dir)
 
 	if container.BaseFS != nil && container.BaseFS.Path() != dir.Path() {
 		// The mount path reported by the graph driver should always be trusted on Windows, since the
@@ -1309,7 +1318,7 @@ func (daemon *Daemon) Unmount(container *container.Container) error {
 		return errors.New("RWLayer of container " + container.ID + " is unexpectedly nil")
 	}
 	if err := container.RWLayer.Unmount(); err != nil {
-		logrus.Errorf("Error unmounting container %s: %s", container.ID, err)
+		logrus.WithField("container", container.ID).WithError(err).Error("error unmounting container")
 		return err
 	}