浏览代码

Plumb context through info endpoint

I was trying to find out why `docker info` was sometimes slow, so this
plumbs a context through the info endpoint to propagate trace data.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>
Brian Goff 1 年之前
父节点
当前提交
677d41aa3b

+ 3 - 3
api/server/router/system/backend.go

@@ -27,8 +27,8 @@ type DiskUsageOptions struct {
 // Backend is the methods that need to be implemented to provide
 // system specific functionality.
 type Backend interface {
-	SystemInfo() *system.Info
-	SystemVersion() types.Version
+	SystemInfo(context.Context) (*system.Info, error)
+	SystemVersion(context.Context) (types.Version, error)
 	SystemDiskUsage(ctx context.Context, opts DiskUsageOptions) (*types.DiskUsage, error)
 	SubscribeToEvents(since, until time.Time, ef filters.Args) ([]events.Message, chan interface{})
 	UnsubscribeFromEvents(chan interface{})
@@ -38,7 +38,7 @@ type Backend interface {
 // ClusterBackend is all the methods that need to be implemented
 // to provide cluster system specific functionality.
 type ClusterBackend interface {
-	Info() swarm.Info
+	Info(context.Context) swarm.Info
 }
 
 // StatusProvider provides methods to get the swarm status of the current node.

+ 9 - 3
api/server/router/system/system_routes.go

@@ -60,10 +60,13 @@ func (s *systemRouter) swarmStatus() string {
 func (s *systemRouter) getInfo(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
 	version := httputils.VersionFromContext(ctx)
 	info, _, _ := s.collectSystemInfo.Do(ctx, version, func(ctx context.Context) (*system.Info, error) {
-		info := s.backend.SystemInfo()
+		info, err := s.backend.SystemInfo(ctx)
+		if err != nil {
+			return nil, err
+		}
 
 		if s.cluster != nil {
-			info.Swarm = s.cluster.Info()
+			info.Swarm = s.cluster.Info(ctx)
 			info.Warnings = append(info.Warnings, info.Swarm.Warnings...)
 		}
 
@@ -97,7 +100,10 @@ func (s *systemRouter) getInfo(ctx context.Context, w http.ResponseWriter, r *ht
 }
 
 func (s *systemRouter) getVersion(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
-	info := s.backend.SystemVersion()
+	info, err := s.backend.SystemVersion(ctx)
+	if err != nil {
+		return err
+	}
 
 	return httputils.WriteJSON(w, http.StatusOK, info)
 }

+ 4 - 3
daemon/cluster/cluster.go

@@ -249,8 +249,8 @@ func (c *Cluster) newNodeRunner(conf nodeStartConfig) (*nodeRunner, error) {
 	return nr, nil
 }
 
-func (c *Cluster) getRequestContext() (context.Context, func()) { // TODO: not needed when requests don't block on qourum lost
-	return context.WithTimeout(context.Background(), swarmRequestTimeout)
+func (c *Cluster) getRequestContext(ctx context.Context) (context.Context, func()) { // TODO: not needed when requests don't block on qourum lost
+	return context.WithTimeout(ctx, swarmRequestTimeout)
 }
 
 // IsManager returns true if Cluster is participating as a manager.
@@ -443,7 +443,8 @@ func (c *Cluster) lockedManagerAction(fn func(ctx context.Context, state nodeSta
 		return c.errNoManager(state)
 	}
 
-	ctx, cancel := c.getRequestContext()
+	ctx := context.TODO()
+	ctx, cancel := c.getRequestContext(ctx)
 	defer cancel()
 
 	return fn(ctx, state)

+ 3 - 1
daemon/cluster/configs.go

@@ -41,7 +41,9 @@ func (c *Cluster) GetConfigs(options apitypes.ConfigListOptions) ([]types.Config
 	if err != nil {
 		return nil, err
 	}
-	ctx, cancel := c.getRequestContext()
+
+	ctx := context.TODO()
+	ctx, cancel := c.getRequestContext(ctx)
 	defer cancel()
 
 	r, err := state.controlClient.ListConfigs(ctx,

+ 1 - 1
daemon/cluster/executor/backend.go

@@ -53,7 +53,7 @@ type Backend interface {
 	SetContainerDependencyStore(name string, store exec.DependencyGetter) error
 	SetContainerSecretReferences(name string, refs []*swarm.SecretReference) error
 	SetContainerConfigReferences(name string, refs []*swarm.ConfigReference) error
-	SystemInfo() *system.Info
+	SystemInfo(context.Context) (*system.Info, error)
 	Containers(ctx context.Context, config *container.ListOptions) ([]*types.Container, error)
 	SetNetworkBootstrapKeys([]*networktypes.EncryptionKey) error
 	DaemonJoinsCluster(provider cluster.Provider)

+ 4 - 1
daemon/cluster/executor/container/executor.go

@@ -58,7 +58,10 @@ func NewExecutor(b executorpkg.Backend, p plugin.Backend, i executorpkg.ImageBac
 
 // Describe returns the underlying node description from the docker client.
 func (e *executor) Describe(ctx context.Context) (*api.NodeDescription, error) {
-	info := e.backend.SystemInfo()
+	info, err := e.backend.SystemInfo(ctx)
+	if err != nil {
+		return nil, err
+	}
 
 	plugins := map[api.PluginDescription]struct{}{}
 	addPlugins := func(typ string, names []string) {

+ 7 - 3
daemon/cluster/networks.go

@@ -12,6 +12,7 @@ import (
 	"github.com/docker/docker/daemon/cluster/convert"
 	internalnetwork "github.com/docker/docker/daemon/network"
 	"github.com/docker/docker/errdefs"
+	"github.com/docker/docker/internal/compatcontext"
 	"github.com/docker/docker/runconfig"
 	swarmapi "github.com/moby/swarmkit/v2/api"
 	"github.com/pkg/errors"
@@ -68,7 +69,8 @@ func (c *Cluster) getNetworks(filters *swarmapi.ListNetworksRequest_Filters) ([]
 		return nil, c.errNoManager(state)
 	}
 
-	ctx, cancel := c.getRequestContext()
+	ctx := context.TODO()
+	ctx, cancel := c.getRequestContext(ctx)
 	defer cancel()
 
 	r, err := state.controlClient.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: filters})
@@ -203,7 +205,8 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s
 	}
 	c.mu.Unlock()
 
-	ctx, cancel := c.getRequestContext()
+	ctx := context.TODO()
+	ctx, cancel := c.getRequestContext(ctx)
 	defer cancel()
 
 	taskID, err := agent.ResourceAllocator().AttachNetwork(ctx, containerID, target, addresses)
@@ -222,7 +225,8 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s
 	log.G(ctx).Debugf("Successfully attached to network %s with task id %s", target, taskID)
 
 	release := func() {
-		ctx, cancel := c.getRequestContext()
+		ctx := compatcontext.WithoutCancel(ctx)
+		ctx, cancel := c.getRequestContext(ctx)
 		defer cancel()
 		if err := agent.ResourceAllocator().DetachNetwork(ctx, taskID); err != nil {
 			log.G(ctx).Errorf("Failed remove network attachment %s to network %s on allocation failure: %v",

+ 4 - 2
daemon/cluster/nodes.go

@@ -26,7 +26,8 @@ func (c *Cluster) GetNodes(options apitypes.NodeListOptions) ([]types.Node, erro
 		return nil, err
 	}
 
-	ctx, cancel := c.getRequestContext()
+	ctx := context.TODO()
+	ctx, cancel := c.getRequestContext(ctx)
 	defer cancel()
 
 	r, err := state.controlClient.ListNodes(
@@ -72,7 +73,8 @@ func (c *Cluster) UpdateNode(input string, version uint64, spec types.NodeSpec)
 			return errdefs.InvalidParameter(err)
 		}
 
-		ctx, cancel := c.getRequestContext()
+		ctx := context.TODO()
+		ctx, cancel := c.getRequestContext(ctx)
 		defer cancel()
 
 		currentNode, err := getNode(ctx, state.controlClient, input)

+ 3 - 1
daemon/cluster/secrets.go

@@ -41,7 +41,9 @@ func (c *Cluster) GetSecrets(options apitypes.SecretListOptions) ([]types.Secret
 	if err != nil {
 		return nil, err
 	}
-	ctx, cancel := c.getRequestContext()
+
+	ctx := context.TODO()
+	ctx, cancel := c.getRequestContext(ctx)
 	defer cancel()
 
 	r, err := state.controlClient.ListSecrets(ctx,

+ 7 - 3
daemon/cluster/services.go

@@ -21,6 +21,7 @@ import (
 	timetypes "github.com/docker/docker/api/types/time"
 	"github.com/docker/docker/daemon/cluster/convert"
 	"github.com/docker/docker/errdefs"
+	"github.com/docker/docker/internal/compatcontext"
 	runconfigopts "github.com/docker/docker/runconfig/opts"
 	gogotypes "github.com/gogo/protobuf/types"
 	swarmapi "github.com/moby/swarmkit/v2/api"
@@ -65,7 +66,8 @@ func (c *Cluster) GetServices(options types.ServiceListOptions) ([]swarm.Service
 		Runtimes:     options.Filters.Get("runtime"),
 	}
 
-	ctx, cancel := c.getRequestContext()
+	ctx := context.TODO()
+	ctx, cancel := c.getRequestContext(ctx)
 	defer cancel()
 
 	r, err := state.controlClient.ListServices(
@@ -263,7 +265,8 @@ func (c *Cluster) CreateService(s swarm.ServiceSpec, encodedAuth string, queryRe
 				// "ctx" could make it impossible to create a service
 				// if the registry is slow or unresponsive.
 				var cancel func()
-				ctx, cancel = c.getRequestContext()
+				ctx = compatcontext.WithoutCancel(ctx)
+				ctx, cancel = c.getRequestContext(ctx)
 				defer cancel()
 			}
 
@@ -378,7 +381,8 @@ func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec swa
 				// "ctx" could make it impossible to update a service
 				// if the registry is slow or unresponsive.
 				var cancel func()
-				ctx, cancel = c.getRequestContext()
+				ctx = compatcontext.WithoutCancel(ctx)
+				ctx, cancel = c.getRequestContext(ctx)
 				defer cancel()
 			}
 		}

+ 2 - 2
daemon/cluster/swarm.go

@@ -429,7 +429,7 @@ func (c *Cluster) Leave(ctx context.Context, force bool) error {
 }
 
 // Info returns information about the current cluster state.
-func (c *Cluster) Info() types.Info {
+func (c *Cluster) Info(ctx context.Context) types.Info {
 	info := types.Info{
 		NodeAddr: c.GetAdvertiseAddress(),
 	}
@@ -442,7 +442,7 @@ func (c *Cluster) Info() types.Info {
 		info.Error = state.err.Error()
 	}
 
-	ctx, cancel := c.getRequestContext()
+	ctx, cancel := c.getRequestContext(ctx)
 	defer cancel()
 
 	if state.IsActiveManager() {

+ 2 - 2
daemon/containerd/service.go

@@ -76,8 +76,8 @@ func (i *ImageService) DistributionServices() images.DistributionServices {
 
 // CountImages returns the number of images stored by ImageService
 // called from info.go
-func (i *ImageService) CountImages() int {
-	imgs, err := i.client.ListImages(context.TODO())
+func (i *ImageService) CountImages(ctx context.Context) int {
+	imgs, err := i.client.ListImages(ctx)
 	if err != nil {
 		return 0
 	}

+ 5 - 2
daemon/daemon.go

@@ -1175,7 +1175,10 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
 	}
 	close(d.startupDone)
 
-	info := d.SystemInfo()
+	info, err := d.SystemInfo(ctx)
+	if err != nil {
+		return nil, err
+	}
 	for _, w := range info.Warnings {
 		log.G(ctx).Warn(w)
 	}
@@ -1349,7 +1352,7 @@ func (daemon *Daemon) Subnets() ([]net.IPNet, []net.IPNet) {
 	var v4Subnets []net.IPNet
 	var v6Subnets []net.IPNet
 
-	for _, managedNetwork := range daemon.netController.Networks() {
+	for _, managedNetwork := range daemon.netController.Networks(context.TODO()) {
 		v4infos, v6infos := managedNetwork.IpamInfo()
 		for _, info := range v4infos {
 			if info.IPAMData.Pool != nil {

+ 3 - 1
daemon/daemon_windows.go

@@ -247,8 +247,10 @@ func (daemon *Daemon) initNetworkController(daemonCfg *config.Config, activeSand
 		return err
 	}
 
+	ctx := context.TODO()
+
 	// Remove networks not present in HNS
-	for _, v := range daemon.netController.Networks() {
+	for _, v := range daemon.netController.Networks(ctx) {
 		hnsid := v.DriverOptions()[winlibnetwork.HNSID]
 		found := false
 

+ 1 - 1
daemon/events.go

@@ -68,7 +68,7 @@ func (daemon *Daemon) LogNetworkEventWithAttributes(nw *libnetwork.Network, acti
 // LogDaemonEventWithAttributes generates an event related to the daemon itself with specific given attributes.
 func (daemon *Daemon) LogDaemonEventWithAttributes(action events.Action, attributes map[string]string) {
 	if daemon.EventsService != nil {
-		if name := hostName(); name != "" {
+		if name := hostName(context.TODO()); name != "" {
 			attributes["name"] = name
 		}
 		daemon.EventsService.Log(action, events.DaemonEventType, events.Actor{

+ 1 - 1
daemon/image_service.go

@@ -36,7 +36,7 @@ type ImageService interface {
 	LoadImage(ctx context.Context, inTar io.ReadCloser, outStream io.Writer, quiet bool) error
 	Images(ctx context.Context, opts types.ImageListOptions) ([]*imagetype.Summary, error)
 	LogImageEvent(imageID, refName string, action events.Action)
-	CountImages() int
+	CountImages(ctx context.Context) int
 	ImagesPrune(ctx context.Context, pruneFilters filters.Args) (*types.ImagesPruneReport, error)
 	ImportImage(ctx context.Context, ref reference.Named, platform *ocispec.Platform, msg string, layerReader io.Reader, changes []string) (image.ID, error)
 	TagImage(ctx context.Context, imageID image.ID, newTag reference.Named) error

+ 1 - 1
daemon/images/service.go

@@ -102,7 +102,7 @@ func (i *ImageService) DistributionServices() DistributionServices {
 
 // CountImages returns the number of images stored by ImageService
 // called from info.go
-func (i *ImageService) CountImages() int {
+func (i *ImageService) CountImages(ctx context.Context) int {
 	return i.imageStore.Len()
 }
 

+ 71 - 34
daemon/info.go

@@ -8,6 +8,7 @@ import (
 	"strings"
 	"time"
 
+	"github.com/containerd/containerd/tracing"
 	"github.com/containerd/log"
 	"github.com/docker/docker/api"
 	"github.com/docker/docker/api/types"
@@ -27,8 +28,19 @@ import (
 	"github.com/opencontainers/selinux/go-selinux"
 )
 
+// doWithTrace runs f inside a tracing span called name that is started from
+// ctx, and returns whatever f returns. The span is ended when doWithTrace
+// returns. The context derived for the span is discarded because f takes no
+// arguments.
+func doWithTrace[T any](ctx context.Context, name string, f func() T) T {
+	_, sp := tracing.StartSpan(ctx, name)
+	defer sp.End()
+
+	result := f()
+	return result
+}
+
 // SystemInfo returns information about the host server the daemon is running on.
-func (daemon *Daemon) SystemInfo() *system.Info {
+//
+// The only error this should return is due to context cancellation/deadline.
+// Anything else should be logged and ignored because this is looking up
+// multiple things and is often used for debugging.
+// The only valid case for an early return is when the caller doesn't want the result anymore (i.e. the context was cancelled).
+func (daemon *Daemon) SystemInfo(ctx context.Context) (*system.Info, error) {
 	defer metrics.StartTimer(hostInfoFunctions.WithValues("system_info"))()
 
 	sysInfo := daemon.RawSysInfo()
@@ -36,22 +48,22 @@ func (daemon *Daemon) SystemInfo() *system.Info {
 
 	v := &system.Info{
 		ID:                 daemon.id,
-		Images:             daemon.imageService.CountImages(),
+		Images:             daemon.imageService.CountImages(ctx),
 		IPv4Forwarding:     !sysInfo.IPv4ForwardingDisabled,
 		BridgeNfIptables:   !sysInfo.BridgeNFCallIPTablesDisabled,
 		BridgeNfIP6tables:  !sysInfo.BridgeNFCallIP6TablesDisabled,
-		Name:               hostName(),
+		Name:               hostName(ctx),
 		SystemTime:         time.Now().Format(time.RFC3339Nano),
 		LoggingDriver:      daemon.defaultLogConfig.Type,
-		KernelVersion:      kernelVersion(),
-		OperatingSystem:    operatingSystem(),
-		OSVersion:          osVersion(),
+		KernelVersion:      kernelVersion(ctx),
+		OperatingSystem:    operatingSystem(ctx),
+		OSVersion:          osVersion(ctx),
 		IndexServerAddress: registry.IndexServer,
 		OSType:             runtime.GOOS,
 		Architecture:       platform.Architecture,
-		RegistryConfig:     daemon.registryService.ServiceConfig(),
-		NCPU:               sysinfo.NumCPU(),
-		MemTotal:           memInfo().MemTotal,
+		RegistryConfig:     doWithTrace(ctx, "registry.ServiceConfig", daemon.registryService.ServiceConfig),
+		NCPU:               doWithTrace(ctx, "sysinfo.NumCPU", sysinfo.NumCPU),
+		MemTotal:           memInfo(ctx).MemTotal,
 		GenericResources:   daemon.genericResources,
 		DockerRootDir:      cfg.Root,
 		Labels:             cfg.Labels,
@@ -66,24 +78,31 @@ func (daemon *Daemon) SystemInfo() *system.Info {
 	}
 
 	daemon.fillContainerStates(v)
-	daemon.fillDebugInfo(v)
+	daemon.fillDebugInfo(ctx, v)
 	daemon.fillAPIInfo(v, &cfg.Config)
 	// Retrieve platform specific info
-	daemon.fillPlatformInfo(v, sysInfo, cfg)
+	if err := daemon.fillPlatformInfo(ctx, v, sysInfo, cfg); err != nil {
+		return nil, err
+	}
 	daemon.fillDriverInfo(v)
-	daemon.fillPluginsInfo(v, &cfg.Config)
+	daemon.fillPluginsInfo(ctx, v, &cfg.Config)
 	daemon.fillSecurityOptions(v, sysInfo, &cfg.Config)
 	daemon.fillLicense(v)
-	daemon.fillDefaultAddressPools(v, &cfg.Config)
+	daemon.fillDefaultAddressPools(ctx, v, &cfg.Config)
 
-	return v
+	return v, nil
 }
 
 // SystemVersion returns version information about the daemon.
-func (daemon *Daemon) SystemVersion() types.Version {
+//
+// The only error this should return is due to context cancellation/deadline.
+// Anything else should be logged and ignored because this is looking up
+// multiple things and is often used for debugging.
+// The only valid case for an early return is when the caller doesn't want the result anymore (i.e. the context was cancelled).
+func (daemon *Daemon) SystemVersion(ctx context.Context) (types.Version, error) {
 	defer metrics.StartTimer(hostInfoFunctions.WithValues("system_version"))()
 
-	kernelVersion := kernelVersion()
+	kernelVersion := kernelVersion(ctx)
 	cfg := daemon.config()
 
 	v := types.Version{
@@ -120,8 +139,10 @@ func (daemon *Daemon) SystemVersion() types.Version {
 
 	v.Platform.Name = dockerversion.PlatformName
 
-	daemon.fillPlatformVersion(&v, cfg)
-	return v
+	if err := daemon.fillPlatformVersion(ctx, &v, cfg); err != nil {
+		return v, err
+	}
+	return v, nil
 }
 
 func (daemon *Daemon) fillDriverInfo(v *system.Info) {
@@ -140,10 +161,10 @@ WARNING: The %s storage-driver is deprecated, and will be removed in a future re
 	fillDriverWarnings(v)
 }
 
-func (daemon *Daemon) fillPluginsInfo(v *system.Info, cfg *config.Config) {
+func (daemon *Daemon) fillPluginsInfo(ctx context.Context, v *system.Info, cfg *config.Config) {
 	v.Plugins = system.PluginsInfo{
 		Volume:  daemon.volumes.GetDriverList(),
-		Network: daemon.GetNetworkDriverList(),
+		Network: daemon.GetNetworkDriverList(ctx),
 
 		// The authorization plugins are returned in the order they are
 		// used as they constitute a request/response modification chain.
@@ -198,9 +219,9 @@ func (daemon *Daemon) fillContainerStates(v *system.Info) {
 // this information optional (cli to request "with debugging information"), or
 // only collect it if the daemon has debug enabled. For the CLI code, see
 // https://github.com/docker/cli/blob/v20.10.12/cli/command/system/info.go#L239-L244
-func (daemon *Daemon) fillDebugInfo(v *system.Info) {
+func (daemon *Daemon) fillDebugInfo(ctx context.Context, v *system.Info) {
 	v.Debug = debug.IsEnabled()
-	v.NFd = fileutils.GetTotalUsedFds()
+	v.NFd = fileutils.GetTotalUsedFds(ctx)
 	v.NGoroutines = runtime.NumGoroutine()
 	v.NEventsListener = daemon.EventsService.SubscribersCount()
 }
@@ -228,7 +249,9 @@ func (daemon *Daemon) fillAPIInfo(v *system.Info, cfg *config.Config) {
 	}
 }
 
-func (daemon *Daemon) fillDefaultAddressPools(v *system.Info, cfg *config.Config) {
+func (daemon *Daemon) fillDefaultAddressPools(ctx context.Context, v *system.Info, cfg *config.Config) {
+	_, span := tracing.StartSpan(ctx, "fillDefaultAddressPools")
+	defer span.End()
 	for _, pool := range cfg.DefaultAddressPools.Value() {
 		v.DefaultAddressPools = append(v.DefaultAddressPools, system.NetworkAddressPool{
 			Base: pool.Base,
@@ -237,45 +260,56 @@ func (daemon *Daemon) fillDefaultAddressPools(v *system.Info, cfg *config.Config
 	}
 }
 
-func hostName() string {
+func hostName(ctx context.Context) string {
+	ctx, span := tracing.StartSpan(ctx, "hostName")
+	defer span.End()
 	hostname := ""
 	if hn, err := os.Hostname(); err != nil {
-		log.G(context.TODO()).Warnf("Could not get hostname: %v", err)
+		log.G(ctx).Warnf("Could not get hostname: %v", err)
 	} else {
 		hostname = hn
 	}
 	return hostname
 }
 
-func kernelVersion() string {
+func kernelVersion(ctx context.Context) string {
+	ctx, span := tracing.StartSpan(ctx, "kernelVersion")
+	defer span.End()
+
 	var kernelVersion string
 	if kv, err := kernel.GetKernelVersion(); err != nil {
-		log.G(context.TODO()).Warnf("Could not get kernel version: %v", err)
+		log.G(ctx).Warnf("Could not get kernel version: %v", err)
 	} else {
 		kernelVersion = kv.String()
 	}
 	return kernelVersion
 }
 
-func memInfo() *meminfo.Memory {
+func memInfo(ctx context.Context) *meminfo.Memory {
+	ctx, span := tracing.StartSpan(ctx, "memInfo")
+	defer span.End()
+
 	memInfo, err := meminfo.Read()
 	if err != nil {
-		log.G(context.TODO()).Errorf("Could not read system memory info: %v", err)
+		log.G(ctx).Errorf("Could not read system memory info: %v", err)
 		memInfo = &meminfo.Memory{}
 	}
 	return memInfo
 }
 
-func operatingSystem() (operatingSystem string) {
+func operatingSystem(ctx context.Context) (operatingSystem string) {
+	ctx, span := tracing.StartSpan(ctx, "operatingSystem")
+	defer span.End()
+
 	defer metrics.StartTimer(hostInfoFunctions.WithValues("operating_system"))()
 
 	if s, err := operatingsystem.GetOperatingSystem(); err != nil {
-		log.G(context.TODO()).Warnf("Could not get operating system name: %v", err)
+		log.G(ctx).Warnf("Could not get operating system name: %v", err)
 	} else {
 		operatingSystem = s
 	}
 	if inContainer, err := operatingsystem.IsContainerized(); err != nil {
-		log.G(context.TODO()).Errorf("Could not determine if daemon is containerized: %v", err)
+		log.G(ctx).Errorf("Could not determine if daemon is containerized: %v", err)
 		operatingSystem += " (error determining if containerized)"
 	} else if inContainer {
 		operatingSystem += " (containerized)"
@@ -284,12 +318,15 @@ func operatingSystem() (operatingSystem string) {
 	return operatingSystem
 }
 
-func osVersion() (version string) {
+func osVersion(ctx context.Context) (version string) {
+	ctx, span := tracing.StartSpan(ctx, "osVersion")
+	defer span.End()
+
 	defer metrics.StartTimer(hostInfoFunctions.WithValues("os_version"))()
 
 	version, err := operatingsystem.GetOperatingSystemVersion()
 	if err != nil {
-		log.G(context.TODO()).Warnf("Could not get operating system version: %v", err)
+		log.G(ctx).Warnf("Could not get operating system version: %v", err)
 	}
 
 	return version

+ 186 - 84
daemon/info_unix.go

@@ -16,6 +16,7 @@ import (
 	containertypes "github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/api/types/system"
 	"github.com/docker/docker/daemon/config"
+	"github.com/docker/docker/errdefs"
 	"github.com/docker/docker/pkg/rootless"
 	"github.com/docker/docker/pkg/sysinfo"
 	"github.com/pkg/errors"
@@ -23,7 +24,7 @@ import (
 )
 
 // fillPlatformInfo fills the platform related info.
-func (daemon *Daemon) fillPlatformInfo(v *system.Info, sysInfo *sysinfo.SysInfo, cfg *configStore) {
+func (daemon *Daemon) fillPlatformInfo(ctx context.Context, v *system.Info, sysInfo *sysinfo.SysInfo, cfg *configStore) error {
 	v.CgroupDriver = cgroupDriver(&cfg.Config)
 	v.CgroupVersion = "1"
 	if sysInfo.CgroupUnified {
@@ -57,36 +58,20 @@ func (daemon *Daemon) fillPlatformInfo(v *system.Info, sysInfo *sysinfo.SysInfo,
 	v.ContainerdCommit.ID = "N/A"
 	v.InitCommit.ID = "N/A"
 
-	if _, _, commit, err := parseDefaultRuntimeVersion(&cfg.Runtimes); err != nil {
-		log.G(context.TODO()).Warnf(err.Error())
-	} else {
-		v.RuncCommit.ID = commit
+	if err := populateRuncCommit(&v.RuncCommit, cfg); err != nil {
+		log.G(ctx).WithError(err).Warn("Failed to retrieve default runtime version")
 	}
 
-	if rv, err := daemon.containerd.Version(context.Background()); err == nil {
-		v.ContainerdCommit.ID = rv.Revision
-	} else {
-		log.G(context.TODO()).Warnf("failed to retrieve containerd version: %v", err)
+	if err := daemon.populateContainerdCommit(ctx, &v.ContainerdCommit); err != nil {
+		return err
 	}
 
-	v.InitBinary = cfg.GetInitPath()
-	if initBinary, err := cfg.LookupInitPath(); err != nil {
-		log.G(context.TODO()).Warnf("failed to find docker-init: %s", err)
-	} else if rv, err := exec.Command(initBinary, "--version").Output(); err == nil {
-		if _, commit, err := parseInitVersion(string(rv)); err != nil {
-			log.G(context.TODO()).Warnf("failed to parse %s version: %s", initBinary, err)
-		} else {
-			v.InitCommit.ID = commit
-		}
-	} else {
-		log.G(context.TODO()).Warnf("failed to retrieve %s version: %s", initBinary, err)
+	if err := daemon.populateInitCommit(ctx, v, cfg); err != nil {
+		return err
 	}
 
 	// Set expected and actual commits to the same value to prevent the client
 	// showing that the version does not match the "expected" version/commit.
-	v.RuncCommit.Expected = v.RuncCommit.ID
-	v.ContainerdCommit.Expected = v.ContainerdCommit.ID
-	v.InitCommit.Expected = v.InitCommit.ID
 
 	if v.CgroupDriver == cgroupNoneDriver {
 		if v.CgroupVersion == "2" {
@@ -171,65 +156,79 @@ func (daemon *Daemon) fillPlatformInfo(v *system.Info, sysInfo *sysinfo.SysInfo,
 	if !v.BridgeNfIP6tables {
 		v.Warnings = append(v.Warnings, "WARNING: bridge-nf-call-ip6tables is disabled")
 	}
+	return nil
 }
 
-func (daemon *Daemon) fillPlatformVersion(v *types.Version, cfg *configStore) {
-	if rv, err := daemon.containerd.Version(context.Background()); err == nil {
-		v.Components = append(v.Components, types.ComponentVersion{
-			Name:    "containerd",
-			Version: rv.Version,
-			Details: map[string]string{
-				"GitCommit": rv.Revision,
-			},
-		})
+func (daemon *Daemon) fillPlatformVersion(ctx context.Context, v *types.Version, cfg *configStore) error {
+	if err := daemon.populateContainerdVersion(ctx, v); err != nil {
+		return err
 	}
 
-	if _, ver, commit, err := parseDefaultRuntimeVersion(&cfg.Runtimes); err != nil {
-		log.G(context.TODO()).Warnf(err.Error())
-	} else {
-		v.Components = append(v.Components, types.ComponentVersion{
-			Name:    cfg.Runtimes.Default,
-			Version: ver,
-			Details: map[string]string{
-				"GitCommit": commit,
-			},
-		})
-	}
-
-	if initBinary, err := cfg.LookupInitPath(); err != nil {
-		log.G(context.TODO()).Warnf("failed to find docker-init: %s", err)
-	} else if rv, err := exec.Command(initBinary, "--version").Output(); err == nil {
-		if ver, commit, err := parseInitVersion(string(rv)); err != nil {
-			log.G(context.TODO()).Warnf("failed to parse %s version: %s", initBinary, err)
-		} else {
-			v.Components = append(v.Components, types.ComponentVersion{
-				Name:    filepath.Base(initBinary),
-				Version: ver,
-				Details: map[string]string{
-					"GitCommit": commit,
-				},
-			})
+	if err := populateRuncVersion(cfg, v); err != nil {
+		log.G(ctx).WithError(err).Warn("Failed to retrieve default runtime version")
+	}
+
+	if err := populateInitVersion(ctx, cfg, v); err != nil {
+		return err
+	}
+
+	if err := daemon.fillRootlessVersion(ctx, v); err != nil {
+		if errdefs.IsContext(err) {
+			return err
 		}
-	} else {
-		log.G(context.TODO()).Warnf("failed to retrieve %s version: %s", initBinary, err)
+		log.G(ctx).WithError(err).Warn("Failed to fill rootless version")
 	}
+	return nil
+}
 
-	daemon.fillRootlessVersion(v)
+// populateRuncCommit records the commit of the default runtime in v.
+//
+// The commit comes from parseDefaultRuntimeVersion. On success both v.ID and
+// v.Expected are set to it. On failure the error is returned and v.ID is left
+// exactly as the caller initialised it; v.Expected is then set to that same
+// value so that clients comparing the two fields do not report a mismatch.
+func populateRuncCommit(v *system.Commit, cfg *configStore) error {
+	_, _, commit, err := parseDefaultRuntimeVersion(&cfg.Runtimes)
+	if err != nil {
+		// Keep "expected" equal to "actual" even though the lookup failed.
+		v.Expected = v.ID
+		return err
+	}
+	v.ID = commit
+	v.Expected = commit
+	return nil
 }
 
-func (daemon *Daemon) fillRootlessVersion(v *types.Version) {
+// populateInitCommit fills v.InitBinary and v.InitCommit from the configured
+// docker-init binary.
+//
+// v.InitBinary is always set to cfg.GetInitPath(). The commit is obtained by
+// running "<init binary> --version" and parsing its output. A missing binary,
+// a failed run or unparsable output is logged as a warning and ignored (nil
+// is returned). The only error returned is one that errdefs.IsContext
+// recognises, i.e. the caller's context was cancelled or timed out.
+//
+// v.InitCommit.Expected always ends up equal to v.InitCommit.ID, including on
+// the failure paths where ID keeps the value set by the caller, so that
+// clients do not report a commit mismatch.
+func (daemon *Daemon) populateInitCommit(ctx context.Context, v *system.Info, cfg *configStore) error {
+	v.InitBinary = cfg.GetInitPath()
+
+	// Sync "expected" with "actual" on every exit path, not only on success.
+	defer func() { v.InitCommit.Expected = v.InitCommit.ID }()
+
+	initBinary, err := cfg.LookupInitPath()
+	if err != nil {
+		log.G(ctx).WithError(err).Warn("Failed to find docker-init")
+		return nil
+	}
+
+	rv, err := exec.CommandContext(ctx, initBinary, "--version").Output()
+	if err != nil {
+		if errdefs.IsContext(err) {
+			return err
+		}
+		log.G(ctx).WithError(err).Warnf("Failed to retrieve %s version", initBinary)
+		return nil
+	}
+
+	_, commit, err := parseInitVersion(string(rv))
+	if err != nil {
+		log.G(ctx).WithError(err).Warnf("failed to parse %s version", initBinary)
+		return nil
+	}
+	v.InitCommit.ID = commit
+	return nil
+}
+
+func (daemon *Daemon) fillRootlessVersion(ctx context.Context, v *types.Version) error {
 	if !rootless.RunningWithRootlessKit() {
-		return
+		return nil
 	}
 	rlc, err := getRootlessKitClient()
 	if err != nil {
-		log.G(context.TODO()).Warnf("failed to create RootlessKit client: %v", err)
-		return
+		return errors.Wrap(err, "failed to create RootlessKit client")
 	}
-	rlInfo, err := rlc.Info(context.TODO())
+	rlInfo, err := rlc.Info(ctx)
 	if err != nil {
-		log.G(context.TODO()).Warnf("failed to retrieve RootlessKit version: %v", err)
-		return
+		return errors.Wrap(err, "failed to retrieve RootlessKit version")
 	}
 	v.Components = append(v.Components, types.ComponentVersion{
 		Name:    "rootlesskit",
@@ -244,31 +243,54 @@ func (daemon *Daemon) fillRootlessVersion(v *types.Version) {
 
 	switch rlInfo.NetworkDriver.Driver {
 	case "slirp4netns":
-		if rv, err := exec.Command("slirp4netns", "--version").Output(); err == nil {
-			if _, ver, commit, err := parseRuntimeVersion(string(rv)); err != nil {
-				log.G(context.TODO()).Warnf("failed to parse slirp4netns version: %v", err)
-			} else {
-				v.Components = append(v.Components, types.ComponentVersion{
-					Name:    "slirp4netns",
-					Version: ver,
-					Details: map[string]string{
-						"GitCommit": commit,
-					},
-				})
+		err = func() error {
+			rv, err := exec.CommandContext(ctx, "slirp4netns", "--version").Output()
+			if err != nil {
+				if errdefs.IsContext(err) {
+					return err
+				}
+				log.G(ctx).WithError(err).Warn("Failed to retrieve slirp4netns version")
+				return nil
 			}
-		} else {
-			log.G(context.TODO()).Warnf("failed to retrieve slirp4netns version: %v", err)
+
+			_, ver, commit, err := parseRuntimeVersion(string(rv))
+			if err != nil {
+				log.G(ctx).WithError(err).Warn("Failed to parse slirp4netns version")
+				return nil
+			}
+			v.Components = append(v.Components, types.ComponentVersion{
+				Name:    "slirp4netns",
+				Version: ver,
+				Details: map[string]string{
+					"GitCommit": commit,
+				},
+			})
+			return nil
+		}()
+		if err != nil {
+			return err
 		}
 	case "vpnkit":
-		if rv, err := exec.Command("vpnkit", "--version").Output(); err == nil {
+		err = func() error {
+			out, err := exec.CommandContext(ctx, "vpnkit", "--version").Output()
+			if err != nil {
+				if errdefs.IsContext(err) {
+					return err
+				}
+				log.G(ctx).WithError(err).Warn("Failed to retrieve vpnkit version")
+				return nil
+			}
 			v.Components = append(v.Components, types.ComponentVersion{
 				Name:    "vpnkit",
-				Version: strings.TrimSpace(string(rv)),
+				Version: strings.TrimSpace(strings.TrimSpace(string(out))),
 			})
-		} else {
-			log.G(context.TODO()).Warnf("failed to retrieve vpnkit version: %v", err)
+			return nil
+		}()
+		if err != nil {
+			return err
 		}
 	}
+	return nil
 }
 
 // getRootlessKitClient returns RootlessKit client
@@ -384,3 +406,83 @@ func Rootless(cfg *config.Config) bool {
 func noNewPrivileges(cfg *config.Config) bool {
 	return cfg.NoNewPrivileges
 }
+
+// populateContainerdCommit records the containerd revision in v.
+//
+// The revision is read from daemon.containerd.Version. On success both v.ID
+// and v.Expected are set to it. A context error (as recognised by
+// errdefs.IsContext) is returned to the caller. Any other failure is logged
+// as a warning and ignored; v.ID then keeps the value set by the caller and
+// v.Expected is set to match it so that clients do not report a mismatch.
+func (daemon *Daemon) populateContainerdCommit(ctx context.Context, v *system.Commit) error {
+	rv, err := daemon.containerd.Version(ctx)
+	if err != nil {
+		if errdefs.IsContext(err) {
+			return err
+		}
+		log.G(ctx).WithError(err).Warn("Failed to retrieve containerd version")
+		// Keep "expected" equal to "actual" even though the lookup failed.
+		v.Expected = v.ID
+		return nil
+	}
+	v.ID = rv.Revision
+	v.Expected = rv.Revision
+	return nil
+}
+
+// populateContainerdVersion appends a "containerd" entry to v.Components,
+// using the version and revision reported by daemon.containerd.Version.
+//
+// A context error (as recognised by errdefs.IsContext) is returned to the
+// caller. Any other failure is logged as a warning and nil is returned,
+// leaving v untouched.
+func (daemon *Daemon) populateContainerdVersion(ctx context.Context, v *types.Version) error {
+	reported, err := daemon.containerd.Version(ctx)
+	switch {
+	case err == nil:
+		component := types.ComponentVersion{
+			Name:    "containerd",
+			Version: reported.Version,
+			Details: map[string]string{"GitCommit": reported.Revision},
+		}
+		v.Components = append(v.Components, component)
+		return nil
+	case errdefs.IsContext(err):
+		return err
+	default:
+		log.G(ctx).WithError(err).Warn("Failed to retrieve containerd version")
+		return nil
+	}
+}
+
+// populateRuncVersion appends an entry for the default runtime to
+// v.Components. The entry is named after cfg.Runtimes.Default and carries the
+// version and commit returned by parseDefaultRuntimeVersion. If that lookup
+// fails, the error is returned and v is left untouched.
+func populateRuncVersion(cfg *configStore, v *types.Version) error {
+	_, runtimeVersion, gitCommit, err := parseDefaultRuntimeVersion(&cfg.Runtimes)
+	if err != nil {
+		return err
+	}
+
+	component := types.ComponentVersion{
+		Name:    cfg.Runtimes.Default,
+		Version: runtimeVersion,
+		Details: map[string]string{"GitCommit": gitCommit},
+	}
+	v.Components = append(v.Components, component)
+	return nil
+}
+
+// populateInitVersion appends an entry for the docker-init binary to
+// v.Components. The entry is named after the base name of the binary and
+// carries the version and commit parsed from "<init binary> --version".
+//
+// A missing binary, a failed run or unparsable output is logged as a warning
+// and nil is returned with v left untouched. The only error returned is one
+// that errdefs.IsContext recognises.
+func populateInitVersion(ctx context.Context, cfg *configStore, v *types.Version) error {
+	binPath, lookupErr := cfg.LookupInitPath()
+	if lookupErr != nil {
+		log.G(ctx).WithError(lookupErr).Warn("Failed to find docker-init")
+		return nil
+	}
+
+	out, runErr := exec.CommandContext(ctx, binPath, "--version").Output()
+	if runErr != nil {
+		// Only context cancellation/deadline is propagated to the caller.
+		if !errdefs.IsContext(runErr) {
+			log.G(ctx).WithError(runErr).Warnf("Failed to retrieve %s version", binPath)
+			return nil
+		}
+		return runErr
+	}
+
+	initVersion, gitCommit, parseErr := parseInitVersion(string(out))
+	if parseErr != nil {
+		log.G(ctx).WithError(parseErr).Warnf("failed to parse %s version", binPath)
+		return nil
+	}
+
+	component := types.ComponentVersion{
+		Name:    filepath.Base(binPath),
+		Version: initVersion,
+		Details: map[string]string{"GitCommit": gitCommit},
+	}
+	v.Components = append(v.Components, component)
+	return nil
+}

+ 7 - 2
daemon/info_windows.go

@@ -1,6 +1,8 @@
 package daemon // import "github.com/docker/docker/daemon"
 
 import (
+	"context"
+
 	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types/system"
 	"github.com/docker/docker/daemon/config"
@@ -8,10 +10,13 @@ import (
 )
 
 // fillPlatformInfo fills the platform related info.
-func (daemon *Daemon) fillPlatformInfo(v *system.Info, sysInfo *sysinfo.SysInfo, cfg *configStore) {
+func (daemon *Daemon) fillPlatformInfo(ctx context.Context, v *system.Info, sysInfo *sysinfo.SysInfo, cfg *configStore) error {
+	// This Windows variant has nothing to add: every argument is ignored and
+	// nil is always returned.
+	return nil
 }
 
-func (daemon *Daemon) fillPlatformVersion(v *types.Version, cfg *configStore) {}
+// fillPlatformVersion is a no-op in this Windows variant: it ignores its
+// arguments and always returns nil.
+func (daemon *Daemon) fillPlatformVersion(ctx context.Context, v *types.Version, cfg *configStore) error {
+	return nil
+}
 
+// fillDriverWarnings has an empty body in this Windows variant; v is not
+// modified.
 func fillDriverWarnings(v *system.Info) {
 }

+ 4 - 3
daemon/network.go

@@ -139,7 +139,8 @@ func (daemon *Daemon) getAllNetworks() []*libnetwork.Network {
 	if c == nil {
 		return nil
 	}
-	return c.Networks()
+	ctx := context.TODO()
+	return c.Networks(ctx)
 }
 
 type ingressJob struct {
@@ -465,7 +466,7 @@ func (daemon *Daemon) DisconnectContainerFromNetwork(containerName string, netwo
 
 // GetNetworkDriverList returns the list of plugins drivers
 // registered for network.
-func (daemon *Daemon) GetNetworkDriverList() []string {
+func (daemon *Daemon) GetNetworkDriverList(ctx context.Context) []string {
 	if !daemon.NetworkControllerEnabled() {
 		return nil
 	}
@@ -483,7 +484,7 @@ func (daemon *Daemon) GetNetworkDriverList() []string {
 		pluginMap[plugin] = true
 	}
 
-	networks := daemon.netController.Networks()
+	networks := daemon.netController.Networks(ctx)
 
 	for _, nw := range networks {
 		if !pluginMap[nw.Type()] {

+ 10 - 0
errdefs/is.go

@@ -1,5 +1,10 @@
 package errdefs
 
+import (
+	"context"
+	"errors"
+)
+
 type causer interface {
 	Cause() error
 }
@@ -105,3 +110,8 @@ func IsDataLoss(err error) bool {
 	_, ok := getImplementer(err).(ErrDataLoss)
 	return ok
 }
+
+// IsContext reports whether err is, or wraps, context.Canceled or
+// context.DeadlineExceeded.
+func IsContext(err error) bool {
+	if errors.Is(err, context.Canceled) {
+		return true
+	}
+	return errors.Is(err, context.DeadlineExceeded)
+}

+ 3 - 3
libnetwork/controller.go

@@ -820,10 +820,10 @@ func (c *Controller) addNetwork(n *Network) error {
 }
 
 // Networks returns the list of Network(s) managed by this controller.
-func (c *Controller) Networks() []*Network {
+func (c *Controller) Networks(ctx context.Context) []*Network {
 	var list []*Network
 
-	for _, n := range c.getNetworksFromStore() {
+	for _, n := range c.getNetworksFromStore(ctx) {
 		if n.inDelete {
 			continue
 		}
@@ -835,7 +835,7 @@ func (c *Controller) Networks() []*Network {
 
 // WalkNetworks uses the provided function to walk the Network(s) managed by this controller.
 func (c *Controller) WalkNetworks(walker NetworkWalker) {
-	for _, n := range c.Networks() {
+	for _, n := range c.Networks(context.TODO()) {
 		if walker(n) {
 			return
 		}

+ 3 - 2
libnetwork/libnetwork_linux_test.go

@@ -538,7 +538,8 @@ func TestNetworkEndpointsWalkers(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	current := len(controller.Networks())
+	ctx := context.TODO()
+	current := len(controller.Networks(ctx))
 
 	// Create network 2
 	netOption = options.Generic{
@@ -558,7 +559,7 @@ func TestNetworkEndpointsWalkers(t *testing.T) {
 	}()
 
 	// Test Networks method
-	if len(controller.Networks()) != current+1 {
+	if len(controller.Networks(ctx)) != current+1 {
 		t.Fatalf("Did not find the expected number of networks")
 	}
 

+ 5 - 5
libnetwork/store.go

@@ -34,7 +34,7 @@ func (c *Controller) getStore() *datastore.Store {
 }
 
 func (c *Controller) getNetworkFromStore(nid string) (*Network, error) {
-	for _, n := range c.getNetworksFromStore() {
+	for _, n := range c.getNetworksFromStore(context.TODO()) {
 		if n.id == nid {
 			return n, nil
 		}
@@ -77,21 +77,21 @@ func (c *Controller) getNetworks() ([]*Network, error) {
 	return nl, nil
 }
 
-func (c *Controller) getNetworksFromStore() []*Network { // FIXME: unify with c.getNetworks()
+func (c *Controller) getNetworksFromStore(ctx context.Context) []*Network { // FIXME: unify with c.getNetworks()
 	var nl []*Network
 
 	store := c.getStore()
 	kvol, err := store.List(datastore.Key(datastore.NetworkKeyPrefix), &Network{ctrlr: c})
 	if err != nil {
 		if err != datastore.ErrKeyNotFound {
-			log.G(context.TODO()).Debugf("failed to get networks from store: %v", err)
+			log.G(ctx).Debugf("failed to get networks from store: %v", err)
 		}
 		return nil
 	}
 
 	kvep, err := store.Map(datastore.Key(epCntKeyPrefix), &endpointCnt{})
 	if err != nil && err != datastore.ErrKeyNotFound {
-		log.G(context.TODO()).Warnf("failed to get endpoint_count map from store: %v", err)
+		log.G(ctx).Warnf("failed to get endpoint_count map from store: %v", err)
 	}
 
 	for _, kvo := range kvol {
@@ -185,7 +185,7 @@ retry:
 }
 
 func (c *Controller) networkCleanup() {
-	for _, n := range c.getNetworksFromStore() {
+	for _, n := range c.getNetworksFromStore(context.TODO()) {
 		if n.inDelete {
 			log.G(context.TODO()).Infof("Removing stale network %s (%s)", n.Name(), n.ID())
 			if err := n.delete(true, true); err != nil {

+ 14 - 3
pkg/fileutils/fileutils_linux.go

@@ -6,13 +6,17 @@ import (
 	"io"
 	"os"
 
+	"github.com/containerd/containerd/tracing"
 	"github.com/containerd/log"
 	"golang.org/x/sys/unix"
 )
 
 // GetTotalUsedFds Returns the number of used File Descriptors by
 // reading it via /proc filesystem.
-func GetTotalUsedFds() int {
+func GetTotalUsedFds(ctx context.Context) int {
+	ctx, span := tracing.StartSpan(ctx, "GetTotalUsedFds")
+	defer span.End()
+
 	name := fmt.Sprintf("/proc/%d/fd", os.Getpid())
 
 	// Fast-path for Linux 6.2 (since [f1f1f2569901ec5b9d425f2e91c09a0e320768f3]).
@@ -30,19 +34,26 @@ func GetTotalUsedFds() int {
 
 	f, err := os.Open(name)
 	if err != nil {
-		log.G(context.TODO()).WithError(err).Error("Error listing file descriptors")
+		log.G(ctx).WithError(err).Error("Error listing file descriptors")
 		return -1
 	}
 	defer f.Close()
 
 	var fdCount int
 	for {
+		select {
+		case <-ctx.Done():
+			log.G(ctx).WithError(ctx.Err()).Error("Context cancelled while counting file descriptors")
+			return -1
+		default:
+		}
+
 		names, err := f.Readdirnames(100)
 		fdCount += len(names)
 		if err == io.EOF {
 			break
 		} else if err != nil {
-			log.G(context.TODO()).WithError(err).Error("Error listing file descriptors")
+			log.G(ctx).WithError(err).Error("Error listing file descriptors")
 			return -1
 		}
 	}

+ 3 - 1
pkg/fileutils/fileutils_test.go

@@ -1,6 +1,7 @@
 package fileutils // import "github.com/docker/docker/pkg/fileutils"
 
 import (
+	"context"
 	"errors"
 	"os"
 	"path"
@@ -242,8 +243,9 @@ func TestCreateIfNotExistsFile(t *testing.T) {
 }
 
+// BenchmarkGetTotalUsedFds times repeated calls to GetTotalUsedFds and
+// reports allocations.
 func BenchmarkGetTotalUsedFds(b *testing.B) {
+	// A single background context is created once and reused by every iteration.
+	ctx := context.Background()
 	b.ReportAllocs()
 	for i := 0; i < b.N; i++ {
-		_ = GetTotalUsedFds()
+		_ = GetTotalUsedFds(ctx)
 	}
 }

+ 3 - 1
pkg/fileutils/fileutils_windows.go

@@ -1,7 +1,9 @@
 package fileutils // import "github.com/docker/docker/pkg/fileutils"
 
+import "context"
+
 // GetTotalUsedFds Returns the number of used File Descriptors. Not supported
 // on Windows.
-func GetTotalUsedFds() int {
+func GetTotalUsedFds(ctx context.Context) int {
+	// ctx is unused in this variant; -1 is returned unconditionally.
 	return -1
 }