From 677d41aa3b1146b0dfc8f5889bd3bd3fa9c128b2 Mon Sep 17 00:00:00 2001 From: Brian Goff Date: Sun, 10 Sep 2023 00:05:05 +0000 Subject: [PATCH] Plumb context through info endpoint I was trying to find out why `docker info` was sometimes slow, so this plumbs a context through the info endpoint to propagate trace data. Signed-off-by: Brian Goff --- api/server/router/system/backend.go | 6 +- api/server/router/system/system_routes.go | 12 +- daemon/cluster/cluster.go | 7 +- daemon/cluster/configs.go | 4 +- daemon/cluster/executor/backend.go | 2 +- daemon/cluster/executor/container/executor.go | 5 +- daemon/cluster/networks.go | 10 +- daemon/cluster/nodes.go | 6 +- daemon/cluster/secrets.go | 4 +- daemon/cluster/services.go | 10 +- daemon/cluster/swarm.go | 4 +- daemon/containerd/service.go | 4 +- daemon/daemon.go | 7 +- daemon/daemon_windows.go | 4 +- daemon/events.go | 2 +- daemon/image_service.go | 2 +- daemon/images/service.go | 2 +- daemon/info.go | 105 ++++--- daemon/info_unix.go | 268 ++++++++++++------ daemon/info_windows.go | 9 +- daemon/network.go | 7 +- errdefs/is.go | 10 + libnetwork/controller.go | 6 +- libnetwork/libnetwork_linux_test.go | 5 +- libnetwork/store.go | 10 +- pkg/fileutils/fileutils_linux.go | 17 +- pkg/fileutils/fileutils_test.go | 4 +- pkg/fileutils/fileutils_windows.go | 4 +- 28 files changed, 368 insertions(+), 168 deletions(-) diff --git a/api/server/router/system/backend.go b/api/server/router/system/backend.go index 0bfd2d25ac..d1d39a4cfc 100644 --- a/api/server/router/system/backend.go +++ b/api/server/router/system/backend.go @@ -27,8 +27,8 @@ type DiskUsageOptions struct { // Backend is the methods that need to be implemented to provide // system specific functionality. 
type Backend interface { - SystemInfo() *system.Info - SystemVersion() types.Version + SystemInfo(context.Context) (*system.Info, error) + SystemVersion(context.Context) (types.Version, error) SystemDiskUsage(ctx context.Context, opts DiskUsageOptions) (*types.DiskUsage, error) SubscribeToEvents(since, until time.Time, ef filters.Args) ([]events.Message, chan interface{}) UnsubscribeFromEvents(chan interface{}) @@ -38,7 +38,7 @@ type Backend interface { // ClusterBackend is all the methods that need to be implemented // to provide cluster system specific functionality. type ClusterBackend interface { - Info() swarm.Info + Info(context.Context) swarm.Info } // StatusProvider provides methods to get the swarm status of the current node. diff --git a/api/server/router/system/system_routes.go b/api/server/router/system/system_routes.go index de5501f68f..613b04322f 100644 --- a/api/server/router/system/system_routes.go +++ b/api/server/router/system/system_routes.go @@ -60,10 +60,13 @@ func (s *systemRouter) swarmStatus() string { func (s *systemRouter) getInfo(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error { version := httputils.VersionFromContext(ctx) info, _, _ := s.collectSystemInfo.Do(ctx, version, func(ctx context.Context) (*system.Info, error) { - info := s.backend.SystemInfo() + info, err := s.backend.SystemInfo(ctx) + if err != nil { + return nil, err + } if s.cluster != nil { - info.Swarm = s.cluster.Info() + info.Swarm = s.cluster.Info(ctx) info.Warnings = append(info.Warnings, info.Swarm.Warnings...) 
} @@ -97,7 +100,10 @@ func (s *systemRouter) getInfo(ctx context.Context, w http.ResponseWriter, r *ht } func (s *systemRouter) getVersion(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error { - info := s.backend.SystemVersion() + info, err := s.backend.SystemVersion(ctx) + if err != nil { + return err + } return httputils.WriteJSON(w, http.StatusOK, info) } diff --git a/daemon/cluster/cluster.go b/daemon/cluster/cluster.go index 3b00449600..1762f451f5 100644 --- a/daemon/cluster/cluster.go +++ b/daemon/cluster/cluster.go @@ -249,8 +249,8 @@ func (c *Cluster) newNodeRunner(conf nodeStartConfig) (*nodeRunner, error) { return nr, nil } -func (c *Cluster) getRequestContext() (context.Context, func()) { // TODO: not needed when requests don't block on qourum lost - return context.WithTimeout(context.Background(), swarmRequestTimeout) +func (c *Cluster) getRequestContext(ctx context.Context) (context.Context, func()) { // TODO: not needed when requests don't block on qourum lost + return context.WithTimeout(ctx, swarmRequestTimeout) } // IsManager returns true if Cluster is participating as a manager. 
@@ -443,7 +443,8 @@ func (c *Cluster) lockedManagerAction(fn func(ctx context.Context, state nodeSta return c.errNoManager(state) } - ctx, cancel := c.getRequestContext() + ctx := context.TODO() + ctx, cancel := c.getRequestContext(ctx) defer cancel() return fn(ctx, state) diff --git a/daemon/cluster/configs.go b/daemon/cluster/configs.go index d4f28a3c6c..57dea9fe77 100644 --- a/daemon/cluster/configs.go +++ b/daemon/cluster/configs.go @@ -41,7 +41,9 @@ func (c *Cluster) GetConfigs(options apitypes.ConfigListOptions) ([]types.Config if err != nil { return nil, err } - ctx, cancel := c.getRequestContext() + + ctx := context.TODO() + ctx, cancel := c.getRequestContext(ctx) defer cancel() r, err := state.controlClient.ListConfigs(ctx, diff --git a/daemon/cluster/executor/backend.go b/daemon/cluster/executor/backend.go index 3813fc1c50..976a434545 100644 --- a/daemon/cluster/executor/backend.go +++ b/daemon/cluster/executor/backend.go @@ -53,7 +53,7 @@ type Backend interface { SetContainerDependencyStore(name string, store exec.DependencyGetter) error SetContainerSecretReferences(name string, refs []*swarm.SecretReference) error SetContainerConfigReferences(name string, refs []*swarm.ConfigReference) error - SystemInfo() *system.Info + SystemInfo(context.Context) (*system.Info, error) Containers(ctx context.Context, config *container.ListOptions) ([]*types.Container, error) SetNetworkBootstrapKeys([]*networktypes.EncryptionKey) error DaemonJoinsCluster(provider cluster.Provider) diff --git a/daemon/cluster/executor/container/executor.go b/daemon/cluster/executor/container/executor.go index 113d65fc19..1f506a4fa4 100644 --- a/daemon/cluster/executor/container/executor.go +++ b/daemon/cluster/executor/container/executor.go @@ -58,7 +58,10 @@ func NewExecutor(b executorpkg.Backend, p plugin.Backend, i executorpkg.ImageBac // Describe returns the underlying node description from the docker client. 
func (e *executor) Describe(ctx context.Context) (*api.NodeDescription, error) { - info := e.backend.SystemInfo() + info, err := e.backend.SystemInfo(ctx) + if err != nil { + return nil, err + } plugins := map[api.PluginDescription]struct{}{} addPlugins := func(typ string, names []string) { diff --git a/daemon/cluster/networks.go b/daemon/cluster/networks.go index 00d076e0a0..bcb41545eb 100644 --- a/daemon/cluster/networks.go +++ b/daemon/cluster/networks.go @@ -12,6 +12,7 @@ import ( "github.com/docker/docker/daemon/cluster/convert" internalnetwork "github.com/docker/docker/daemon/network" "github.com/docker/docker/errdefs" + "github.com/docker/docker/internal/compatcontext" "github.com/docker/docker/runconfig" swarmapi "github.com/moby/swarmkit/v2/api" "github.com/pkg/errors" @@ -68,7 +69,8 @@ func (c *Cluster) getNetworks(filters *swarmapi.ListNetworksRequest_Filters) ([] return nil, c.errNoManager(state) } - ctx, cancel := c.getRequestContext() + ctx := context.TODO() + ctx, cancel := c.getRequestContext(ctx) defer cancel() r, err := state.controlClient.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: filters}) @@ -203,7 +205,8 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s } c.mu.Unlock() - ctx, cancel := c.getRequestContext() + ctx := context.TODO() + ctx, cancel := c.getRequestContext(ctx) defer cancel() taskID, err := agent.ResourceAllocator().AttachNetwork(ctx, containerID, target, addresses) @@ -222,7 +225,8 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s log.G(ctx).Debugf("Successfully attached to network %s with task id %s", target, taskID) release := func() { - ctx, cancel := c.getRequestContext() + ctx := compatcontext.WithoutCancel(ctx) + ctx, cancel := c.getRequestContext(ctx) defer cancel() if err := agent.ResourceAllocator().DetachNetwork(ctx, taskID); err != nil { log.G(ctx).Errorf("Failed remove network attachment %s to network %s on allocation failure: %v", diff 
--git a/daemon/cluster/nodes.go b/daemon/cluster/nodes.go index 7f643d833a..46d20e2eff 100644 --- a/daemon/cluster/nodes.go +++ b/daemon/cluster/nodes.go @@ -26,7 +26,8 @@ func (c *Cluster) GetNodes(options apitypes.NodeListOptions) ([]types.Node, erro return nil, err } - ctx, cancel := c.getRequestContext() + ctx := context.TODO() + ctx, cancel := c.getRequestContext(ctx) defer cancel() r, err := state.controlClient.ListNodes( @@ -72,7 +73,8 @@ func (c *Cluster) UpdateNode(input string, version uint64, spec types.NodeSpec) return errdefs.InvalidParameter(err) } - ctx, cancel := c.getRequestContext() + ctx := context.TODO() + ctx, cancel := c.getRequestContext(ctx) defer cancel() currentNode, err := getNode(ctx, state.controlClient, input) diff --git a/daemon/cluster/secrets.go b/daemon/cluster/secrets.go index bbc2fdb8d0..d4ee0727f3 100644 --- a/daemon/cluster/secrets.go +++ b/daemon/cluster/secrets.go @@ -41,7 +41,9 @@ func (c *Cluster) GetSecrets(options apitypes.SecretListOptions) ([]types.Secret if err != nil { return nil, err } - ctx, cancel := c.getRequestContext() + + ctx := context.TODO() + ctx, cancel := c.getRequestContext(ctx) defer cancel() r, err := state.controlClient.ListSecrets(ctx, diff --git a/daemon/cluster/services.go b/daemon/cluster/services.go index 5164cd0846..b267df52a0 100644 --- a/daemon/cluster/services.go +++ b/daemon/cluster/services.go @@ -21,6 +21,7 @@ import ( timetypes "github.com/docker/docker/api/types/time" "github.com/docker/docker/daemon/cluster/convert" "github.com/docker/docker/errdefs" + "github.com/docker/docker/internal/compatcontext" runconfigopts "github.com/docker/docker/runconfig/opts" gogotypes "github.com/gogo/protobuf/types" swarmapi "github.com/moby/swarmkit/v2/api" @@ -65,7 +66,8 @@ func (c *Cluster) GetServices(options types.ServiceListOptions) ([]swarm.Service Runtimes: options.Filters.Get("runtime"), } - ctx, cancel := c.getRequestContext() + ctx := context.TODO() + ctx, cancel := c.getRequestContext(ctx) 
defer cancel() r, err := state.controlClient.ListServices( @@ -263,7 +265,8 @@ func (c *Cluster) CreateService(s swarm.ServiceSpec, encodedAuth string, queryRe // "ctx" could make it impossible to create a service // if the registry is slow or unresponsive. var cancel func() - ctx, cancel = c.getRequestContext() + ctx = compatcontext.WithoutCancel(ctx) + ctx, cancel = c.getRequestContext(ctx) defer cancel() } @@ -378,7 +381,8 @@ func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec swa // "ctx" could make it impossible to update a service // if the registry is slow or unresponsive. var cancel func() - ctx, cancel = c.getRequestContext() + ctx = compatcontext.WithoutCancel(ctx) + ctx, cancel = c.getRequestContext(ctx) defer cancel() } } diff --git a/daemon/cluster/swarm.go b/daemon/cluster/swarm.go index 1a820d67e6..e81c838a48 100644 --- a/daemon/cluster/swarm.go +++ b/daemon/cluster/swarm.go @@ -429,7 +429,7 @@ func (c *Cluster) Leave(ctx context.Context, force bool) error { } // Info returns information about the current cluster state. 
-func (c *Cluster) Info() types.Info { +func (c *Cluster) Info(ctx context.Context) types.Info { info := types.Info{ NodeAddr: c.GetAdvertiseAddress(), } @@ -442,7 +442,7 @@ func (c *Cluster) Info() types.Info { info.Error = state.err.Error() } - ctx, cancel := c.getRequestContext() + ctx, cancel := c.getRequestContext(ctx) defer cancel() if state.IsActiveManager() { diff --git a/daemon/containerd/service.go b/daemon/containerd/service.go index 94bf2215b3..dba2305751 100644 --- a/daemon/containerd/service.go +++ b/daemon/containerd/service.go @@ -76,8 +76,8 @@ func (i *ImageService) DistributionServices() images.DistributionServices { // CountImages returns the number of images stored by ImageService // called from info.go -func (i *ImageService) CountImages() int { - imgs, err := i.client.ListImages(context.TODO()) +func (i *ImageService) CountImages(ctx context.Context) int { + imgs, err := i.client.ListImages(ctx) if err != nil { return 0 } diff --git a/daemon/daemon.go b/daemon/daemon.go index be4c12d4e9..b3164a288e 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -1175,7 +1175,10 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S } close(d.startupDone) - info := d.SystemInfo() + info, err := d.SystemInfo(ctx) + if err != nil { + return nil, err + } for _, w := range info.Warnings { log.G(ctx).Warn(w) } @@ -1349,7 +1352,7 @@ func (daemon *Daemon) Subnets() ([]net.IPNet, []net.IPNet) { var v4Subnets []net.IPNet var v6Subnets []net.IPNet - for _, managedNetwork := range daemon.netController.Networks() { + for _, managedNetwork := range daemon.netController.Networks(context.TODO()) { v4infos, v6infos := managedNetwork.IpamInfo() for _, info := range v4infos { if info.IPAMData.Pool != nil { diff --git a/daemon/daemon_windows.go b/daemon/daemon_windows.go index d1f004a537..44607a602d 100644 --- a/daemon/daemon_windows.go +++ b/daemon/daemon_windows.go @@ -247,8 +247,10 @@ func (daemon *Daemon) initNetworkController(daemonCfg 
*config.Config, activeSand return err } + ctx := context.TODO() + // Remove networks not present in HNS - for _, v := range daemon.netController.Networks() { + for _, v := range daemon.netController.Networks(ctx) { hnsid := v.DriverOptions()[winlibnetwork.HNSID] found := false diff --git a/daemon/events.go b/daemon/events.go index 31def32323..dfcf7db79e 100644 --- a/daemon/events.go +++ b/daemon/events.go @@ -68,7 +68,7 @@ func (daemon *Daemon) LogNetworkEventWithAttributes(nw *libnetwork.Network, acti // LogDaemonEventWithAttributes generates an event related to the daemon itself with specific given attributes. func (daemon *Daemon) LogDaemonEventWithAttributes(action events.Action, attributes map[string]string) { if daemon.EventsService != nil { - if name := hostName(); name != "" { + if name := hostName(context.TODO()); name != "" { attributes["name"] = name } daemon.EventsService.Log(action, events.DaemonEventType, events.Actor{ diff --git a/daemon/image_service.go b/daemon/image_service.go index 50105c5304..56c1e402bc 100644 --- a/daemon/image_service.go +++ b/daemon/image_service.go @@ -36,7 +36,7 @@ type ImageService interface { LoadImage(ctx context.Context, inTar io.ReadCloser, outStream io.Writer, quiet bool) error Images(ctx context.Context, opts types.ImageListOptions) ([]*imagetype.Summary, error) LogImageEvent(imageID, refName string, action events.Action) - CountImages() int + CountImages(ctx context.Context) int ImagesPrune(ctx context.Context, pruneFilters filters.Args) (*types.ImagesPruneReport, error) ImportImage(ctx context.Context, ref reference.Named, platform *ocispec.Platform, msg string, layerReader io.Reader, changes []string) (image.ID, error) TagImage(ctx context.Context, imageID image.ID, newTag reference.Named) error diff --git a/daemon/images/service.go b/daemon/images/service.go index 0cddfd052c..5bc46fa097 100644 --- a/daemon/images/service.go +++ b/daemon/images/service.go @@ -102,7 +102,7 @@ func (i *ImageService) 
DistributionServices() DistributionServices { // CountImages returns the number of images stored by ImageService // called from info.go -func (i *ImageService) CountImages() int { +func (i *ImageService) CountImages(ctx context.Context) int { return i.imageStore.Len() } diff --git a/daemon/info.go b/daemon/info.go index 4587814c2d..a2668420ff 100644 --- a/daemon/info.go +++ b/daemon/info.go @@ -8,6 +8,7 @@ import ( "strings" "time" + "github.com/containerd/containerd/tracing" "github.com/containerd/log" "github.com/docker/docker/api" "github.com/docker/docker/api/types" @@ -27,8 +28,19 @@ import ( "github.com/opencontainers/selinux/go-selinux" ) +func doWithTrace[T any](ctx context.Context, name string, f func() T) T { + _, span := tracing.StartSpan(ctx, name) + defer span.End() + return f() +} + // SystemInfo returns information about the host server the daemon is running on. -func (daemon *Daemon) SystemInfo() *system.Info { +// +// The only error this should return is due to context cancellation/deadline. +// Anything else should be logged and ignored because this is looking up +// multiple things and is often used for debugging. +// The only valid case for an early return is when the caller no longer wants the result (i.e. the context was cancelled). 
+func (daemon *Daemon) SystemInfo(ctx context.Context) (*system.Info, error) { defer metrics.StartTimer(hostInfoFunctions.WithValues("system_info"))() sysInfo := daemon.RawSysInfo() @@ -36,22 +48,22 @@ func (daemon *Daemon) SystemInfo() *system.Info { v := &system.Info{ ID: daemon.id, - Images: daemon.imageService.CountImages(), + Images: daemon.imageService.CountImages(ctx), IPv4Forwarding: !sysInfo.IPv4ForwardingDisabled, BridgeNfIptables: !sysInfo.BridgeNFCallIPTablesDisabled, BridgeNfIP6tables: !sysInfo.BridgeNFCallIP6TablesDisabled, - Name: hostName(), + Name: hostName(ctx), SystemTime: time.Now().Format(time.RFC3339Nano), LoggingDriver: daemon.defaultLogConfig.Type, - KernelVersion: kernelVersion(), - OperatingSystem: operatingSystem(), - OSVersion: osVersion(), + KernelVersion: kernelVersion(ctx), + OperatingSystem: operatingSystem(ctx), + OSVersion: osVersion(ctx), IndexServerAddress: registry.IndexServer, OSType: runtime.GOOS, Architecture: platform.Architecture, - RegistryConfig: daemon.registryService.ServiceConfig(), - NCPU: sysinfo.NumCPU(), - MemTotal: memInfo().MemTotal, + RegistryConfig: doWithTrace(ctx, "registry.ServiceConfig", daemon.registryService.ServiceConfig), + NCPU: doWithTrace(ctx, "sysinfo.NumCPU", sysinfo.NumCPU), + MemTotal: memInfo(ctx).MemTotal, GenericResources: daemon.genericResources, DockerRootDir: cfg.Root, Labels: cfg.Labels, @@ -66,24 +78,31 @@ func (daemon *Daemon) SystemInfo() *system.Info { } daemon.fillContainerStates(v) - daemon.fillDebugInfo(v) + daemon.fillDebugInfo(ctx, v) daemon.fillAPIInfo(v, &cfg.Config) // Retrieve platform specific info - daemon.fillPlatformInfo(v, sysInfo, cfg) + if err := daemon.fillPlatformInfo(ctx, v, sysInfo, cfg); err != nil { + return nil, err + } daemon.fillDriverInfo(v) - daemon.fillPluginsInfo(v, &cfg.Config) + daemon.fillPluginsInfo(ctx, v, &cfg.Config) daemon.fillSecurityOptions(v, sysInfo, &cfg.Config) daemon.fillLicense(v) - daemon.fillDefaultAddressPools(v, &cfg.Config) + 
daemon.fillDefaultAddressPools(ctx, v, &cfg.Config) - return v + return v, nil } // SystemVersion returns version information about the daemon. -func (daemon *Daemon) SystemVersion() types.Version { +// +// The only error this should return is due to context cancellation/deadline. +// Anything else should be logged and ignored because this is looking up +// multiple things and is often used for debugging. +// The only valid case for an early return is when the caller no longer wants the result (i.e. the context was cancelled). +func (daemon *Daemon) SystemVersion(ctx context.Context) (types.Version, error) { defer metrics.StartTimer(hostInfoFunctions.WithValues("system_version"))() - kernelVersion := kernelVersion() + kernelVersion := kernelVersion(ctx) cfg := daemon.config() v := types.Version{ @@ -120,8 +139,10 @@ func (daemon *Daemon) SystemVersion() types.Version { v.Platform.Name = dockerversion.PlatformName - daemon.fillPlatformVersion(&v, cfg) - return v + if err := daemon.fillPlatformVersion(ctx, &v, cfg); err != nil { + return v, err + } + return v, nil } func (daemon *Daemon) fillDriverInfo(v *system.Info) { @@ -140,10 +161,10 @@ WARNING: The %s storage-driver is deprecated, and will be removed in a future re fillDriverWarnings(v) } -func (daemon *Daemon) fillPluginsInfo(v *system.Info, cfg *config.Config) { +func (daemon *Daemon) fillPluginsInfo(ctx context.Context, v *system.Info, cfg *config.Config) { v.Plugins = system.PluginsInfo{ Volume: daemon.volumes.GetDriverList(), - Network: daemon.GetNetworkDriverList(), + Network: daemon.GetNetworkDriverList(ctx), // The authorization plugins are returned in the order they are // used as they constitute a request/response modification chain. @@ -198,9 +219,9 @@ func (daemon *Daemon) fillContainerStates(v *system.Info) { // this information optional (cli to request "with debugging information"), or // only collect it if the daemon has debug enabled. 
For the CLI code, see // https://github.com/docker/cli/blob/v20.10.12/cli/command/system/info.go#L239-L244 -func (daemon *Daemon) fillDebugInfo(v *system.Info) { +func (daemon *Daemon) fillDebugInfo(ctx context.Context, v *system.Info) { v.Debug = debug.IsEnabled() - v.NFd = fileutils.GetTotalUsedFds() + v.NFd = fileutils.GetTotalUsedFds(ctx) v.NGoroutines = runtime.NumGoroutine() v.NEventsListener = daemon.EventsService.SubscribersCount() } @@ -228,7 +249,9 @@ func (daemon *Daemon) fillAPIInfo(v *system.Info, cfg *config.Config) { } } -func (daemon *Daemon) fillDefaultAddressPools(v *system.Info, cfg *config.Config) { +func (daemon *Daemon) fillDefaultAddressPools(ctx context.Context, v *system.Info, cfg *config.Config) { + _, span := tracing.StartSpan(ctx, "fillDefaultAddressPools") + defer span.End() for _, pool := range cfg.DefaultAddressPools.Value() { v.DefaultAddressPools = append(v.DefaultAddressPools, system.NetworkAddressPool{ Base: pool.Base, @@ -237,45 +260,56 @@ func (daemon *Daemon) fillDefaultAddressPools(v *system.Info, cfg *config.Config } } -func hostName() string { +func hostName(ctx context.Context) string { + ctx, span := tracing.StartSpan(ctx, "hostName") + defer span.End() hostname := "" if hn, err := os.Hostname(); err != nil { - log.G(context.TODO()).Warnf("Could not get hostname: %v", err) + log.G(ctx).Warnf("Could not get hostname: %v", err) } else { hostname = hn } return hostname } -func kernelVersion() string { +func kernelVersion(ctx context.Context) string { + ctx, span := tracing.StartSpan(ctx, "kernelVersion") + defer span.End() + var kernelVersion string if kv, err := kernel.GetKernelVersion(); err != nil { - log.G(context.TODO()).Warnf("Could not get kernel version: %v", err) + log.G(ctx).Warnf("Could not get kernel version: %v", err) } else { kernelVersion = kv.String() } return kernelVersion } -func memInfo() *meminfo.Memory { +func memInfo(ctx context.Context) *meminfo.Memory { + ctx, span := tracing.StartSpan(ctx, "memInfo") 
+ defer span.End() + memInfo, err := meminfo.Read() if err != nil { - log.G(context.TODO()).Errorf("Could not read system memory info: %v", err) + log.G(ctx).Errorf("Could not read system memory info: %v", err) memInfo = &meminfo.Memory{} } return memInfo } -func operatingSystem() (operatingSystem string) { +func operatingSystem(ctx context.Context) (operatingSystem string) { + ctx, span := tracing.StartSpan(ctx, "operatingSystem") + defer span.End() + defer metrics.StartTimer(hostInfoFunctions.WithValues("operating_system"))() if s, err := operatingsystem.GetOperatingSystem(); err != nil { - log.G(context.TODO()).Warnf("Could not get operating system name: %v", err) + log.G(ctx).Warnf("Could not get operating system name: %v", err) } else { operatingSystem = s } if inContainer, err := operatingsystem.IsContainerized(); err != nil { - log.G(context.TODO()).Errorf("Could not determine if daemon is containerized: %v", err) + log.G(ctx).Errorf("Could not determine if daemon is containerized: %v", err) operatingSystem += " (error determining if containerized)" } else if inContainer { operatingSystem += " (containerized)" @@ -284,12 +318,15 @@ func operatingSystem() (operatingSystem string) { return operatingSystem } -func osVersion() (version string) { +func osVersion(ctx context.Context) (version string) { + ctx, span := tracing.StartSpan(ctx, "osVersion") + defer span.End() + defer metrics.StartTimer(hostInfoFunctions.WithValues("os_version"))() version, err := operatingsystem.GetOperatingSystemVersion() if err != nil { - log.G(context.TODO()).Warnf("Could not get operating system version: %v", err) + log.G(ctx).Warnf("Could not get operating system version: %v", err) } return version diff --git a/daemon/info_unix.go b/daemon/info_unix.go index 9d92bc6143..57c6601c05 100644 --- a/daemon/info_unix.go +++ b/daemon/info_unix.go @@ -16,6 +16,7 @@ import ( containertypes "github.com/docker/docker/api/types/container" "github.com/docker/docker/api/types/system" 
"github.com/docker/docker/daemon/config" + "github.com/docker/docker/errdefs" "github.com/docker/docker/pkg/rootless" "github.com/docker/docker/pkg/sysinfo" "github.com/pkg/errors" @@ -23,7 +24,7 @@ import ( ) // fillPlatformInfo fills the platform related info. -func (daemon *Daemon) fillPlatformInfo(v *system.Info, sysInfo *sysinfo.SysInfo, cfg *configStore) { +func (daemon *Daemon) fillPlatformInfo(ctx context.Context, v *system.Info, sysInfo *sysinfo.SysInfo, cfg *configStore) error { v.CgroupDriver = cgroupDriver(&cfg.Config) v.CgroupVersion = "1" if sysInfo.CgroupUnified { @@ -57,36 +58,20 @@ func (daemon *Daemon) fillPlatformInfo(v *system.Info, sysInfo *sysinfo.SysInfo, v.ContainerdCommit.ID = "N/A" v.InitCommit.ID = "N/A" - if _, _, commit, err := parseDefaultRuntimeVersion(&cfg.Runtimes); err != nil { - log.G(context.TODO()).Warnf(err.Error()) - } else { - v.RuncCommit.ID = commit + if err := populateRuncCommit(&v.RuncCommit, cfg); err != nil { + log.G(ctx).WithError(err).Warn("Failed to retrieve default runtime version") } - if rv, err := daemon.containerd.Version(context.Background()); err == nil { - v.ContainerdCommit.ID = rv.Revision - } else { - log.G(context.TODO()).Warnf("failed to retrieve containerd version: %v", err) + if err := daemon.populateContainerdCommit(ctx, &v.ContainerdCommit); err != nil { + return err } - v.InitBinary = cfg.GetInitPath() - if initBinary, err := cfg.LookupInitPath(); err != nil { - log.G(context.TODO()).Warnf("failed to find docker-init: %s", err) - } else if rv, err := exec.Command(initBinary, "--version").Output(); err == nil { - if _, commit, err := parseInitVersion(string(rv)); err != nil { - log.G(context.TODO()).Warnf("failed to parse %s version: %s", initBinary, err) - } else { - v.InitCommit.ID = commit - } - } else { - log.G(context.TODO()).Warnf("failed to retrieve %s version: %s", initBinary, err) + if err := daemon.populateInitCommit(ctx, v, cfg); err != nil { + return err } // Set expected and actual 
commits to the same value to prevent the client // showing that the version does not match the "expected" version/commit. - v.RuncCommit.Expected = v.RuncCommit.ID - v.ContainerdCommit.Expected = v.ContainerdCommit.ID - v.InitCommit.Expected = v.InitCommit.ID if v.CgroupDriver == cgroupNoneDriver { if v.CgroupVersion == "2" { @@ -171,65 +156,79 @@ func (daemon *Daemon) fillPlatformInfo(v *system.Info, sysInfo *sysinfo.SysInfo, if !v.BridgeNfIP6tables { v.Warnings = append(v.Warnings, "WARNING: bridge-nf-call-ip6tables is disabled") } + return nil } -func (daemon *Daemon) fillPlatformVersion(v *types.Version, cfg *configStore) { - if rv, err := daemon.containerd.Version(context.Background()); err == nil { - v.Components = append(v.Components, types.ComponentVersion{ - Name: "containerd", - Version: rv.Version, - Details: map[string]string{ - "GitCommit": rv.Revision, - }, - }) +func (daemon *Daemon) fillPlatformVersion(ctx context.Context, v *types.Version, cfg *configStore) error { + if err := daemon.populateContainerdVersion(ctx, v); err != nil { + return err } - if _, ver, commit, err := parseDefaultRuntimeVersion(&cfg.Runtimes); err != nil { - log.G(context.TODO()).Warnf(err.Error()) - } else { - v.Components = append(v.Components, types.ComponentVersion{ - Name: cfg.Runtimes.Default, - Version: ver, - Details: map[string]string{ - "GitCommit": commit, - }, - }) + if err := populateRuncVersion(cfg, v); err != nil { + log.G(ctx).WithError(err).Warn("Failed to retrieve default runtime version") } - if initBinary, err := cfg.LookupInitPath(); err != nil { - log.G(context.TODO()).Warnf("failed to find docker-init: %s", err) - } else if rv, err := exec.Command(initBinary, "--version").Output(); err == nil { - if ver, commit, err := parseInitVersion(string(rv)); err != nil { - log.G(context.TODO()).Warnf("failed to parse %s version: %s", initBinary, err) - } else { - v.Components = append(v.Components, types.ComponentVersion{ - Name: filepath.Base(initBinary), - 
Version: ver, - Details: map[string]string{ - "GitCommit": commit, - }, - }) + if err := populateInitVersion(ctx, cfg, v); err != nil { + return err + } + + if err := daemon.fillRootlessVersion(ctx, v); err != nil { + if errdefs.IsContext(err) { + return err } - } else { - log.G(context.TODO()).Warnf("failed to retrieve %s version: %s", initBinary, err) + log.G(ctx).WithError(err).Warn("Failed to fill rootless version") } - - daemon.fillRootlessVersion(v) + return nil } -func (daemon *Daemon) fillRootlessVersion(v *types.Version) { +func populateRuncCommit(v *system.Commit, cfg *configStore) error { + _, _, commit, err := parseDefaultRuntimeVersion(&cfg.Runtimes) + if err != nil { + return err + } + v.ID = commit + v.Expected = commit + return nil +} + +func (daemon *Daemon) populateInitCommit(ctx context.Context, v *system.Info, cfg *configStore) error { + v.InitBinary = cfg.GetInitPath() + initBinary, err := cfg.LookupInitPath() + if err != nil { + log.G(ctx).WithError(err).Warnf("Failed to find docker-init") + return nil + } + + rv, err := exec.CommandContext(ctx, initBinary, "--version").Output() + if err != nil { + if errdefs.IsContext(err) { + return err + } + log.G(ctx).WithError(err).Warnf("Failed to retrieve %s version", initBinary) + return nil + } + + _, commit, err := parseInitVersion(string(rv)) + if err != nil { + log.G(ctx).WithError(err).Warnf("failed to parse %s version", initBinary) + return nil + } + v.InitCommit.ID = commit + v.InitCommit.Expected = v.InitCommit.ID + return nil +} + +func (daemon *Daemon) fillRootlessVersion(ctx context.Context, v *types.Version) error { if !rootless.RunningWithRootlessKit() { - return + return nil } rlc, err := getRootlessKitClient() if err != nil { - log.G(context.TODO()).Warnf("failed to create RootlessKit client: %v", err) - return + return errors.Wrap(err, "failed to create RootlessKit client") } - rlInfo, err := rlc.Info(context.TODO()) + rlInfo, err := rlc.Info(ctx) if err != nil { - 
log.G(context.TODO()).Warnf("failed to retrieve RootlessKit version: %v", err) - return + return errors.Wrap(err, "failed to retrieve RootlessKit version") } v.Components = append(v.Components, types.ComponentVersion{ Name: "rootlesskit", @@ -244,31 +243,54 @@ func (daemon *Daemon) fillRootlessVersion(v *types.Version) { switch rlInfo.NetworkDriver.Driver { case "slirp4netns": - if rv, err := exec.Command("slirp4netns", "--version").Output(); err == nil { - if _, ver, commit, err := parseRuntimeVersion(string(rv)); err != nil { - log.G(context.TODO()).Warnf("failed to parse slirp4netns version: %v", err) - } else { - v.Components = append(v.Components, types.ComponentVersion{ - Name: "slirp4netns", - Version: ver, - Details: map[string]string{ - "GitCommit": commit, - }, - }) + err = func() error { + rv, err := exec.CommandContext(ctx, "slirp4netns", "--version").Output() + if err != nil { + if errdefs.IsContext(err) { + return err + } + log.G(ctx).WithError(err).Warn("Failed to retrieve slirp4netns version") + return nil } - } else { - log.G(context.TODO()).Warnf("failed to retrieve slirp4netns version: %v", err) + + _, ver, commit, err := parseRuntimeVersion(string(rv)) + if err != nil { + log.G(ctx).WithError(err).Warn("Failed to parse slirp4netns version") + return nil + } + v.Components = append(v.Components, types.ComponentVersion{ + Name: "slirp4netns", + Version: ver, + Details: map[string]string{ + "GitCommit": commit, + }, + }) + return nil + }() + if err != nil { + return err } case "vpnkit": - if rv, err := exec.Command("vpnkit", "--version").Output(); err == nil { + err = func() error { + out, err := exec.CommandContext(ctx, "vpnkit", "--version").Output() + if err != nil { + if errdefs.IsContext(err) { + return err + } + log.G(ctx).WithError(err).Warn("Failed to retrieve vpnkit version") + return nil + } v.Components = append(v.Components, types.ComponentVersion{ Name: "vpnkit", - Version: strings.TrimSpace(string(rv)), + Version: 
strings.TrimSpace(strings.TrimSpace(string(out))), }) - } else { - log.G(context.TODO()).Warnf("failed to retrieve vpnkit version: %v", err) + return nil + }() + if err != nil { + return err } } + return nil } // getRootlessKitClient returns RootlessKit client @@ -384,3 +406,83 @@ func Rootless(cfg *config.Config) bool { func noNewPrivileges(cfg *config.Config) bool { return cfg.NoNewPrivileges } + +func (daemon *Daemon) populateContainerdCommit(ctx context.Context, v *system.Commit) error { + rv, err := daemon.containerd.Version(ctx) + if err != nil { + if errdefs.IsContext(err) { + return err + } + log.G(ctx).WithError(err).Warnf("Failed to retrieve containerd version") + return nil + } + v.ID = rv.Revision + v.Expected = rv.Revision + return nil +} + +func (daemon *Daemon) populateContainerdVersion(ctx context.Context, v *types.Version) error { + rv, err := daemon.containerd.Version(ctx) + if err != nil { + if errdefs.IsContext(err) { + return err + } + log.G(ctx).WithError(err).Warn("Failed to retrieve containerd version") + return nil + } + + v.Components = append(v.Components, types.ComponentVersion{ + Name: "containerd", + Version: rv.Version, + Details: map[string]string{ + "GitCommit": rv.Revision, + }, + }) + return nil +} + +func populateRuncVersion(cfg *configStore, v *types.Version) error { + _, ver, commit, err := parseDefaultRuntimeVersion(&cfg.Runtimes) + if err != nil { + return err + } + v.Components = append(v.Components, types.ComponentVersion{ + Name: cfg.Runtimes.Default, + Version: ver, + Details: map[string]string{ + "GitCommit": commit, + }, + }) + return nil +} + +func populateInitVersion(ctx context.Context, cfg *configStore, v *types.Version) error { + initBinary, err := cfg.LookupInitPath() + if err != nil { + log.G(ctx).WithError(err).Warn("Failed to find docker-init") + return nil + } + + rv, err := exec.CommandContext(ctx, initBinary, "--version").Output() + if err != nil { + if errdefs.IsContext(err) { + return err + } + 
log.G(ctx).WithError(err).Warnf("Failed to retrieve %s version", initBinary) + return nil + } + + ver, commit, err := parseInitVersion(string(rv)) + if err != nil { + log.G(ctx).WithError(err).Warnf("failed to parse %s version", initBinary) + return nil + } + v.Components = append(v.Components, types.ComponentVersion{ + Name: filepath.Base(initBinary), + Version: ver, + Details: map[string]string{ + "GitCommit": commit, + }, + }) + return nil +} diff --git a/daemon/info_windows.go b/daemon/info_windows.go index ff5701c118..1897aa494b 100644 --- a/daemon/info_windows.go +++ b/daemon/info_windows.go @@ -1,6 +1,8 @@ package daemon // import "github.com/docker/docker/daemon" import ( + "context" + "github.com/docker/docker/api/types" "github.com/docker/docker/api/types/system" "github.com/docker/docker/daemon/config" @@ -8,10 +10,13 @@ import ( ) // fillPlatformInfo fills the platform related info. -func (daemon *Daemon) fillPlatformInfo(v *system.Info, sysInfo *sysinfo.SysInfo, cfg *configStore) { +func (daemon *Daemon) fillPlatformInfo(ctx context.Context, v *system.Info, sysInfo *sysinfo.SysInfo, cfg *configStore) error { + return nil } -func (daemon *Daemon) fillPlatformVersion(v *types.Version, cfg *configStore) {} +func (daemon *Daemon) fillPlatformVersion(ctx context.Context, v *types.Version, cfg *configStore) error { + return nil +} func fillDriverWarnings(v *system.Info) { } diff --git a/daemon/network.go b/daemon/network.go index 35624e5acb..b93043083b 100644 --- a/daemon/network.go +++ b/daemon/network.go @@ -139,7 +139,8 @@ func (daemon *Daemon) getAllNetworks() []*libnetwork.Network { if c == nil { return nil } - return c.Networks() + ctx := context.TODO() + return c.Networks(ctx) } type ingressJob struct { @@ -465,7 +466,7 @@ func (daemon *Daemon) DisconnectContainerFromNetwork(containerName string, netwo // GetNetworkDriverList returns the list of plugins drivers // registered for network. 
-func (daemon *Daemon) GetNetworkDriverList() []string { +func (daemon *Daemon) GetNetworkDriverList(ctx context.Context) []string { if !daemon.NetworkControllerEnabled() { return nil } @@ -483,7 +484,7 @@ func (daemon *Daemon) GetNetworkDriverList() []string { pluginMap[plugin] = true } - networks := daemon.netController.Networks() + networks := daemon.netController.Networks(ctx) for _, nw := range networks { if !pluginMap[nw.Type()] { diff --git a/errdefs/is.go b/errdefs/is.go index 461cba5f88..b0d745ca7a 100644 --- a/errdefs/is.go +++ b/errdefs/is.go @@ -1,5 +1,10 @@ package errdefs +import ( + "context" + "errors" +) + type causer interface { Cause() error } @@ -105,3 +110,8 @@ func IsDataLoss(err error) bool { _, ok := getImplementer(err).(ErrDataLoss) return ok } + +// IsContext returns if the passed in error is due to context cancellation or deadline exceeded. +func IsContext(err error) bool { + return errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) +} diff --git a/libnetwork/controller.go b/libnetwork/controller.go index 07898f6795..db70e8fdd4 100644 --- a/libnetwork/controller.go +++ b/libnetwork/controller.go @@ -820,10 +820,10 @@ func (c *Controller) addNetwork(n *Network) error { } // Networks returns the list of Network(s) managed by this controller. -func (c *Controller) Networks() []*Network { +func (c *Controller) Networks(ctx context.Context) []*Network { var list []*Network - for _, n := range c.getNetworksFromStore() { + for _, n := range c.getNetworksFromStore(ctx) { if n.inDelete { continue } @@ -835,7 +835,7 @@ func (c *Controller) Networks() []*Network { // WalkNetworks uses the provided function to walk the Network(s) managed by this controller. 
func (c *Controller) WalkNetworks(walker NetworkWalker) { - for _, n := range c.Networks() { + for _, n := range c.Networks(context.TODO()) { if walker(n) { return } diff --git a/libnetwork/libnetwork_linux_test.go b/libnetwork/libnetwork_linux_test.go index 0fcc41596a..a228a657c7 100644 --- a/libnetwork/libnetwork_linux_test.go +++ b/libnetwork/libnetwork_linux_test.go @@ -538,7 +538,8 @@ func TestNetworkEndpointsWalkers(t *testing.T) { t.Fatal(err) } - current := len(controller.Networks()) + ctx := context.TODO() + current := len(controller.Networks(ctx)) // Create network 2 netOption = options.Generic{ @@ -558,7 +559,7 @@ func TestNetworkEndpointsWalkers(t *testing.T) { }() // Test Networks method - if len(controller.Networks()) != current+1 { + if len(controller.Networks(ctx)) != current+1 { t.Fatalf("Did not find the expected number of networks") } diff --git a/libnetwork/store.go b/libnetwork/store.go index 4269eec71d..9162cae5d9 100644 --- a/libnetwork/store.go +++ b/libnetwork/store.go @@ -34,7 +34,7 @@ func (c *Controller) getStore() *datastore.Store { } func (c *Controller) getNetworkFromStore(nid string) (*Network, error) { - for _, n := range c.getNetworksFromStore() { + for _, n := range c.getNetworksFromStore(context.TODO()) { if n.id == nid { return n, nil } @@ -77,21 +77,21 @@ func (c *Controller) getNetworks() ([]*Network, error) { return nl, nil } -func (c *Controller) getNetworksFromStore() []*Network { // FIXME: unify with c.getNetworks() +func (c *Controller) getNetworksFromStore(ctx context.Context) []*Network { // FIXME: unify with c.getNetworks() var nl []*Network store := c.getStore() kvol, err := store.List(datastore.Key(datastore.NetworkKeyPrefix), &Network{ctrlr: c}) if err != nil { if err != datastore.ErrKeyNotFound { - log.G(context.TODO()).Debugf("failed to get networks from store: %v", err) + log.G(ctx).Debugf("failed to get networks from store: %v", err) } return nil } kvep, err := store.Map(datastore.Key(epCntKeyPrefix), 
&endpointCnt{}) if err != nil && err != datastore.ErrKeyNotFound { - log.G(context.TODO()).Warnf("failed to get endpoint_count map from store: %v", err) + log.G(ctx).Warnf("failed to get endpoint_count map from store: %v", err) } for _, kvo := range kvol { @@ -185,7 +185,7 @@ retry: } func (c *Controller) networkCleanup() { - for _, n := range c.getNetworksFromStore() { + for _, n := range c.getNetworksFromStore(context.TODO()) { if n.inDelete { log.G(context.TODO()).Infof("Removing stale network %s (%s)", n.Name(), n.ID()) if err := n.delete(true, true); err != nil { diff --git a/pkg/fileutils/fileutils_linux.go b/pkg/fileutils/fileutils_linux.go index e08d1bfbab..f466f705fc 100644 --- a/pkg/fileutils/fileutils_linux.go +++ b/pkg/fileutils/fileutils_linux.go @@ -6,13 +6,17 @@ import ( "io" "os" + "github.com/containerd/containerd/tracing" "github.com/containerd/log" "golang.org/x/sys/unix" ) // GetTotalUsedFds Returns the number of used File Descriptors by // reading it via /proc filesystem. -func GetTotalUsedFds() int { +func GetTotalUsedFds(ctx context.Context) int { + ctx, span := tracing.StartSpan(ctx, "GetTotalUsedFds") + defer span.End() + name := fmt.Sprintf("/proc/%d/fd", os.Getpid()) // Fast-path for Linux 6.2 (since [f1f1f2569901ec5b9d425f2e91c09a0e320768f3]). 
@@ -30,19 +34,26 @@ func GetTotalUsedFds() int { f, err := os.Open(name) if err != nil { - log.G(context.TODO()).WithError(err).Error("Error listing file descriptors") + log.G(ctx).WithError(err).Error("Error listing file descriptors") return -1 } defer f.Close() var fdCount int for { + select { + case <-ctx.Done(): + log.G(ctx).WithError(ctx.Err()).Error("Context cancelled while counting file descriptors") + return -1 + default: + } + names, err := f.Readdirnames(100) fdCount += len(names) if err == io.EOF { break } else if err != nil { - log.G(context.TODO()).WithError(err).Error("Error listing file descriptors") + log.G(ctx).WithError(err).Error("Error listing file descriptors") return -1 } } diff --git a/pkg/fileutils/fileutils_test.go b/pkg/fileutils/fileutils_test.go index 7876ddf4b2..6149fbfd27 100644 --- a/pkg/fileutils/fileutils_test.go +++ b/pkg/fileutils/fileutils_test.go @@ -1,6 +1,7 @@ package fileutils // import "github.com/docker/docker/pkg/fileutils" import ( + "context" "errors" "os" "path" @@ -242,8 +243,9 @@ func TestCreateIfNotExistsFile(t *testing.T) { } func BenchmarkGetTotalUsedFds(b *testing.B) { + ctx := context.Background() b.ReportAllocs() for i := 0; i < b.N; i++ { - _ = GetTotalUsedFds() + _ = GetTotalUsedFds(ctx) } } diff --git a/pkg/fileutils/fileutils_windows.go b/pkg/fileutils/fileutils_windows.go index 3f1ebb6567..67e8fc4fda 100644 --- a/pkg/fileutils/fileutils_windows.go +++ b/pkg/fileutils/fileutils_windows.go @@ -1,7 +1,9 @@ package fileutils // import "github.com/docker/docker/pkg/fileutils" +import "context" + // GetTotalUsedFds Returns the number of used File Descriptors. Not supported // on Windows. -func GetTotalUsedFds() int { +func GetTotalUsedFds(ctx context.Context) int { return -1 }