diff --git a/daemon/stats.go b/daemon/stats.go index c380f4e529..94374d6059 100644 --- a/daemon/stats.go +++ b/daemon/stats.go @@ -7,6 +7,7 @@ import ( "runtime" "time" + "github.com/containerd/containerd/log" "github.com/docker/docker/api/types" "github.com/docker/docker/api/types/backend" "github.com/docker/docker/api/types/versions" @@ -43,6 +44,15 @@ func (daemon *Daemon) ContainerStats(ctx context.Context, prefixOrName string, c }) } + // Get container stats directly if OneShot is set + if config.OneShot { + stats, err := daemon.GetContainerStats(ctr) + if err != nil { + return err + } + return json.NewEncoder(config.OutStream).Encode(stats) + } + outStream := config.OutStream if config.Stream { wf := ioutils.NewWriteFlusher(outStream) @@ -146,17 +156,37 @@ func (daemon *Daemon) unsubscribeToContainerStats(c *container.Container, ch cha // GetContainerStats collects all the stats published by a container func (daemon *Daemon) GetContainerStats(container *container.Container) (*types.StatsJSON, error) { - stats, err := daemon.stats(container) - if err != nil { - return nil, err - } + var stats *types.StatsJSON + var err error + + stats, err = daemon.stats(container) // We already have the network stats on Windows directly from HCS. - if !container.Config.NetworkDisabled && runtime.GOOS != "windows" { - if stats.Networks, err = daemon.getNetworkStats(container); err != nil { - return nil, err - } + if err == nil && !container.Config.NetworkDisabled && runtime.GOOS != "windows" { + stats.Networks, err = daemon.getNetworkStats(container) } - return stats, nil + switch err.(type) { + case nil: + // Sample system CPU usage close to container usage to avoid + // noise in metric calculations. + systemUsage, onlineCPUs, err := getSystemCPUUsage() + if err != nil { + log.G(context.TODO()).WithError(err).WithField("container_id", container.ID).Errorf("collecting system cpu usage") + return nil, err + } + // FIXME: move to containerd on Linux (not Windows) + stats.CPUStats.SystemUsage = systemUsage + stats.CPUStats.OnlineCPUs = onlineCPUs + return stats, nil + case errdefs.ErrConflict, errdefs.ErrNotFound: + // return empty stats containing only name and ID if not running or not found + return &types.StatsJSON{ + Name: container.Name, + ID: container.ID, + }, nil + default: + log.G(context.TODO()).Errorf("collecting stats for container %s: %v", container.ID, err) + return nil, err + } } diff --git a/daemon/stats/collector.go b/daemon/stats/collector.go index aa033e48ed..ce36d5c2ab 100644 --- a/daemon/stats/collector.go +++ b/daemon/stats/collector.go @@ -1,15 +1,11 @@ package stats // import "github.com/docker/docker/daemon/stats" import ( - "bufio" - "context" "sync" "time" - "github.com/containerd/containerd/log" "github.com/docker/docker/api/types" "github.com/docker/docker/container" - "github.com/docker/docker/errdefs" "github.com/moby/pubsub" ) @@ -20,7 +16,6 @@ type Collector struct { supervisor supervisor interval time.Duration publishers map[*container.Container]*pubsub.Publisher - bufReader *bufio.Reader } // NewCollector creates a stats collector that will poll the supervisor with the specified interval @@ -29,7 +24,6 @@ func NewCollector(supervisor supervisor, interval time.Duration) *Collector { interval: interval, supervisor: supervisor, publishers: make(map[*container.Container]*pubsub.Publisher), - bufReader: bufio.NewReaderSize(nil, 128), } s.cond = sync.NewCond(&s.m) return s @@ -108,45 +102,15 @@ func (s *Collector) Run() { s.cond.L.Unlock() - onlineCPUs, err := s.getNumberOnlineCPUs() - if err != nil { - log.G(context.TODO()).Errorf("collecting system online cpu count: %v", err) - continue - } - for _, pair := range pairs { stats, err := s.supervisor.GetContainerStats(pair.container) - - switch err.(type) { - case nil: - // Sample system CPU usage close to container usage to avoid - // noise in metric calculations. - systemUsage, err := s.getSystemCPUUsage() - if err != nil { - log.G(context.TODO()).WithError(err).WithField("container_id", pair.container.ID).Errorf("collecting system cpu usage") - continue + if err != nil { + stats = &types.StatsJSON{ + Name: pair.container.Name, + ID: pair.container.ID, } - - // FIXME: move to containerd on Linux (not Windows) - stats.CPUStats.SystemUsage = systemUsage - stats.CPUStats.OnlineCPUs = onlineCPUs - - pair.publisher.Publish(*stats) - - case errdefs.ErrConflict, errdefs.ErrNotFound: - // publish empty stats containing only name and ID if not running or not found - pair.publisher.Publish(types.StatsJSON{ - Name: pair.container.Name, - ID: pair.container.ID, - }) - - default: - log.G(context.TODO()).Errorf("collecting stats for %s: %v", pair.container.ID, err) - pair.publisher.Publish(types.StatsJSON{ - Name: pair.container.Name, - ID: pair.container.ID, - }) } + pair.publisher.Publish(*stats) } time.Sleep(s.interval) diff --git a/daemon/stats/collector_unix.go b/daemon/stats/collector_unix.go deleted file mode 100644 index 27d33e941d..0000000000 --- a/daemon/stats/collector_unix.go +++ /dev/null @@ -1,75 +0,0 @@ -//go:build !windows - -package stats // import "github.com/docker/docker/daemon/stats" - -import ( - "fmt" - "os" - "strconv" - "strings" - - "golang.org/x/sys/unix" -) - -const ( - // The value comes from `C.sysconf(C._SC_CLK_TCK)`, and - // on Linux it's a constant which is safe to be hard coded, - // so we can avoid using cgo here. For details, see: - // https://github.com/containerd/cgroups/pull/12 - clockTicksPerSecond = 100 - nanoSecondsPerSecond = 1e9 -) - -// getSystemCPUUsage returns the host system's cpu usage in -// nanoseconds. An error is returned if the format of the underlying -// file does not match. -// -// Uses /proc/stat defined by POSIX. Looks for the cpu -// statistics line and then sums up the first seven fields -// provided. See `man 5 proc` for details on specific field -// information. -func (s *Collector) getSystemCPUUsage() (uint64, error) { - f, err := os.Open("/proc/stat") - if err != nil { - return 0, err - } - defer func() { - s.bufReader.Reset(nil) - f.Close() - }() - s.bufReader.Reset(f) - - for { - line, err := s.bufReader.ReadString('\n') - if err != nil { - break - } - parts := strings.Fields(line) - switch parts[0] { - case "cpu": - if len(parts) < 8 { - return 0, fmt.Errorf("invalid number of cpu fields") - } - var totalClockTicks uint64 - for _, i := range parts[1:8] { - v, err := strconv.ParseUint(i, 10, 64) - if err != nil { - return 0, fmt.Errorf("Unable to convert value %s to int: %s", i, err) - } - totalClockTicks += v - } - return (totalClockTicks * nanoSecondsPerSecond) / - clockTicksPerSecond, nil - } - } - return 0, fmt.Errorf("invalid stat format. Error trying to parse the '/proc/stat' file") -} - -func (s *Collector) getNumberOnlineCPUs() (uint32, error) { - var cpuset unix.CPUSet - err := unix.SchedGetaffinity(0, &cpuset) - if err != nil { - return 0, err - } - return uint32(cpuset.Count()), nil -} diff --git a/daemon/stats/collector_windows.go b/daemon/stats/collector_windows.go deleted file mode 100644 index d8e4b37507..0000000000 --- a/daemon/stats/collector_windows.go +++ /dev/null @@ -1,12 +0,0 @@ -package stats // import "github.com/docker/docker/daemon/stats" - -// getSystemCPUUsage returns the host system's cpu usage in -// nanoseconds. An error is returned if the format of the underlying -// file does not match. This is a no-op on Windows. -func (s *Collector) getSystemCPUUsage() (uint64, error) { - return 0, nil -} - -func (s *Collector) getNumberOnlineCPUs() (uint32, error) { - return 0, nil -} diff --git a/daemon/stats_unix.go b/daemon/stats_unix.go index 3a64785254..7c784b716c 100644 --- a/daemon/stats_unix.go +++ b/daemon/stats_unix.go @@ -3,7 +3,11 @@ package daemon // import "github.com/docker/docker/daemon" import ( + "bufio" "context" + "fmt" + "os" + "strconv" "strings" statsV1 "github.com/containerd/cgroups/v3/cgroup1/stats" @@ -296,3 +300,60 @@ func (daemon *Daemon) getNetworkStats(c *container.Container) (map[string]types. return stats, nil } + +const ( + // The value comes from `C.sysconf(C._SC_CLK_TCK)`, and + // on Linux it's a constant which is safe to be hard coded, + // so we can avoid using cgo here. For details, see: + // https://github.com/containerd/cgroups/pull/12 + clockTicksPerSecond = 100 + nanoSecondsPerSecond = 1e9 +) + +// getSystemCPUUsage returns the host system's cpu usage in +// nanoseconds and number of online CPUs. An error is returned +// if the format of the underlying file does not match. +// +// Uses /proc/stat defined by POSIX. Looks for the cpu +// statistics line and then sums up the first seven fields +// provided. See `man 5 proc` for details on specific field +// information. +func getSystemCPUUsage() (cpuUsage uint64, cpuNum uint32, err error) { + f, err := os.Open("/proc/stat") + if err != nil { + return 0, 0, err + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := scanner.Text() + if len(line) < 4 || line[:3] != "cpu" { + break // Assume all cpu* records are at the front, like glibc https://github.com/bminor/glibc/blob/5d00c201b9a2da768a79ea8d5311f257871c0b43/sysdeps/unix/sysv/linux/getsysstats.c#L108-L135 + } + if line[3] == ' ' { + parts := strings.Fields(line) + if len(parts) < 8 { + return 0, 0, fmt.Errorf("invalid number of cpu fields") + } + var totalClockTicks uint64 + for _, i := range parts[1:8] { + v, err := strconv.ParseUint(i, 10, 64) + if err != nil { + return 0, 0, fmt.Errorf("Unable to convert value %s to int: %w", i, err) + } + totalClockTicks += v + } + cpuUsage = (totalClockTicks * nanoSecondsPerSecond) / + clockTicksPerSecond + } + if '0' <= line[3] && line[3] <= '9' { + cpuNum++ + } + } + + if err := scanner.Err(); err != nil { + return 0, 0, fmt.Errorf("error scanning '/proc/stat' file: %w", err) + } + return +} diff --git a/daemon/stats_windows.go b/daemon/stats_windows.go index e8547482a1..21724e2e0a 100644 --- a/daemon/stats_windows.go +++ b/daemon/stats_windows.go @@ -77,3 +77,11 @@ func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) { func (daemon *Daemon) getNetworkStats(c *container.Container) (map[string]types.NetworkStats, error) { return make(map[string]types.NetworkStats), nil } + +// getSystemCPUUsage returns the host system's cpu usage in +// nanoseconds and number of online CPUs. An error is returned +// if the format of the underlying file does not match. +// This is a no-op on Windows. +func getSystemCPUUsage() (uint64, uint32, error) { + return 0, 0, nil +}