Merge pull request #46617 from xinfengliu/23.0_backport_improve_stats_collector
[23.0 backport] Make one-shot stats faster
This commit is contained in:
commit
6b3cb4aaed
6 changed files with 110 additions and 133 deletions
|
@ -7,6 +7,7 @@ import (
|
||||||
"runtime"
|
"runtime"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/containerd/containerd/log"
|
||||||
"github.com/docker/docker/api/types"
|
"github.com/docker/docker/api/types"
|
||||||
"github.com/docker/docker/api/types/backend"
|
"github.com/docker/docker/api/types/backend"
|
||||||
"github.com/docker/docker/api/types/versions"
|
"github.com/docker/docker/api/types/versions"
|
||||||
|
@ -43,6 +44,15 @@ func (daemon *Daemon) ContainerStats(ctx context.Context, prefixOrName string, c
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Get container stats directly if OneShot is set
|
||||||
|
if config.OneShot {
|
||||||
|
stats, err := daemon.GetContainerStats(ctr)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return json.NewEncoder(config.OutStream).Encode(stats)
|
||||||
|
}
|
||||||
|
|
||||||
outStream := config.OutStream
|
outStream := config.OutStream
|
||||||
if config.Stream {
|
if config.Stream {
|
||||||
wf := ioutils.NewWriteFlusher(outStream)
|
wf := ioutils.NewWriteFlusher(outStream)
|
||||||
|
@ -148,15 +158,34 @@ func (daemon *Daemon) unsubscribeToContainerStats(c *container.Container, ch cha
|
||||||
func (daemon *Daemon) GetContainerStats(container *container.Container) (*types.StatsJSON, error) {
|
func (daemon *Daemon) GetContainerStats(container *container.Container) (*types.StatsJSON, error) {
|
||||||
stats, err := daemon.stats(container)
|
stats, err := daemon.stats(container)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
goto done
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sample system CPU usage close to container usage to avoid
|
||||||
|
// noise in metric calculations.
|
||||||
|
// FIXME: move to containerd on Linux (not Windows)
|
||||||
|
stats.CPUStats.SystemUsage, stats.CPUStats.OnlineCPUs, err = getSystemCPUUsage()
|
||||||
|
if err != nil {
|
||||||
|
goto done
|
||||||
}
|
}
|
||||||
|
|
||||||
// We already have the network stats on Windows directly from HCS.
|
// We already have the network stats on Windows directly from HCS.
|
||||||
if !container.Config.NetworkDisabled && runtime.GOOS != "windows" {
|
if !container.Config.NetworkDisabled && runtime.GOOS != "windows" {
|
||||||
if stats.Networks, err = daemon.getNetworkStats(container); err != nil {
|
stats.Networks, err = daemon.getNetworkStats(container)
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return stats, nil
|
done:
|
||||||
|
switch err.(type) {
|
||||||
|
case nil:
|
||||||
|
return stats, nil
|
||||||
|
case errdefs.ErrConflict, errdefs.ErrNotFound:
|
||||||
|
// return empty stats containing only name and ID if not running or not found
|
||||||
|
return &types.StatsJSON{
|
||||||
|
Name: container.Name,
|
||||||
|
ID: container.ID,
|
||||||
|
}, nil
|
||||||
|
default:
|
||||||
|
log.G(context.TODO()).Errorf("collecting stats for container %s: %v", container.Name, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,15 +1,12 @@
|
||||||
package stats // import "github.com/docker/docker/daemon/stats"
|
package stats // import "github.com/docker/docker/daemon/stats"
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/docker/docker/api/types"
|
"github.com/docker/docker/api/types"
|
||||||
"github.com/docker/docker/container"
|
"github.com/docker/docker/container"
|
||||||
"github.com/docker/docker/errdefs"
|
|
||||||
"github.com/moby/pubsub"
|
"github.com/moby/pubsub"
|
||||||
"github.com/sirupsen/logrus"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Collector manages and provides container resource stats
|
// Collector manages and provides container resource stats
|
||||||
|
@ -19,7 +16,6 @@ type Collector struct {
|
||||||
supervisor supervisor
|
supervisor supervisor
|
||||||
interval time.Duration
|
interval time.Duration
|
||||||
publishers map[*container.Container]*pubsub.Publisher
|
publishers map[*container.Container]*pubsub.Publisher
|
||||||
bufReader *bufio.Reader
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewCollector creates a stats collector that will poll the supervisor with the specified interval
|
// NewCollector creates a stats collector that will poll the supervisor with the specified interval
|
||||||
|
@ -28,7 +24,6 @@ func NewCollector(supervisor supervisor, interval time.Duration) *Collector {
|
||||||
interval: interval,
|
interval: interval,
|
||||||
supervisor: supervisor,
|
supervisor: supervisor,
|
||||||
publishers: make(map[*container.Container]*pubsub.Publisher),
|
publishers: make(map[*container.Container]*pubsub.Publisher),
|
||||||
bufReader: bufio.NewReaderSize(nil, 128),
|
|
||||||
}
|
}
|
||||||
s.cond = sync.NewCond(&s.m)
|
s.cond = sync.NewCond(&s.m)
|
||||||
return s
|
return s
|
||||||
|
@ -107,45 +102,15 @@ func (s *Collector) Run() {
|
||||||
|
|
||||||
s.cond.L.Unlock()
|
s.cond.L.Unlock()
|
||||||
|
|
||||||
onlineCPUs, err := s.getNumberOnlineCPUs()
|
|
||||||
if err != nil {
|
|
||||||
logrus.Errorf("collecting system online cpu count: %v", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, pair := range pairs {
|
for _, pair := range pairs {
|
||||||
stats, err := s.supervisor.GetContainerStats(pair.container)
|
stats, err := s.supervisor.GetContainerStats(pair.container)
|
||||||
|
if err != nil {
|
||||||
switch err.(type) {
|
stats = &types.StatsJSON{
|
||||||
case nil:
|
Name: pair.container.Name,
|
||||||
// Sample system CPU usage close to container usage to avoid
|
ID: pair.container.ID,
|
||||||
// noise in metric calculations.
|
|
||||||
systemUsage, err := s.getSystemCPUUsage()
|
|
||||||
if err != nil {
|
|
||||||
logrus.WithError(err).WithField("container_id", pair.container.ID).Errorf("collecting system cpu usage")
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: move to containerd on Linux (not Windows)
|
|
||||||
stats.CPUStats.SystemUsage = systemUsage
|
|
||||||
stats.CPUStats.OnlineCPUs = onlineCPUs
|
|
||||||
|
|
||||||
pair.publisher.Publish(*stats)
|
|
||||||
|
|
||||||
case errdefs.ErrConflict, errdefs.ErrNotFound:
|
|
||||||
// publish empty stats containing only name and ID if not running or not found
|
|
||||||
pair.publisher.Publish(types.StatsJSON{
|
|
||||||
Name: pair.container.Name,
|
|
||||||
ID: pair.container.ID,
|
|
||||||
})
|
|
||||||
|
|
||||||
default:
|
|
||||||
logrus.Errorf("collecting stats for %s: %v", pair.container.ID, err)
|
|
||||||
pair.publisher.Publish(types.StatsJSON{
|
|
||||||
Name: pair.container.Name,
|
|
||||||
ID: pair.container.ID,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
pair.publisher.Publish(*stats)
|
||||||
}
|
}
|
||||||
|
|
||||||
time.Sleep(s.interval)
|
time.Sleep(s.interval)
|
||||||
|
|
|
@ -1,76 +0,0 @@
|
||||||
//go:build !windows
|
|
||||||
// +build !windows
|
|
||||||
|
|
||||||
package stats // import "github.com/docker/docker/daemon/stats"
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
|
||||||
// The value comes from `C.sysconf(C._SC_CLK_TCK)`, and
|
|
||||||
// on Linux it's a constant which is safe to be hard coded,
|
|
||||||
// so we can avoid using cgo here. For details, see:
|
|
||||||
// https://github.com/containerd/cgroups/pull/12
|
|
||||||
clockTicksPerSecond = 100
|
|
||||||
nanoSecondsPerSecond = 1e9
|
|
||||||
)
|
|
||||||
|
|
||||||
// getSystemCPUUsage returns the host system's cpu usage in
|
|
||||||
// nanoseconds. An error is returned if the format of the underlying
|
|
||||||
// file does not match.
|
|
||||||
//
|
|
||||||
// Uses /proc/stat defined by POSIX. Looks for the cpu
|
|
||||||
// statistics line and then sums up the first seven fields
|
|
||||||
// provided. See `man 5 proc` for details on specific field
|
|
||||||
// information.
|
|
||||||
func (s *Collector) getSystemCPUUsage() (uint64, error) {
|
|
||||||
f, err := os.Open("/proc/stat")
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
defer func() {
|
|
||||||
s.bufReader.Reset(nil)
|
|
||||||
f.Close()
|
|
||||||
}()
|
|
||||||
s.bufReader.Reset(f)
|
|
||||||
|
|
||||||
for {
|
|
||||||
line, err := s.bufReader.ReadString('\n')
|
|
||||||
if err != nil {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
parts := strings.Fields(line)
|
|
||||||
switch parts[0] {
|
|
||||||
case "cpu":
|
|
||||||
if len(parts) < 8 {
|
|
||||||
return 0, fmt.Errorf("invalid number of cpu fields")
|
|
||||||
}
|
|
||||||
var totalClockTicks uint64
|
|
||||||
for _, i := range parts[1:8] {
|
|
||||||
v, err := strconv.ParseUint(i, 10, 64)
|
|
||||||
if err != nil {
|
|
||||||
return 0, fmt.Errorf("Unable to convert value %s to int: %s", i, err)
|
|
||||||
}
|
|
||||||
totalClockTicks += v
|
|
||||||
}
|
|
||||||
return (totalClockTicks * nanoSecondsPerSecond) /
|
|
||||||
clockTicksPerSecond, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0, fmt.Errorf("invalid stat format. Error trying to parse the '/proc/stat' file")
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *Collector) getNumberOnlineCPUs() (uint32, error) {
|
|
||||||
var cpuset unix.CPUSet
|
|
||||||
err := unix.SchedGetaffinity(0, &cpuset)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
return uint32(cpuset.Count()), nil
|
|
||||||
}
|
|
|
@ -1,12 +0,0 @@
|
||||||
package stats // import "github.com/docker/docker/daemon/stats"
|
|
||||||
|
|
||||||
// getSystemCPUUsage returns the host system's cpu usage in
|
|
||||||
// nanoseconds. An error is returned if the format of the underlying
|
|
||||||
// file does not match. This is a no-op on Windows.
|
|
||||||
func (s *Collector) getSystemCPUUsage() (uint64, error) {
|
|
||||||
return 0, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *Collector) getNumberOnlineCPUs() (uint32, error) {
|
|
||||||
return 0, nil
|
|
||||||
}
|
|
|
@ -4,6 +4,12 @@
|
||||||
package daemon // import "github.com/docker/docker/daemon"
|
package daemon // import "github.com/docker/docker/daemon"
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/docker/docker/api/types"
|
"github.com/docker/docker/api/types"
|
||||||
"github.com/docker/docker/container"
|
"github.com/docker/docker/container"
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
|
@ -56,3 +62,60 @@ func (daemon *Daemon) getNetworkStats(c *container.Container) (map[string]types.
|
||||||
|
|
||||||
return stats, nil
|
return stats, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
// The value comes from `C.sysconf(C._SC_CLK_TCK)`, and
|
||||||
|
// on Linux it's a constant which is safe to be hard coded,
|
||||||
|
// so we can avoid using cgo here. For details, see:
|
||||||
|
// https://github.com/containerd/cgroups/pull/12
|
||||||
|
clockTicksPerSecond = 100
|
||||||
|
nanoSecondsPerSecond = 1e9
|
||||||
|
)
|
||||||
|
|
||||||
|
// getSystemCPUUsage returns the host system's cpu usage in
|
||||||
|
// nanoseconds and number of online CPUs. An error is returned
|
||||||
|
// if the format of the underlying file does not match.
|
||||||
|
//
|
||||||
|
// Uses /proc/stat defined by POSIX. Looks for the cpu
|
||||||
|
// statistics line and then sums up the first seven fields
|
||||||
|
// provided. See `man 5 proc` for details on specific field
|
||||||
|
// information.
|
||||||
|
func getSystemCPUUsage() (cpuUsage uint64, cpuNum uint32, err error) {
|
||||||
|
f, err := os.Open("/proc/stat")
|
||||||
|
if err != nil {
|
||||||
|
return 0, 0, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
scanner := bufio.NewScanner(f)
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := scanner.Text()
|
||||||
|
if len(line) < 4 || line[:3] != "cpu" {
|
||||||
|
break // Assume all cpu* records are at the front, like glibc https://github.com/bminor/glibc/blob/5d00c201b9a2da768a79ea8d5311f257871c0b43/sysdeps/unix/sysv/linux/getsysstats.c#L108-L135
|
||||||
|
}
|
||||||
|
if line[3] == ' ' {
|
||||||
|
parts := strings.Fields(line)
|
||||||
|
if len(parts) < 8 {
|
||||||
|
return 0, 0, fmt.Errorf("invalid number of cpu fields")
|
||||||
|
}
|
||||||
|
var totalClockTicks uint64
|
||||||
|
for _, i := range parts[1:8] {
|
||||||
|
v, err := strconv.ParseUint(i, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return 0, 0, fmt.Errorf("Unable to convert value %s to int: %w", i, err)
|
||||||
|
}
|
||||||
|
totalClockTicks += v
|
||||||
|
}
|
||||||
|
cpuUsage = (totalClockTicks * nanoSecondsPerSecond) /
|
||||||
|
clockTicksPerSecond
|
||||||
|
}
|
||||||
|
if '0' <= line[3] && line[3] <= '9' {
|
||||||
|
cpuNum++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := scanner.Err(); err != nil {
|
||||||
|
return 0, 0, fmt.Errorf("error scanning '/proc/stat' file: %w", err)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
|
@ -9,3 +9,11 @@ import (
|
||||||
func (daemon *Daemon) getNetworkStats(c *container.Container) (map[string]types.NetworkStats, error) {
|
func (daemon *Daemon) getNetworkStats(c *container.Container) (map[string]types.NetworkStats, error) {
|
||||||
return make(map[string]types.NetworkStats), nil
|
return make(map[string]types.NetworkStats), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getSystemCPUUsage returns the host system's cpu usage in
|
||||||
|
// nanoseconds and number of online CPUs. An error is returned
|
||||||
|
// if the format of the underlying file does not match.
|
||||||
|
// This is a no-op on Windows.
|
||||||
|
func getSystemCPUUsage() (uint64, uint32, error) {
|
||||||
|
return 0, 0, nil
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue