소스 검색

Merge pull request #46448 from xinfengliu/improve-stats-collector

Make one-shot stats faster
Brian Goff 1 년 전
부모
커밋
f6fa56194f
6개의 변경된 파일107개의 추가작업 그리고 132개의 파일을 삭제
  1. 34 5
      daemon/stats.go
  2. 4 40
      daemon/stats/collector.go
  3. 0 75
      daemon/stats/collector_unix.go
  4. 0 12
      daemon/stats/collector_windows.go
  5. 61 0
      daemon/stats_unix.go
  6. 8 0
      daemon/stats_windows.go

+ 34 - 5
daemon/stats.go

@@ -7,6 +7,7 @@ import (
 	"runtime"
 	"runtime"
 	"time"
 	"time"
 
 
+	"github.com/containerd/containerd/log"
 	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types/backend"
 	"github.com/docker/docker/api/types/backend"
 	"github.com/docker/docker/api/types/versions"
 	"github.com/docker/docker/api/types/versions"
@@ -43,6 +44,15 @@ func (daemon *Daemon) ContainerStats(ctx context.Context, prefixOrName string, c
 		})
 		})
 	}
 	}
 
 
+	// Get container stats directly if OneShot is set
+	if config.OneShot {
+		stats, err := daemon.GetContainerStats(ctr)
+		if err != nil {
+			return err
+		}
+		return json.NewEncoder(config.OutStream).Encode(stats)
+	}
+
 	outStream := config.OutStream
 	outStream := config.OutStream
 	if config.Stream {
 	if config.Stream {
 		wf := ioutils.NewWriteFlusher(outStream)
 		wf := ioutils.NewWriteFlusher(outStream)
@@ -148,15 +158,34 @@ func (daemon *Daemon) unsubscribeToContainerStats(c *container.Container, ch cha
 func (daemon *Daemon) GetContainerStats(container *container.Container) (*types.StatsJSON, error) {
 func (daemon *Daemon) GetContainerStats(container *container.Container) (*types.StatsJSON, error) {
 	stats, err := daemon.stats(container)
 	stats, err := daemon.stats(container)
 	if err != nil {
 	if err != nil {
-		return nil, err
+		goto done
+	}
+
+	// Sample system CPU usage close to container usage to avoid
+	// noise in metric calculations.
+	// FIXME: move to containerd on Linux (not Windows)
+	stats.CPUStats.SystemUsage, stats.CPUStats.OnlineCPUs, err = getSystemCPUUsage()
+	if err != nil {
+		goto done
 	}
 	}
 
 
 	// We already have the network stats on Windows directly from HCS.
 	// We already have the network stats on Windows directly from HCS.
 	if !container.Config.NetworkDisabled && runtime.GOOS != "windows" {
 	if !container.Config.NetworkDisabled && runtime.GOOS != "windows" {
-		if stats.Networks, err = daemon.getNetworkStats(container); err != nil {
-			return nil, err
-		}
+		stats.Networks, err = daemon.getNetworkStats(container)
 	}
 	}
 
 
-	return stats, nil
+done:
+	switch err.(type) {
+	case nil:
+		return stats, nil
+	case errdefs.ErrConflict, errdefs.ErrNotFound:
+		// return empty stats containing only name and ID if not running or not found
+		return &types.StatsJSON{
+			Name: container.Name,
+			ID:   container.ID,
+		}, nil
+	default:
+		log.G(context.TODO()).Errorf("collecting stats for container %s: %v", container.Name, err)
+		return nil, err
+	}
 }
 }

+ 4 - 40
daemon/stats/collector.go

@@ -1,15 +1,11 @@
 package stats // import "github.com/docker/docker/daemon/stats"
 package stats // import "github.com/docker/docker/daemon/stats"
 
 
 import (
 import (
-	"bufio"
-	"context"
 	"sync"
 	"sync"
 	"time"
 	"time"
 
 
-	"github.com/containerd/containerd/log"
 	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/container"
-	"github.com/docker/docker/errdefs"
 	"github.com/moby/pubsub"
 	"github.com/moby/pubsub"
 )
 )
 
 
@@ -20,7 +16,6 @@ type Collector struct {
 	supervisor supervisor
 	supervisor supervisor
 	interval   time.Duration
 	interval   time.Duration
 	publishers map[*container.Container]*pubsub.Publisher
 	publishers map[*container.Container]*pubsub.Publisher
-	bufReader  *bufio.Reader
 }
 }
 
 
 // NewCollector creates a stats collector that will poll the supervisor with the specified interval
 // NewCollector creates a stats collector that will poll the supervisor with the specified interval
@@ -29,7 +24,6 @@ func NewCollector(supervisor supervisor, interval time.Duration) *Collector {
 		interval:   interval,
 		interval:   interval,
 		supervisor: supervisor,
 		supervisor: supervisor,
 		publishers: make(map[*container.Container]*pubsub.Publisher),
 		publishers: make(map[*container.Container]*pubsub.Publisher),
-		bufReader:  bufio.NewReaderSize(nil, 128),
 	}
 	}
 	s.cond = sync.NewCond(&s.m)
 	s.cond = sync.NewCond(&s.m)
 	return s
 	return s
@@ -108,45 +102,15 @@ func (s *Collector) Run() {
 
 
 		s.cond.L.Unlock()
 		s.cond.L.Unlock()
 
 
-		onlineCPUs, err := s.getNumberOnlineCPUs()
-		if err != nil {
-			log.G(context.TODO()).Errorf("collecting system online cpu count: %v", err)
-			continue
-		}
-
 		for _, pair := range pairs {
 		for _, pair := range pairs {
 			stats, err := s.supervisor.GetContainerStats(pair.container)
 			stats, err := s.supervisor.GetContainerStats(pair.container)
-
-			switch err.(type) {
-			case nil:
-				// Sample system CPU usage close to container usage to avoid
-				// noise in metric calculations.
-				systemUsage, err := s.getSystemCPUUsage()
-				if err != nil {
-					log.G(context.TODO()).WithError(err).WithField("container_id", pair.container.ID).Errorf("collecting system cpu usage")
-					continue
-				}
-
-				// FIXME: move to containerd on Linux (not Windows)
-				stats.CPUStats.SystemUsage = systemUsage
-				stats.CPUStats.OnlineCPUs = onlineCPUs
-
-				pair.publisher.Publish(*stats)
-
-			case errdefs.ErrConflict, errdefs.ErrNotFound:
-				// publish empty stats containing only name and ID if not running or not found
-				pair.publisher.Publish(types.StatsJSON{
+			if err != nil {
+				stats = &types.StatsJSON{
 					Name: pair.container.Name,
 					Name: pair.container.Name,
 					ID:   pair.container.ID,
 					ID:   pair.container.ID,
-				})
-
-			default:
-				log.G(context.TODO()).Errorf("collecting stats for %s: %v", pair.container.ID, err)
-				pair.publisher.Publish(types.StatsJSON{
-					Name: pair.container.Name,
-					ID:   pair.container.ID,
-				})
+				}
 			}
 			}
+			pair.publisher.Publish(*stats)
 		}
 		}
 
 
 		time.Sleep(s.interval)
 		time.Sleep(s.interval)

+ 0 - 75
daemon/stats/collector_unix.go

@@ -1,75 +0,0 @@
-//go:build !windows
-
-package stats // import "github.com/docker/docker/daemon/stats"
-
-import (
-	"fmt"
-	"os"
-	"strconv"
-	"strings"
-
-	"golang.org/x/sys/unix"
-)
-
-const (
-	// The value comes from `C.sysconf(C._SC_CLK_TCK)`, and
-	// on Linux it's a constant which is safe to be hard coded,
-	// so we can avoid using cgo here. For details, see:
-	// https://github.com/containerd/cgroups/pull/12
-	clockTicksPerSecond  = 100
-	nanoSecondsPerSecond = 1e9
-)
-
-// getSystemCPUUsage returns the host system's cpu usage in
-// nanoseconds. An error is returned if the format of the underlying
-// file does not match.
-//
-// Uses /proc/stat defined by POSIX. Looks for the cpu
-// statistics line and then sums up the first seven fields
-// provided. See `man 5 proc` for details on specific field
-// information.
-func (s *Collector) getSystemCPUUsage() (uint64, error) {
-	f, err := os.Open("/proc/stat")
-	if err != nil {
-		return 0, err
-	}
-	defer func() {
-		s.bufReader.Reset(nil)
-		f.Close()
-	}()
-	s.bufReader.Reset(f)
-
-	for {
-		line, err := s.bufReader.ReadString('\n')
-		if err != nil {
-			break
-		}
-		parts := strings.Fields(line)
-		switch parts[0] {
-		case "cpu":
-			if len(parts) < 8 {
-				return 0, fmt.Errorf("invalid number of cpu fields")
-			}
-			var totalClockTicks uint64
-			for _, i := range parts[1:8] {
-				v, err := strconv.ParseUint(i, 10, 64)
-				if err != nil {
-					return 0, fmt.Errorf("Unable to convert value %s to int: %s", i, err)
-				}
-				totalClockTicks += v
-			}
-			return (totalClockTicks * nanoSecondsPerSecond) /
-				clockTicksPerSecond, nil
-		}
-	}
-	return 0, fmt.Errorf("invalid stat format. Error trying to parse the '/proc/stat' file")
-}
-
-func (s *Collector) getNumberOnlineCPUs() (uint32, error) {
-	var cpuset unix.CPUSet
-	err := unix.SchedGetaffinity(0, &cpuset)
-	if err != nil {
-		return 0, err
-	}
-	return uint32(cpuset.Count()), nil
-}

+ 0 - 12
daemon/stats/collector_windows.go

@@ -1,12 +0,0 @@
-package stats // import "github.com/docker/docker/daemon/stats"
-
-// getSystemCPUUsage returns the host system's cpu usage in
-// nanoseconds. An error is returned if the format of the underlying
-// file does not match. This is a no-op on Windows.
-func (s *Collector) getSystemCPUUsage() (uint64, error) {
-	return 0, nil
-}
-
-func (s *Collector) getNumberOnlineCPUs() (uint32, error) {
-	return 0, nil
-}

+ 61 - 0
daemon/stats_unix.go

@@ -3,7 +3,11 @@
 package daemon // import "github.com/docker/docker/daemon"
 package daemon // import "github.com/docker/docker/daemon"
 
 
 import (
 import (
+	"bufio"
 	"context"
 	"context"
+	"fmt"
+	"os"
+	"strconv"
 	"strings"
 	"strings"
 
 
 	statsV1 "github.com/containerd/cgroups/v3/cgroup1/stats"
 	statsV1 "github.com/containerd/cgroups/v3/cgroup1/stats"
@@ -296,3 +300,60 @@ func (daemon *Daemon) getNetworkStats(c *container.Container) (map[string]types.
 
 
 	return stats, nil
 	return stats, nil
 }
 }
+
+const (
+	// The value comes from `C.sysconf(C._SC_CLK_TCK)`, and
+	// on Linux it's a constant which is safe to be hard coded,
+	// so we can avoid using cgo here. For details, see:
+	// https://github.com/containerd/cgroups/pull/12
+	clockTicksPerSecond  = 100
+	nanoSecondsPerSecond = 1e9
+)
+
+// getSystemCPUUsage returns the host system's cpu usage in
+// nanoseconds and number of online CPUs. An error is returned
+// if the format of the underlying file does not match.
+//
+// Uses /proc/stat defined by POSIX. Looks for the cpu
+// statistics line and then sums up the first seven fields
+// provided. See `man 5 proc` for details on specific field
+// information.
+func getSystemCPUUsage() (cpuUsage uint64, cpuNum uint32, err error) {
+	f, err := os.Open("/proc/stat")
+	if err != nil {
+		return 0, 0, err
+	}
+	defer f.Close()
+
+	scanner := bufio.NewScanner(f)
+	for scanner.Scan() {
+		line := scanner.Text()
+		if len(line) < 4 || line[:3] != "cpu" {
+			break // Assume all cpu* records are at the front, like glibc https://github.com/bminor/glibc/blob/5d00c201b9a2da768a79ea8d5311f257871c0b43/sysdeps/unix/sysv/linux/getsysstats.c#L108-L135
+		}
+		if line[3] == ' ' {
+			parts := strings.Fields(line)
+			if len(parts) < 8 {
+				return 0, 0, fmt.Errorf("invalid number of cpu fields")
+			}
+			var totalClockTicks uint64
+			for _, i := range parts[1:8] {
+				v, err := strconv.ParseUint(i, 10, 64)
+				if err != nil {
+					return 0, 0, fmt.Errorf("Unable to convert value %s to int: %w", i, err)
+				}
+				totalClockTicks += v
+			}
+			cpuUsage = (totalClockTicks * nanoSecondsPerSecond) /
+				clockTicksPerSecond
+		}
+		if '0' <= line[3] && line[3] <= '9' {
+			cpuNum++
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		return 0, 0, fmt.Errorf("error scanning '/proc/stat' file: %w", err)
+	}
+	return
+}

+ 8 - 0
daemon/stats_windows.go

@@ -77,3 +77,11 @@ func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
 func (daemon *Daemon) getNetworkStats(c *container.Container) (map[string]types.NetworkStats, error) {
 func (daemon *Daemon) getNetworkStats(c *container.Container) (map[string]types.NetworkStats, error) {
 	return make(map[string]types.NetworkStats), nil
 	return make(map[string]types.NetworkStats), nil
 }
 }
+
+// getSystemCPUUsage returns the host system's cpu usage in
+// nanoseconds and number of online CPUs. An error is returned
+// if the format of the underlying file does not match.
+// This is a no-op on Windows.
+func getSystemCPUUsage() (uint64, uint32, error) {
+	return 0, 0, nil
+}