Ver Fonte

Merge pull request #36519 from stevvooe/resilient-cpu-sampling

daemon/stats: more resilient cpu sampling
Yong Tang há 7 anos
pai
commit
623b1a5c3c
1 ficheiro alterado com 11 adições e 7 exclusões
  1. 11 7
      daemon/stats/collector.go

+ 11 - 7
daemon/stats/collector.go

@@ -90,7 +90,7 @@ func (s *Collector) Run() {
 	// it will grow enough in first iteration
 	var pairs []publishersPair
 
-	for range time.Tick(s.interval) {
+	for {
 		// it does not make sense in the first iteration,
 		// but saves allocations in further iterations
 		pairs = pairs[:0]
@@ -105,12 +105,6 @@ func (s *Collector) Run() {
 			continue
 		}
 
-		systemUsage, err := s.getSystemCPUUsage()
-		if err != nil {
-			logrus.Errorf("collecting system cpu usage: %v", err)
-			continue
-		}
-
 		onlineCPUs, err := s.getNumberOnlineCPUs()
 		if err != nil {
 			logrus.Errorf("collecting system online cpu count: %v", err)
@@ -122,6 +116,14 @@ func (s *Collector) Run() {
 
 			switch err.(type) {
 			case nil:
+				// Sample system CPU usage close to container usage to avoid
+				// noise in metric calculations.
+				systemUsage, err := s.getSystemCPUUsage()
+				if err != nil {
+					logrus.WithError(err).WithField("container_id", pair.container.ID).Errorf("collecting system cpu usage")
+					continue
+				}
+
 				// FIXME: move to containerd on Linux (not Windows)
 				stats.CPUStats.SystemUsage = systemUsage
 				stats.CPUStats.OnlineCPUs = onlineCPUs
@@ -139,6 +141,8 @@ func (s *Collector) Run() {
 				logrus.Errorf("collecting stats for %s: %v", pair.container.ID, err)
 			}
 		}
+
+		time.Sleep(s.interval)
 	}
 }