Prechádzať zdrojové kódy

Merge pull request #29554 from cpuguy83/keep_running_count_of_states

Use counter for tracking container states
Brian Goff 8 rokov pred
rodič
commit
669f4ba37e

+ 1 - 0
daemon/create.go

@@ -151,6 +151,7 @@ func (daemon *Daemon) create(params types.ContainerCreateConfig, managed bool) (
 		return nil, err
 		return nil, err
 	}
 	}
 	daemon.Register(container)
 	daemon.Register(container)
+	stateCtr.set(container.ID, "stopped")
 	daemon.LogContainerEvent(container, "create")
 	daemon.LogContainerEvent(container, "create")
 	return container, nil
 	return container, nil
 }
 }

+ 1 - 0
daemon/daemon.go

@@ -198,6 +198,7 @@ func (daemon *Daemon) restore() error {
 			if err := backportMountSpec(c); err != nil {
 			if err := backportMountSpec(c); err != nil {
 				logrus.Error("Failed to migrate old mounts to use new spec format")
 				logrus.Error("Failed to migrate old mounts to use new spec format")
 			}
 			}
+			daemon.setStateCounter(c)
 
 
 			if c.IsRunning() || c.IsPaused() {
 			if c.IsRunning() || c.IsPaused() {
 				c.RestartManager().Cancel() // manually start containers because some need to wait for swarm networking
 				c.RestartManager().Cancel() // manually start containers because some need to wait for swarm networking

+ 1 - 0
daemon/delete.go

@@ -124,6 +124,7 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo
 				logrus.Error(e)
 				logrus.Error(e)
 			}
 			}
 			daemon.LogContainerEvent(container, "destroy")
 			daemon.LogContainerEvent(container, "destroy")
+			stateCtr.del(container.ID)
 		}
 		}
 	}()
 	}()
 
 

+ 1 - 14
daemon/info.go

@@ -4,14 +4,12 @@ import (
 	"fmt"
 	"fmt"
 	"os"
 	"os"
 	"runtime"
 	"runtime"
-	"sync/atomic"
 	"time"
 	"time"
 
 
 	"github.com/Sirupsen/logrus"
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/api"
 	"github.com/docker/docker/api"
 	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/cli/debug"
 	"github.com/docker/docker/cli/debug"
-	"github.com/docker/docker/container"
 	"github.com/docker/docker/daemon/logger"
 	"github.com/docker/docker/daemon/logger"
 	"github.com/docker/docker/dockerversion"
 	"github.com/docker/docker/dockerversion"
 	"github.com/docker/docker/pkg/fileutils"
 	"github.com/docker/docker/pkg/fileutils"
@@ -58,18 +56,7 @@ func (daemon *Daemon) SystemInfo() (*types.Info, error) {
 	}
 	}
 
 
 	sysInfo := sysinfo.New(true)
 	sysInfo := sysinfo.New(true)
-
-	var cRunning, cPaused, cStopped int32
-	daemon.containers.ApplyAll(func(c *container.Container) {
-		switch c.StateString() {
-		case "paused":
-			atomic.AddInt32(&cPaused, 1)
-		case "running":
-			atomic.AddInt32(&cRunning, 1)
-		default:
-			atomic.AddInt32(&cStopped, 1)
-		}
-	})
+	cRunning, cPaused, cStopped := stateCtr.get()
 
 
 	securityOptions := []string{}
 	securityOptions := []string{}
 	if sysInfo.AppArmor {
 	if sysInfo.AppArmor {

+ 65 - 1
daemon/metrics.go

@@ -1,9 +1,15 @@
 package daemon
 package daemon
 
 
-import "github.com/docker/go-metrics"
+import (
+	"sync"
+
+	"github.com/docker/go-metrics"
+	"github.com/prometheus/client_golang/prometheus"
+)
 
 
 var (
 var (
 	containerActions          metrics.LabeledTimer
 	containerActions          metrics.LabeledTimer
+	containerStates           metrics.LabeledGauge
 	imageActions              metrics.LabeledTimer
 	imageActions              metrics.LabeledTimer
 	networkActions            metrics.LabeledTimer
 	networkActions            metrics.LabeledTimer
 	engineVersion             metrics.LabeledGauge
 	engineVersion             metrics.LabeledGauge
@@ -11,6 +17,8 @@ var (
 	engineMemory              metrics.Gauge
 	engineMemory              metrics.Gauge
 	healthChecksCounter       metrics.Counter
 	healthChecksCounter       metrics.Counter
 	healthChecksFailedCounter metrics.Counter
 	healthChecksFailedCounter metrics.Counter
+
+	stateCtr *stateCounter
 )
 )
 
 
 func init() {
 func init() {
@@ -25,6 +33,7 @@ func init() {
 	} {
 	} {
 		containerActions.WithValues(a).Update(0)
 		containerActions.WithValues(a).Update(0)
 	}
 	}
+
 	networkActions = ns.NewLabeledTimer("network_actions", "The number of seconds it takes to process each network action", "action")
 	networkActions = ns.NewLabeledTimer("network_actions", "The number of seconds it takes to process each network action", "action")
 	engineVersion = ns.NewLabeledGauge("engine", "The version and commit information for the engine process", metrics.Unit("info"),
 	engineVersion = ns.NewLabeledGauge("engine", "The version and commit information for the engine process", metrics.Unit("info"),
 		"version",
 		"version",
@@ -38,5 +47,60 @@ func init() {
 	healthChecksCounter = ns.NewCounter("health_checks", "The total number of health checks")
 	healthChecksCounter = ns.NewCounter("health_checks", "The total number of health checks")
 	healthChecksFailedCounter = ns.NewCounter("health_checks_failed", "The total number of failed health checks")
 	healthChecksFailedCounter = ns.NewCounter("health_checks_failed", "The total number of failed health checks")
 	imageActions = ns.NewLabeledTimer("image_actions", "The number of seconds it takes to process each image action", "action")
 	imageActions = ns.NewLabeledTimer("image_actions", "The number of seconds it takes to process each image action", "action")
+
+	stateCtr = newStateCounter(ns.NewDesc("container_states", "The count of containers in various states", metrics.Unit("containers"), "state"))
+	ns.Add(stateCtr)
+
 	metrics.Register(ns)
 	metrics.Register(ns)
 }
 }
+
+// stateCounter remembers the most recently recorded state label of each
+// container, keyed by container ID, together with the prometheus descriptor
+// under which the per-state totals are exported.
+type stateCounter struct {
+	mu     sync.Mutex        // guards states
+	states map[string]string // container ID -> state label
+	desc   *prometheus.Desc  // descriptor sent by Describe and used by Collect
+}
+
+// newStateCounter returns a stateCounter with an empty state map that reports
+// its metrics under desc.
+func newStateCounter(desc *prometheus.Desc) *stateCounter {
+	ctr := &stateCounter{desc: desc}
+	ctr.states = make(map[string]string)
+	return ctr
+}
+
+// get returns how many tracked containers are currently labelled "running",
+// "paused" and "stopped". Entries carrying any other label are not part of
+// the returned totals.
+func (ctr *stateCounter) get() (running int, paused int, stopped int) {
+	ctr.mu.Lock()
+	defer ctr.mu.Unlock()
+
+	for _, label := range ctr.states {
+		switch label {
+		case "running":
+			running++
+		case "paused":
+			paused++
+		case "stopped":
+			stopped++
+		}
+	}
+	return running, paused, stopped
+}
+
+// set records label as the state of the container identified by id,
+// overwriting whatever label was stored for that ID before.
+func (ctr *stateCounter) set(id, label string) {
+	ctr.mu.Lock()
+	defer ctr.mu.Unlock()
+
+	ctr.states[id] = label
+}
+
+// del stops tracking the container identified by id. Deleting an ID that is
+// not tracked is a no-op.
+func (ctr *stateCounter) del(id string) {
+	ctr.mu.Lock()
+	defer ctr.mu.Unlock()
+
+	delete(ctr.states, id)
+}
+
+// Describe sends the counter's single descriptor on ch.
+func (ctr *stateCounter) Describe(ch chan<- *prometheus.Desc) {
+	ch <- ctr.desc
+}
+
+// Collect takes a snapshot of the per-state totals and sends one gauge metric
+// per state on ch, in the order running, paused, stopped. The state name is
+// supplied as the metric's label value.
+func (ctr *stateCounter) Collect(ch chan<- prometheus.Metric) {
+	running, paused, stopped := ctr.get()
+
+	emit := func(state string, count int) {
+		ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(count), state)
+	}
+	emit("running", running)
+	emit("paused", paused)
+	emit("stopped", stopped)
+}

+ 19 - 1
daemon/monitor.go

@@ -9,10 +9,22 @@ import (
 
 
 	"github.com/Sirupsen/logrus"
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types"
+	"github.com/docker/docker/container"
 	"github.com/docker/docker/libcontainerd"
 	"github.com/docker/docker/libcontainerd"
 	"github.com/docker/docker/restartmanager"
 	"github.com/docker/docker/restartmanager"
 )
 )
 
 
+// setStateCounter records c's current state in the package-level state
+// counter. A state string of "paused" or "running" is stored as is; every
+// other state string is stored as "stopped".
+func (daemon *Daemon) setStateCounter(c *container.Container) {
+	label := "stopped"
+	switch state := c.StateString(); state {
+	case "paused", "running":
+		label = state
+	}
+	stateCtr.set(c.ID, label)
+}
+
 // StateChanged updates daemon state changes from containerd
 // StateChanged updates daemon state changes from containerd
 func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
 func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
 	c := daemon.containers.Get(id)
 	c := daemon.containers.Get(id)
@@ -81,6 +93,8 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
 			}()
 			}()
 		}
 		}
 
 
+		daemon.setStateCounter(c)
+
 		defer c.Unlock()
 		defer c.Unlock()
 		if err := c.ToDisk(); err != nil {
 		if err := c.ToDisk(); err != nil {
 			return err
 			return err
@@ -109,15 +123,19 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
 		c.SetRunning(int(e.Pid), e.State == libcontainerd.StateStart)
 		c.SetRunning(int(e.Pid), e.State == libcontainerd.StateStart)
 		c.HasBeenManuallyStopped = false
 		c.HasBeenManuallyStopped = false
 		c.HasBeenStartedBefore = true
 		c.HasBeenStartedBefore = true
+		daemon.setStateCounter(c)
+
 		if err := c.ToDisk(); err != nil {
 		if err := c.ToDisk(); err != nil {
 			c.Reset(false)
 			c.Reset(false)
 			return err
 			return err
 		}
 		}
 		daemon.initHealthMonitor(c)
 		daemon.initHealthMonitor(c)
+
 		daemon.LogContainerEvent(c, "start")
 		daemon.LogContainerEvent(c, "start")
 	case libcontainerd.StatePause:
 	case libcontainerd.StatePause:
 		// Container is already locked in this case
 		// Container is already locked in this case
 		c.Paused = true
 		c.Paused = true
+		daemon.setStateCounter(c)
 		if err := c.ToDisk(); err != nil {
 		if err := c.ToDisk(); err != nil {
 			return err
 			return err
 		}
 		}
@@ -126,12 +144,12 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
 	case libcontainerd.StateResume:
 	case libcontainerd.StateResume:
 		// Container is already locked in this case
 		// Container is already locked in this case
 		c.Paused = false
 		c.Paused = false
+		daemon.setStateCounter(c)
 		if err := c.ToDisk(); err != nil {
 		if err := c.ToDisk(); err != nil {
 			return err
 			return err
 		}
 		}
 		daemon.updateHealthMonitor(c)
 		daemon.updateHealthMonitor(c)
 		daemon.LogContainerEvent(c, "unpause")
 		daemon.LogContainerEvent(c, "unpause")
 	}
 	}
-
 	return nil
 	return nil
 }
 }

+ 1 - 1
vendor.conf

@@ -136,7 +136,7 @@ github.com/flynn-archive/go-shlex 3f9db97f856818214da2e1057f8ad84803971cff
 github.com/Nvveen/Gotty a8b993ba6abdb0e0c12b0125c603323a71c7790c https://github.com/ijc25/Gotty
 github.com/Nvveen/Gotty a8b993ba6abdb0e0c12b0125c603323a71c7790c https://github.com/ijc25/Gotty
 
 
 # metrics
 # metrics
-github.com/docker/go-metrics 86138d05f285fd9737a99bee2d9be30866b59d72
+github.com/docker/go-metrics 8fd5772bf1584597834c6f7961a530f06cbfbb87
 
 
 # composefile
 # composefile
 github.com/mitchellh/mapstructure f3009df150dadf309fdee4a54ed65c124afad715
 github.com/mitchellh/mapstructure f3009df150dadf309fdee4a54ed65c124afad715

+ 60 - 3
vendor/github.com/docker/go-metrics/README.md

@@ -2,10 +2,67 @@
 
 
 This package is a small wrapper around the prometheus go client to help enforce convention and best practices for metrics collection in Docker projects.
 This package is a small wrapper around the prometheus go client to help enforce convention and best practices for metrics collection in Docker projects.
 
 
-## Status
+## Best Practices
 
 
-This project is a work in progress.
-It is under heavy development and not intended to be used.
+This package is meant to be used for collecting metrics in Docker projects.
+It is not meant to be used as a replacement for the prometheus client but to help enforce consistent naming across metrics collected.
+If you have not already read the prometheus best practices around naming and labels you can read the page [here](https://prometheus.io/docs/practices/naming/).
+
+The following are a few Docker specific rules that will help you name and work with metrics in your project.
+
+1. Namespace and Subsystem
+
+This package provides you with a namespace type that allows you to specify the same namespace and subsystem for your metrics.
+
+```go
+ns := metrics.NewNamespace("engine", "daemon", metrics.Labels{
+        "version": dockerversion.Version,
+        "commit":  dockerversion.GitCommit,
+})
+```
+
+In the example above we are creating metrics for the Docker engine's daemon package.
+`engine` would be the namespace in this example where `daemon` is the subsystem or package where we are collecting the metrics.
+
+A namespace also allows you to attach constant labels to the metrics such as the git commit and version that it is collecting.
+
+2. Declaring your Metrics
+
+Try to keep all your metric declarations in one file.
+This makes it easy for others to see what constant labels are defined on the namespace and what labels are defined on the metrics when they are created.
+
+3. Use labels instead of multiple metrics
+
+Labels allow you to define one metric such as the time it takes to perform a certain action on an object.
+If we wanted to collect timings on various container actions such as create, start, and delete then we can define one metric called `container_actions` and use labels to specify the type of action.
+
+
+```go
+containerActions = ns.NewLabeledTimer("container_actions", "The number of milliseconds it takes to process each container action", "action")
+```
+
+The last parameter is the label name or key.
+When adding a data point to the metric you will use the `WithValues` function to specify the `action` that you are collecting for.
+
+```go
+containerActions.WithValues("create").UpdateSince(start)
+```
+
+4. Always use a unit
+
+The metric name should describe what you are measuring but you also need to provide the unit that it is being measured with.
+For a timer, the standard unit is seconds and a counter's standard unit is a total.
+For gauges you must provide the unit.
+This package provides a standard set of units for use within the Docker projects.
+
+```go
+Nanoseconds Unit = "nanoseconds"
+Seconds     Unit = "seconds"
+Bytes       Unit = "bytes"
+Total       Unit = "total"
+```
+
+If you need a unit that is not defined in the package, please open a PR to add it. Before doing so, check whether one of the existing units will work for your metric, e.g. use seconds or nanoseconds rather than adding milliseconds.
 
 
 ## Docs
 ## Docs
 
 

+ 27 - 11
vendor/github.com/docker/go-metrics/namespace.go

@@ -40,21 +40,25 @@ type Namespace struct {
 //  Only metrics created with the returned namespace will get the new constant
 //  Only metrics created with the returned namespace will get the new constant
 //  labels.  The returned namespace must be registered separately.
 //  labels.  The returned namespace must be registered separately.
 func (n *Namespace) WithConstLabels(labels Labels) *Namespace {
 func (n *Namespace) WithConstLabels(labels Labels) *Namespace {
-	ns := *n
-	ns.metrics = nil // blank this out
-	ns.labels = mergeLabels(ns.labels, labels)
-	return &ns
+	n.mu.Lock()
+	ns := &Namespace{
+		name:      n.name,
+		subsystem: n.subsystem,
+		labels:    mergeLabels(n.labels, labels),
+	}
+	n.mu.Unlock()
+	return ns
 }
 }
 
 
 func (n *Namespace) NewCounter(name, help string) Counter {
 func (n *Namespace) NewCounter(name, help string) Counter {
 	c := &counter{pc: prometheus.NewCounter(n.newCounterOpts(name, help))}
 	c := &counter{pc: prometheus.NewCounter(n.newCounterOpts(name, help))}
-	n.addMetric(c)
+	n.Add(c)
 	return c
 	return c
 }
 }
 
 
 func (n *Namespace) NewLabeledCounter(name, help string, labels ...string) LabeledCounter {
 func (n *Namespace) NewLabeledCounter(name, help string, labels ...string) LabeledCounter {
 	c := &labeledCounter{pc: prometheus.NewCounterVec(n.newCounterOpts(name, help), labels)}
 	c := &labeledCounter{pc: prometheus.NewCounterVec(n.newCounterOpts(name, help), labels)}
-	n.addMetric(c)
+	n.Add(c)
 	return c
 	return c
 }
 }
 
 
@@ -72,7 +76,7 @@ func (n *Namespace) NewTimer(name, help string) Timer {
 	t := &timer{
 	t := &timer{
 		m: prometheus.NewHistogram(n.newTimerOpts(name, help)),
 		m: prometheus.NewHistogram(n.newTimerOpts(name, help)),
 	}
 	}
-	n.addMetric(t)
+	n.Add(t)
 	return t
 	return t
 }
 }
 
 
@@ -80,7 +84,7 @@ func (n *Namespace) NewLabeledTimer(name, help string, labels ...string) Labeled
 	t := &labeledTimer{
 	t := &labeledTimer{
 		m: prometheus.NewHistogramVec(n.newTimerOpts(name, help), labels),
 		m: prometheus.NewHistogramVec(n.newTimerOpts(name, help), labels),
 	}
 	}
-	n.addMetric(t)
+	n.Add(t)
 	return t
 	return t
 }
 }
 
 
@@ -98,7 +102,7 @@ func (n *Namespace) NewGauge(name, help string, unit Unit) Gauge {
 	g := &gauge{
 	g := &gauge{
 		pg: prometheus.NewGauge(n.newGaugeOpts(name, help, unit)),
 		pg: prometheus.NewGauge(n.newGaugeOpts(name, help, unit)),
 	}
 	}
-	n.addMetric(g)
+	n.Add(g)
 	return g
 	return g
 }
 }
 
 
@@ -106,7 +110,7 @@ func (n *Namespace) NewLabeledGauge(name, help string, unit Unit, labels ...stri
 	g := &labeledGauge{
 	g := &labeledGauge{
 		pg: prometheus.NewGaugeVec(n.newGaugeOpts(name, help, unit), labels),
 		pg: prometheus.NewGaugeVec(n.newGaugeOpts(name, help, unit), labels),
 	}
 	}
-	n.addMetric(g)
+	n.Add(g)
 	return g
 	return g
 }
 }
 
 
@@ -138,12 +142,24 @@ func (n *Namespace) Collect(ch chan<- prometheus.Metric) {
 	}
 	}
 }
 }
 
 
-func (n *Namespace) addMetric(collector prometheus.Collector) {
+func (n *Namespace) Add(collector prometheus.Collector) {
 	n.mu.Lock()
 	n.mu.Lock()
 	n.metrics = append(n.metrics, collector)
 	n.metrics = append(n.metrics, collector)
 	n.mu.Unlock()
 	n.mu.Unlock()
 }
 }
 
 
+// NewDesc builds a prometheus descriptor for a metric in this namespace. The
+// fully-qualified metric name is the namespace name, the subsystem (when
+// set), name, and unit (when non-empty), joined with underscores. labels are
+// passed as the variable label names and the namespace's own labels as the
+// constant labels.
+func (n *Namespace) NewDesc(name, help string, unit Unit, labels ...string) *prometheus.Desc {
+	fqName := n.name
+	if n.subsystem != "" {
+		fqName = fmt.Sprintf("%s_%s", fqName, n.subsystem)
+	}
+	fqName = fmt.Sprintf("%s_%s", fqName, name)
+	if string(unit) != "" {
+		fqName = fmt.Sprintf("%s_%s", fqName, unit)
+	}
+	return prometheus.NewDesc(fqName, help, labels, prometheus.Labels(n.labels))
+}
+
 // mergeLabels merges two or more labels objects into a single map, favoring
 // mergeLabels merges two or more labels objects into a single map, favoring
 // the later labels.
 // the later labels.
 func mergeLabels(lbs ...Labels) Labels {
 func mergeLabels(lbs ...Labels) Labels {