e4c03623c2
Container state counts are used for reporting in the `/info` endpoint. Currently, when `/info` is called, each container is iterated over and the container's `StateString()` is called. This is not very efficient with lots of containers, and it is also racy, since `StateString()` does not use a mutex and the mutex is not otherwise locked. We could just lock the container mutex, but this has proven to be problematic, since there are frequent deadlock scenarios and the `/info` endpoint should always be available, as it is used to get general information about the Docker host. Really, these metrics on `/info` should be deprecated. But until then, we can just keep a running tally in memory for each of the reported states. Signed-off-by: Brian Goff <cpuguy83@gmail.com>
106 lines
3.2 KiB
Go
106 lines
3.2 KiB
Go
package daemon
|
|
|
|
import (
|
|
"sync"
|
|
|
|
"github.com/docker/go-metrics"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
)
|
|
|
|
var (
|
|
containerActions metrics.LabeledTimer
|
|
containerStates metrics.LabeledGauge
|
|
imageActions metrics.LabeledTimer
|
|
networkActions metrics.LabeledTimer
|
|
engineVersion metrics.LabeledGauge
|
|
engineCpus metrics.Gauge
|
|
engineMemory metrics.Gauge
|
|
healthChecksCounter metrics.Counter
|
|
healthChecksFailedCounter metrics.Counter
|
|
|
|
stateCtr *stateCounter
|
|
)
|
|
|
|
func init() {
|
|
ns := metrics.NewNamespace("engine", "daemon", nil)
|
|
containerActions = ns.NewLabeledTimer("container_actions", "The number of seconds it takes to process each container action", "action")
|
|
for _, a := range []string{
|
|
"start",
|
|
"changes",
|
|
"commit",
|
|
"create",
|
|
"delete",
|
|
} {
|
|
containerActions.WithValues(a).Update(0)
|
|
}
|
|
|
|
networkActions = ns.NewLabeledTimer("network_actions", "The number of seconds it takes to process each network action", "action")
|
|
engineVersion = ns.NewLabeledGauge("engine", "The version and commit information for the engine process", metrics.Unit("info"),
|
|
"version",
|
|
"commit",
|
|
"architecture",
|
|
"graph_driver", "kernel",
|
|
"os",
|
|
)
|
|
engineCpus = ns.NewGauge("engine_cpus", "The number of cpus that the host system of the engine has", metrics.Unit("cpus"))
|
|
engineMemory = ns.NewGauge("engine_memory", "The number of bytes of memory that the host system of the engine has", metrics.Bytes)
|
|
healthChecksCounter = ns.NewCounter("health_checks", "The total number of health checks")
|
|
healthChecksFailedCounter = ns.NewCounter("health_checks_failed", "The total number of failed health checks")
|
|
imageActions = ns.NewLabeledTimer("image_actions", "The number of seconds it takes to process each image action", "action")
|
|
|
|
stateCtr = newStateCounter(ns.NewDesc("container_states", "The count of containers in various states", metrics.Unit("containers"), "state"))
|
|
ns.Add(stateCtr)
|
|
|
|
metrics.Register(ns)
|
|
}
|
|
|
|
type stateCounter struct {
|
|
mu sync.Mutex
|
|
states map[string]string
|
|
desc *prometheus.Desc
|
|
}
|
|
|
|
func newStateCounter(desc *prometheus.Desc) *stateCounter {
|
|
return &stateCounter{
|
|
states: make(map[string]string),
|
|
desc: desc,
|
|
}
|
|
}
|
|
|
|
func (ctr *stateCounter) get() (running int, paused int, stopped int) {
|
|
ctr.mu.Lock()
|
|
defer ctr.mu.Unlock()
|
|
|
|
states := map[string]int{
|
|
"running": 0,
|
|
"paused": 0,
|
|
"stopped": 0,
|
|
}
|
|
for _, state := range ctr.states {
|
|
states[state]++
|
|
}
|
|
return states["running"], states["paused"], states["stopped"]
|
|
}
|
|
|
|
func (ctr *stateCounter) set(id, label string) {
|
|
ctr.mu.Lock()
|
|
ctr.states[id] = label
|
|
ctr.mu.Unlock()
|
|
}
|
|
|
|
func (ctr *stateCounter) del(id string) {
|
|
ctr.mu.Lock()
|
|
delete(ctr.states, id)
|
|
ctr.mu.Unlock()
|
|
}
|
|
|
|
func (ctr *stateCounter) Describe(ch chan<- *prometheus.Desc) {
|
|
ch <- ctr.desc
|
|
}
|
|
|
|
func (ctr *stateCounter) Collect(ch chan<- prometheus.Metric) {
|
|
running, paused, stopped := ctr.get()
|
|
ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(running), "running")
|
|
ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(paused), "paused")
|
|
ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(stopped), "stopped")
|
|
}
|