metrics.go 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. package daemon // import "github.com/docker/docker/daemon"
  2. import (
  3. "sync"
  4. "github.com/docker/docker/pkg/plugingetter"
  5. "github.com/docker/go-metrics"
  6. "github.com/pkg/errors"
  7. "github.com/prometheus/client_golang/prometheus"
  8. "github.com/sirupsen/logrus"
  9. )
  10. const metricsPluginType = "MetricsCollector"
  11. var (
  12. containerActions metrics.LabeledTimer
  13. networkActions metrics.LabeledTimer
  14. engineInfo metrics.LabeledGauge
  15. engineCpus metrics.Gauge
  16. engineMemory metrics.Gauge
  17. healthChecksCounter metrics.Counter
  18. healthChecksFailedCounter metrics.Counter
  19. stateCtr *stateCounter
  20. )
  21. func init() {
  22. ns := metrics.NewNamespace("engine", "daemon", nil)
  23. containerActions = ns.NewLabeledTimer("container_actions", "The number of seconds it takes to process each container action", "action")
  24. for _, a := range []string{
  25. "start",
  26. "changes",
  27. "commit",
  28. "create",
  29. "delete",
  30. } {
  31. containerActions.WithValues(a).Update(0)
  32. }
  33. networkActions = ns.NewLabeledTimer("network_actions", "The number of seconds it takes to process each network action", "action")
  34. engineInfo = ns.NewLabeledGauge("engine", "The information related to the engine and the OS it is running on", metrics.Unit("info"),
  35. "version",
  36. "commit",
  37. "architecture",
  38. "graphdriver",
  39. "kernel", "os",
  40. "os_type",
  41. "daemon_id", // ID is a randomly generated unique identifier (e.g. UUID4)
  42. )
  43. engineCpus = ns.NewGauge("engine_cpus", "The number of cpus that the host system of the engine has", metrics.Unit("cpus"))
  44. engineMemory = ns.NewGauge("engine_memory", "The number of bytes of memory that the host system of the engine has", metrics.Bytes)
  45. healthChecksCounter = ns.NewCounter("health_checks", "The total number of health checks")
  46. healthChecksFailedCounter = ns.NewCounter("health_checks_failed", "The total number of failed health checks")
  47. stateCtr = newStateCounter(ns.NewDesc("container_states", "The count of containers in various states", metrics.Unit("containers"), "state"))
  48. ns.Add(stateCtr)
  49. metrics.Register(ns)
  50. }
  51. type stateCounter struct {
  52. mu sync.Mutex
  53. states map[string]string
  54. desc *prometheus.Desc
  55. }
  56. func newStateCounter(desc *prometheus.Desc) *stateCounter {
  57. return &stateCounter{
  58. states: make(map[string]string),
  59. desc: desc,
  60. }
  61. }
  62. func (ctr *stateCounter) get() (running int, paused int, stopped int) {
  63. ctr.mu.Lock()
  64. defer ctr.mu.Unlock()
  65. states := map[string]int{
  66. "running": 0,
  67. "paused": 0,
  68. "stopped": 0,
  69. }
  70. for _, state := range ctr.states {
  71. states[state]++
  72. }
  73. return states["running"], states["paused"], states["stopped"]
  74. }
  75. func (ctr *stateCounter) set(id, label string) {
  76. ctr.mu.Lock()
  77. ctr.states[id] = label
  78. ctr.mu.Unlock()
  79. }
  80. func (ctr *stateCounter) del(id string) {
  81. ctr.mu.Lock()
  82. delete(ctr.states, id)
  83. ctr.mu.Unlock()
  84. }
  85. func (ctr *stateCounter) Describe(ch chan<- *prometheus.Desc) {
  86. ch <- ctr.desc
  87. }
  88. func (ctr *stateCounter) Collect(ch chan<- prometheus.Metric) {
  89. running, paused, stopped := ctr.get()
  90. ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(running), "running")
  91. ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(paused), "paused")
  92. ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(stopped), "stopped")
  93. }
  94. func (d *Daemon) cleanupMetricsPlugins() {
  95. ls := d.PluginStore.GetAllManagedPluginsByCap(metricsPluginType)
  96. var wg sync.WaitGroup
  97. wg.Add(len(ls))
  98. for _, plugin := range ls {
  99. p := plugin
  100. go func() {
  101. defer wg.Done()
  102. pluginStopMetricsCollection(p)
  103. }()
  104. }
  105. wg.Wait()
  106. if d.metricsPluginListener != nil {
  107. d.metricsPluginListener.Close()
  108. }
  109. }
  110. func pluginStartMetricsCollection(p plugingetter.CompatPlugin) error {
  111. type metricsPluginResponse struct {
  112. Err string
  113. }
  114. var res metricsPluginResponse
  115. if err := p.Client().Call(metricsPluginType+".StartMetrics", nil, &res); err != nil {
  116. return errors.Wrap(err, "could not start metrics plugin")
  117. }
  118. if res.Err != "" {
  119. return errors.New(res.Err)
  120. }
  121. return nil
  122. }
  123. func pluginStopMetricsCollection(p plugingetter.CompatPlugin) {
  124. if err := p.Client().Call(metricsPluginType+".StopMetrics", nil, nil); err != nil {
  125. logrus.WithError(err).WithField("name", p.Name()).Error("error stopping metrics collector")
  126. }
  127. }