metrics.go 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. package daemon
  2. import (
  3. "path/filepath"
  4. "sync"
  5. "github.com/docker/docker/pkg/mount"
  6. "github.com/docker/docker/pkg/plugingetter"
  7. metrics "github.com/docker/go-metrics"
  8. "github.com/pkg/errors"
  9. "github.com/prometheus/client_golang/prometheus"
  10. "github.com/sirupsen/logrus"
  11. )
  12. const metricsPluginType = "MetricsCollector"
  13. var (
  14. containerActions metrics.LabeledTimer
  15. imageActions metrics.LabeledTimer
  16. networkActions metrics.LabeledTimer
  17. engineInfo metrics.LabeledGauge
  18. engineCpus metrics.Gauge
  19. engineMemory metrics.Gauge
  20. healthChecksCounter metrics.Counter
  21. healthChecksFailedCounter metrics.Counter
  22. stateCtr *stateCounter
  23. )
  24. func init() {
  25. ns := metrics.NewNamespace("engine", "daemon", nil)
  26. containerActions = ns.NewLabeledTimer("container_actions", "The number of seconds it takes to process each container action", "action")
  27. for _, a := range []string{
  28. "start",
  29. "changes",
  30. "commit",
  31. "create",
  32. "delete",
  33. } {
  34. containerActions.WithValues(a).Update(0)
  35. }
  36. networkActions = ns.NewLabeledTimer("network_actions", "The number of seconds it takes to process each network action", "action")
  37. engineInfo = ns.NewLabeledGauge("engine", "The information related to the engine and the OS it is running on", metrics.Unit("info"),
  38. "version",
  39. "commit",
  40. "architecture",
  41. "graphdriver",
  42. "kernel", "os",
  43. "os_type",
  44. "daemon_id", // ID is a randomly generated unique identifier (e.g. UUID4)
  45. )
  46. engineCpus = ns.NewGauge("engine_cpus", "The number of cpus that the host system of the engine has", metrics.Unit("cpus"))
  47. engineMemory = ns.NewGauge("engine_memory", "The number of bytes of memory that the host system of the engine has", metrics.Bytes)
  48. healthChecksCounter = ns.NewCounter("health_checks", "The total number of health checks")
  49. healthChecksFailedCounter = ns.NewCounter("health_checks_failed", "The total number of failed health checks")
  50. imageActions = ns.NewLabeledTimer("image_actions", "The number of seconds it takes to process each image action", "action")
  51. stateCtr = newStateCounter(ns.NewDesc("container_states", "The count of containers in various states", metrics.Unit("containers"), "state"))
  52. ns.Add(stateCtr)
  53. metrics.Register(ns)
  54. }
  55. type stateCounter struct {
  56. mu sync.Mutex
  57. states map[string]string
  58. desc *prometheus.Desc
  59. }
  60. func newStateCounter(desc *prometheus.Desc) *stateCounter {
  61. return &stateCounter{
  62. states: make(map[string]string),
  63. desc: desc,
  64. }
  65. }
  66. func (ctr *stateCounter) get() (running int, paused int, stopped int) {
  67. ctr.mu.Lock()
  68. defer ctr.mu.Unlock()
  69. states := map[string]int{
  70. "running": 0,
  71. "paused": 0,
  72. "stopped": 0,
  73. }
  74. for _, state := range ctr.states {
  75. states[state]++
  76. }
  77. return states["running"], states["paused"], states["stopped"]
  78. }
  79. func (ctr *stateCounter) set(id, label string) {
  80. ctr.mu.Lock()
  81. ctr.states[id] = label
  82. ctr.mu.Unlock()
  83. }
  84. func (ctr *stateCounter) del(id string) {
  85. ctr.mu.Lock()
  86. delete(ctr.states, id)
  87. ctr.mu.Unlock()
  88. }
  89. func (ctr *stateCounter) Describe(ch chan<- *prometheus.Desc) {
  90. ch <- ctr.desc
  91. }
  92. func (ctr *stateCounter) Collect(ch chan<- prometheus.Metric) {
  93. running, paused, stopped := ctr.get()
  94. ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(running), "running")
  95. ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(paused), "paused")
  96. ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(stopped), "stopped")
  97. }
  98. func (d *Daemon) cleanupMetricsPlugins() {
  99. ls := d.PluginStore.GetAllManagedPluginsByCap(metricsPluginType)
  100. var wg sync.WaitGroup
  101. wg.Add(len(ls))
  102. for _, p := range ls {
  103. go func() {
  104. defer wg.Done()
  105. pluginStopMetricsCollection(p)
  106. }()
  107. }
  108. wg.Wait()
  109. if d.metricsPluginListener != nil {
  110. d.metricsPluginListener.Close()
  111. }
  112. }
  113. type metricsPlugin struct {
  114. plugingetter.CompatPlugin
  115. }
  116. func (p metricsPlugin) sock() string {
  117. return "metrics.sock"
  118. }
  119. func (p metricsPlugin) sockBase() string {
  120. return filepath.Join(p.BasePath(), "run", "docker")
  121. }
  122. func pluginStartMetricsCollection(p plugingetter.CompatPlugin) error {
  123. type metricsPluginResponse struct {
  124. Err string
  125. }
  126. var res metricsPluginResponse
  127. if err := p.Client().Call(metricsPluginType+".StartMetrics", nil, &res); err != nil {
  128. return errors.Wrap(err, "could not start metrics plugin")
  129. }
  130. if res.Err != "" {
  131. return errors.New(res.Err)
  132. }
  133. return nil
  134. }
  135. func pluginStopMetricsCollection(p plugingetter.CompatPlugin) {
  136. if err := p.Client().Call(metricsPluginType+".StopMetrics", nil, nil); err != nil {
  137. logrus.WithError(err).WithField("name", p.Name()).Error("error stopping metrics collector")
  138. }
  139. mp := metricsPlugin{p}
  140. sockPath := filepath.Join(mp.sockBase(), mp.sock())
  141. if err := mount.Unmount(sockPath); err != nil {
  142. if mounted, _ := mount.Mounted(sockPath); mounted {
  143. logrus.WithError(err).WithField("name", p.Name()).WithField("socket", sockPath).Error("error unmounting metrics socket for plugin")
  144. }
  145. }
  146. }