metrics.go 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. package daemon // import "github.com/docker/docker/daemon"
  2. import (
  3. "sync"
  4. "github.com/docker/docker/pkg/plugingetter"
  5. "github.com/docker/docker/pkg/plugins"
  6. "github.com/docker/go-metrics"
  7. "github.com/pkg/errors"
  8. "github.com/prometheus/client_golang/prometheus"
  9. "github.com/sirupsen/logrus"
  10. )
  11. const metricsPluginType = "MetricsCollector"
  12. var (
  13. containerActions metrics.LabeledTimer
  14. networkActions metrics.LabeledTimer
  15. engineInfo metrics.LabeledGauge
  16. engineCpus metrics.Gauge
  17. engineMemory metrics.Gauge
  18. healthChecksCounter metrics.Counter
  19. healthChecksFailedCounter metrics.Counter
  20. stateCtr *stateCounter
  21. )
  22. func init() {
  23. ns := metrics.NewNamespace("engine", "daemon", nil)
  24. containerActions = ns.NewLabeledTimer("container_actions", "The number of seconds it takes to process each container action", "action")
  25. for _, a := range []string{
  26. "start",
  27. "changes",
  28. "commit",
  29. "create",
  30. "delete",
  31. } {
  32. containerActions.WithValues(a).Update(0)
  33. }
  34. networkActions = ns.NewLabeledTimer("network_actions", "The number of seconds it takes to process each network action", "action")
  35. engineInfo = ns.NewLabeledGauge("engine", "The information related to the engine and the OS it is running on", metrics.Unit("info"),
  36. "version",
  37. "commit",
  38. "architecture",
  39. "graphdriver",
  40. "kernel", "os",
  41. "os_type",
  42. "daemon_id", // ID is a randomly generated unique identifier (e.g. UUID4)
  43. )
  44. engineCpus = ns.NewGauge("engine_cpus", "The number of cpus that the host system of the engine has", metrics.Unit("cpus"))
  45. engineMemory = ns.NewGauge("engine_memory", "The number of bytes of memory that the host system of the engine has", metrics.Bytes)
  46. healthChecksCounter = ns.NewCounter("health_checks", "The total number of health checks")
  47. healthChecksFailedCounter = ns.NewCounter("health_checks_failed", "The total number of failed health checks")
  48. stateCtr = newStateCounter(ns.NewDesc("container_states", "The count of containers in various states", metrics.Unit("containers"), "state"))
  49. ns.Add(stateCtr)
  50. metrics.Register(ns)
  51. }
  52. type stateCounter struct {
  53. mu sync.Mutex
  54. states map[string]string
  55. desc *prometheus.Desc
  56. }
  57. func newStateCounter(desc *prometheus.Desc) *stateCounter {
  58. return &stateCounter{
  59. states: make(map[string]string),
  60. desc: desc,
  61. }
  62. }
  63. func (ctr *stateCounter) get() (running int, paused int, stopped int) {
  64. ctr.mu.Lock()
  65. defer ctr.mu.Unlock()
  66. states := map[string]int{
  67. "running": 0,
  68. "paused": 0,
  69. "stopped": 0,
  70. }
  71. for _, state := range ctr.states {
  72. states[state]++
  73. }
  74. return states["running"], states["paused"], states["stopped"]
  75. }
  76. func (ctr *stateCounter) set(id, label string) {
  77. ctr.mu.Lock()
  78. ctr.states[id] = label
  79. ctr.mu.Unlock()
  80. }
  81. func (ctr *stateCounter) del(id string) {
  82. ctr.mu.Lock()
  83. delete(ctr.states, id)
  84. ctr.mu.Unlock()
  85. }
  86. func (ctr *stateCounter) Describe(ch chan<- *prometheus.Desc) {
  87. ch <- ctr.desc
  88. }
  89. func (ctr *stateCounter) Collect(ch chan<- prometheus.Metric) {
  90. running, paused, stopped := ctr.get()
  91. ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(running), "running")
  92. ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(paused), "paused")
  93. ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(stopped), "stopped")
  94. }
  95. func (d *Daemon) cleanupMetricsPlugins() {
  96. ls := d.PluginStore.GetAllManagedPluginsByCap(metricsPluginType)
  97. var wg sync.WaitGroup
  98. wg.Add(len(ls))
  99. for _, plugin := range ls {
  100. p := plugin
  101. go func() {
  102. defer wg.Done()
  103. adapter, err := makePluginAdapter(p)
  104. if err != nil {
  105. logrus.WithError(err).WithField("plugin", p.Name()).Error("Error creating metrics plugin adapater")
  106. return
  107. }
  108. if err := adapter.StopMetrics(); err != nil {
  109. logrus.WithError(err).WithField("plugin", p.Name()).Error("Error stopping plugin metrics collection")
  110. }
  111. }()
  112. }
  113. wg.Wait()
  114. if d.metricsPluginListener != nil {
  115. d.metricsPluginListener.Close()
  116. }
  117. }
  118. type metricsPlugin interface {
  119. StartMetrics() error
  120. StopMetrics() error
  121. }
  122. func makePluginAdapter(p plugingetter.CompatPlugin) (metricsPlugin, error) {
  123. pa, ok := p.(plugingetter.PluginAddr)
  124. if !ok {
  125. return &metricsPluginAdapter{p.Client(), p.Name()}, nil
  126. }
  127. if pa.Protocol() != plugins.ProtocolSchemeHTTPV1 {
  128. return nil, errors.Errorf("plugin protocol not supported: %s", pa.Protocol())
  129. }
  130. addr := pa.Addr()
  131. client, err := plugins.NewClientWithTimeout(addr.Network()+"://"+addr.String(), nil, pa.Timeout())
  132. if err != nil {
  133. return nil, errors.Wrap(err, "error creating metrics plugin client")
  134. }
  135. return &metricsPluginAdapter{client, p.Name()}, nil
  136. }
  137. type metricsPluginAdapter struct {
  138. c *plugins.Client
  139. name string
  140. }
  141. func (a *metricsPluginAdapter) StartMetrics() error {
  142. type metricsPluginResponse struct {
  143. Err string
  144. }
  145. var res metricsPluginResponse
  146. if err := a.c.Call(metricsPluginType+".StartMetrics", nil, &res); err != nil {
  147. return errors.Wrap(err, "could not start metrics plugin")
  148. }
  149. if res.Err != "" {
  150. return errors.New(res.Err)
  151. }
  152. return nil
  153. }
  154. func (a *metricsPluginAdapter) StopMetrics() error {
  155. if err := a.c.Call(metricsPluginType+".StopMetrics", nil, nil); err != nil {
  156. return errors.Wrap(err, "error stopping metrics collector")
  157. }
  158. return nil
  159. }