metrics.go 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. package daemon // import "github.com/docker/docker/daemon"
  2. import (
  3. "sync"
  4. "github.com/docker/docker/errdefs"
  5. "github.com/docker/docker/pkg/plugingetter"
  6. "github.com/docker/docker/pkg/plugins"
  7. "github.com/docker/go-metrics"
  8. "github.com/pkg/errors"
  9. "github.com/prometheus/client_golang/prometheus"
  10. "github.com/sirupsen/logrus"
  11. )
  12. const metricsPluginType = "MetricsCollector"
  13. var (
  14. containerActions metrics.LabeledTimer
  15. networkActions metrics.LabeledTimer
  16. hostInfoFunctions metrics.LabeledTimer
  17. engineInfo metrics.LabeledGauge
  18. engineCpus metrics.Gauge
  19. engineMemory metrics.Gauge
  20. healthChecksCounter metrics.Counter
  21. healthChecksFailedCounter metrics.Counter
  22. stateCtr *stateCounter
  23. )
  24. func init() {
  25. ns := metrics.NewNamespace("engine", "daemon", nil)
  26. containerActions = ns.NewLabeledTimer("container_actions", "The number of seconds it takes to process each container action", "action")
  27. for _, a := range []string{
  28. "start",
  29. "changes",
  30. "commit",
  31. "create",
  32. "delete",
  33. } {
  34. containerActions.WithValues(a).Update(0)
  35. }
  36. hostInfoFunctions = ns.NewLabeledTimer("host_info_functions", "The number of seconds it takes to call functions gathering info about the host", "function")
  37. networkActions = ns.NewLabeledTimer("network_actions", "The number of seconds it takes to process each network action", "action")
  38. engineInfo = ns.NewLabeledGauge("engine", "The information related to the engine and the OS it is running on", metrics.Unit("info"),
  39. "version",
  40. "commit",
  41. "architecture",
  42. "graphdriver",
  43. "kernel",
  44. "os",
  45. "os_type",
  46. "os_version",
  47. "daemon_id", // ID is a randomly generated unique identifier (e.g. UUID4)
  48. )
  49. engineCpus = ns.NewGauge("engine_cpus", "The number of cpus that the host system of the engine has", metrics.Unit("cpus"))
  50. engineMemory = ns.NewGauge("engine_memory", "The number of bytes of memory that the host system of the engine has", metrics.Bytes)
  51. healthChecksCounter = ns.NewCounter("health_checks", "The total number of health checks")
  52. healthChecksFailedCounter = ns.NewCounter("health_checks_failed", "The total number of failed health checks")
  53. stateCtr = newStateCounter(ns.NewDesc("container_states", "The count of containers in various states", metrics.Unit("containers"), "state"))
  54. ns.Add(stateCtr)
  55. metrics.Register(ns)
  56. }
// stateCounter records one state label per ID and reports how many IDs are in
// the "running", "paused" and "stopped" states via Describe and Collect.
type stateCounter struct {
	mu     sync.Mutex        // guards states
	states map[string]string // ID -> state label, as passed to set
	desc   *prometheus.Desc  // descriptor sent by Describe and used for the metrics built in Collect
}
  62. func newStateCounter(desc *prometheus.Desc) *stateCounter {
  63. return &stateCounter{
  64. states: make(map[string]string),
  65. desc: desc,
  66. }
  67. }
  68. func (ctr *stateCounter) get() (running int, paused int, stopped int) {
  69. ctr.mu.Lock()
  70. defer ctr.mu.Unlock()
  71. states := map[string]int{
  72. "running": 0,
  73. "paused": 0,
  74. "stopped": 0,
  75. }
  76. for _, state := range ctr.states {
  77. states[state]++
  78. }
  79. return states["running"], states["paused"], states["stopped"]
  80. }
  81. func (ctr *stateCounter) set(id, label string) {
  82. ctr.mu.Lock()
  83. ctr.states[id] = label
  84. ctr.mu.Unlock()
  85. }
  86. func (ctr *stateCounter) del(id string) {
  87. ctr.mu.Lock()
  88. delete(ctr.states, id)
  89. ctr.mu.Unlock()
  90. }
// Describe sends the counter's single descriptor on ch.
func (ctr *stateCounter) Describe(ch chan<- *prometheus.Desc) {
	ch <- ctr.desc
}
  94. func (ctr *stateCounter) Collect(ch chan<- prometheus.Metric) {
  95. running, paused, stopped := ctr.get()
  96. ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(running), "running")
  97. ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(paused), "paused")
  98. ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(stopped), "stopped")
  99. }
  100. func (d *Daemon) cleanupMetricsPlugins() {
  101. ls := d.PluginStore.GetAllManagedPluginsByCap(metricsPluginType)
  102. var wg sync.WaitGroup
  103. wg.Add(len(ls))
  104. for _, plugin := range ls {
  105. p := plugin
  106. go func() {
  107. defer wg.Done()
  108. adapter, err := makePluginAdapter(p)
  109. if err != nil {
  110. logrus.WithError(err).WithField("plugin", p.Name()).Error("Error creating metrics plugin adapter")
  111. return
  112. }
  113. if err := adapter.StopMetrics(); err != nil {
  114. logrus.WithError(err).WithField("plugin", p.Name()).Error("Error stopping plugin metrics collection")
  115. }
  116. }()
  117. }
  118. wg.Wait()
  119. if d.metricsPluginListener != nil {
  120. d.metricsPluginListener.Close()
  121. }
  122. }
// metricsPlugin is the daemon-side view of a metrics plugin: something whose
// metrics collection can be started and stopped.
type metricsPlugin interface {
	// StartMetrics asks the plugin to start metrics collection.
	StartMetrics() error
	// StopMetrics asks the plugin to stop metrics collection.
	StopMetrics() error
}
  127. func makePluginAdapter(p plugingetter.CompatPlugin) (metricsPlugin, error) {
  128. if pc, ok := p.(plugingetter.PluginWithV1Client); ok {
  129. return &metricsPluginAdapter{pc.Client(), p.Name()}, nil
  130. }
  131. pa, ok := p.(plugingetter.PluginAddr)
  132. if !ok {
  133. return nil, errdefs.System(errors.Errorf("got unknown plugin type %T", p))
  134. }
  135. if pa.Protocol() != plugins.ProtocolSchemeHTTPV1 {
  136. return nil, errors.Errorf("plugin protocol not supported: %s", pa.Protocol())
  137. }
  138. addr := pa.Addr()
  139. client, err := plugins.NewClientWithTimeout(addr.Network()+"://"+addr.String(), nil, pa.Timeout())
  140. if err != nil {
  141. return nil, errors.Wrap(err, "error creating metrics plugin client")
  142. }
  143. return &metricsPluginAdapter{client, p.Name()}, nil
  144. }
// metricsPluginAdapter implements metricsPlugin by calling the plugin's
// metrics methods through a plugin client.
type metricsPluginAdapter struct {
	c    *plugins.Client // client used for the StartMetrics/StopMetrics calls
	name string          // plugin name, taken from Name() in makePluginAdapter
}
  149. func (a *metricsPluginAdapter) StartMetrics() error {
  150. type metricsPluginResponse struct {
  151. Err string
  152. }
  153. var res metricsPluginResponse
  154. if err := a.c.Call(metricsPluginType+".StartMetrics", nil, &res); err != nil {
  155. return errors.Wrap(err, "could not start metrics plugin")
  156. }
  157. if res.Err != "" {
  158. return errors.New(res.Err)
  159. }
  160. return nil
  161. }
  162. func (a *metricsPluginAdapter) StopMetrics() error {
  163. if err := a.c.Call(metricsPluginType+".StopMetrics", nil, nil); err != nil {
  164. return errors.Wrap(err, "error stopping metrics collector")
  165. }
  166. return nil
  167. }