metrics.go 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. package daemon // import "github.com/docker/docker/daemon"
  2. import (
  3. "context"
  4. "sync"
  5. "github.com/containerd/log"
  6. "github.com/docker/docker/errdefs"
  7. "github.com/docker/docker/pkg/plugingetter"
  8. "github.com/docker/docker/pkg/plugins"
  9. metrics "github.com/docker/go-metrics"
  10. "github.com/pkg/errors"
  11. "github.com/prometheus/client_golang/prometheus"
  12. )
// metricsPluginType is the plugin capability name used to look up metrics
// plugins in the plugin store and to prefix the RPC method names sent to them.
const metricsPluginType = "MetricsCollector"

// Daemon-level metrics. Everything below is created from metricsNS, which is
// registered with the metrics package in init.
var (
	// metricsNS is the namespace all daemon metrics in this file are created in.
	metricsNS = metrics.NewNamespace("engine", "daemon", nil)

	// Timers labeled by the action (or function) being measured.
	containerActions  = metricsNS.NewLabeledTimer("container_actions", "The number of seconds it takes to process each container action", "action")
	networkActions    = metricsNS.NewLabeledTimer("network_actions", "The number of seconds it takes to process each network action", "action")
	hostInfoFunctions = metricsNS.NewLabeledTimer("host_info_functions", "The number of seconds it takes to call functions gathering info about the host", "function")

	// engineInfo is a labeled gauge whose labels describe the engine and the
	// OS it runs on; the label names are listed below.
	engineInfo = metricsNS.NewLabeledGauge("engine", "The information related to the engine and the OS it is running on", metrics.Unit("info"),
		"version",
		"commit",
		"architecture",
		"graphdriver",
		"kernel",
		"os",
		"os_type",
		"os_version",
		"daemon_id", // ID is a randomly generated unique identifier (e.g. UUID4)
	)

	// Host resource gauges.
	engineCpus   = metricsNS.NewGauge("engine_cpus", "The number of cpus that the host system of the engine has", metrics.Unit("cpus"))
	engineMemory = metricsNS.NewGauge("engine_memory", "The number of bytes of memory that the host system of the engine has", metrics.Bytes)

	// Health check counters and timer.
	healthChecksCounter       = metricsNS.NewCounter("health_checks", "The total number of health checks")
	healthChecksFailedCounter = metricsNS.NewCounter("health_checks_failed", "The total number of failed health checks")
	healthCheckStartDuration  = metricsNS.NewTimer("health_check_start_duration", "The number of seconds it takes to prepare to run health checks")

	// stateCtr tracks the state label of each container by ID and reports the
	// per-state totals; newStateCounter adds it to metricsNS.
	stateCtr = newStateCounter(metricsNS, metricsNS.NewDesc("container_states", "The count of containers in various states", metrics.Unit("containers"), "state"))
)
  37. func init() {
  38. for _, a := range []string{
  39. "start",
  40. "changes",
  41. "commit",
  42. "create",
  43. "delete",
  44. } {
  45. containerActions.WithValues(a).Update(0)
  46. }
  47. metrics.Register(metricsNS)
  48. }
// stateCounter remembers a state label per ID and exposes the number of IDs
// in the "running", "paused" and "stopped" states through its Describe and
// Collect methods.
type stateCounter struct {
	// mu guards states.
	mu sync.RWMutex
	// states maps an ID to its current state label (see set and del).
	states map[string]string
	// desc is the descriptor sent by Describe and used for every metric
	// emitted by Collect.
	desc *prometheus.Desc
}
  54. func newStateCounter(ns *metrics.Namespace, desc *prometheus.Desc) *stateCounter {
  55. c := &stateCounter{
  56. states: make(map[string]string),
  57. desc: desc,
  58. }
  59. ns.Add(c)
  60. return c
  61. }
  62. func (ctr *stateCounter) get() (running int, paused int, stopped int) {
  63. ctr.mu.RLock()
  64. defer ctr.mu.RUnlock()
  65. states := map[string]int{
  66. "running": 0,
  67. "paused": 0,
  68. "stopped": 0,
  69. }
  70. for _, state := range ctr.states {
  71. states[state]++
  72. }
  73. return states["running"], states["paused"], states["stopped"]
  74. }
  75. func (ctr *stateCounter) set(id, label string) {
  76. ctr.mu.Lock()
  77. ctr.states[id] = label
  78. ctr.mu.Unlock()
  79. }
  80. func (ctr *stateCounter) del(id string) {
  81. ctr.mu.Lock()
  82. delete(ctr.states, id)
  83. ctr.mu.Unlock()
  84. }
  85. func (ctr *stateCounter) Describe(ch chan<- *prometheus.Desc) {
  86. ch <- ctr.desc
  87. }
  88. func (ctr *stateCounter) Collect(ch chan<- prometheus.Metric) {
  89. running, paused, stopped := ctr.get()
  90. ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(running), "running")
  91. ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(paused), "paused")
  92. ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(stopped), "stopped")
  93. }
  94. func (daemon *Daemon) cleanupMetricsPlugins() {
  95. ls := daemon.PluginStore.GetAllManagedPluginsByCap(metricsPluginType)
  96. var wg sync.WaitGroup
  97. wg.Add(len(ls))
  98. for _, plugin := range ls {
  99. p := plugin
  100. go func() {
  101. defer wg.Done()
  102. adapter, err := makePluginAdapter(p)
  103. if err != nil {
  104. log.G(context.TODO()).WithError(err).WithField("plugin", p.Name()).Error("Error creating metrics plugin adapter")
  105. return
  106. }
  107. if err := adapter.StopMetrics(); err != nil {
  108. log.G(context.TODO()).WithError(err).WithField("plugin", p.Name()).Error("Error stopping plugin metrics collection")
  109. }
  110. }()
  111. }
  112. wg.Wait()
  113. if daemon.metricsPluginListener != nil {
  114. daemon.metricsPluginListener.Close()
  115. }
  116. }
// metricsPlugin is the set of operations the daemon performs on a metrics
// plugin. makePluginAdapter returns values of this type.
type metricsPlugin interface {
	// StartMetrics asks the plugin to start metrics collection.
	StartMetrics() error
	// StopMetrics asks the plugin to stop metrics collection.
	StopMetrics() error
}
  121. func makePluginAdapter(p plugingetter.CompatPlugin) (metricsPlugin, error) {
  122. if pc, ok := p.(plugingetter.PluginWithV1Client); ok {
  123. return &metricsPluginAdapter{pc.Client(), p.Name()}, nil
  124. }
  125. pa, ok := p.(plugingetter.PluginAddr)
  126. if !ok {
  127. return nil, errdefs.System(errors.Errorf("got unknown plugin type %T", p))
  128. }
  129. if pa.Protocol() != plugins.ProtocolSchemeHTTPV1 {
  130. return nil, errors.Errorf("plugin protocol not supported: %s", pa.Protocol())
  131. }
  132. addr := pa.Addr()
  133. client, err := plugins.NewClientWithTimeout(addr.Network()+"://"+addr.String(), nil, pa.Timeout())
  134. if err != nil {
  135. return nil, errors.Wrap(err, "error creating metrics plugin client")
  136. }
  137. return &metricsPluginAdapter{client, p.Name()}, nil
  138. }
// metricsPluginAdapter implements metricsPlugin by issuing RPC calls through
// a plugin client.
type metricsPluginAdapter struct {
	// c is the client used to call the plugin.
	c *plugins.Client
	// name is the plugin's name as reported by the plugin it was built from.
	name string
}
  143. func (a *metricsPluginAdapter) StartMetrics() error {
  144. type metricsPluginResponse struct {
  145. Err string
  146. }
  147. var res metricsPluginResponse
  148. if err := a.c.Call(metricsPluginType+".StartMetrics", nil, &res); err != nil {
  149. return errors.Wrap(err, "could not start metrics plugin")
  150. }
  151. if res.Err != "" {
  152. return errors.New(res.Err)
  153. }
  154. return nil
  155. }
  156. func (a *metricsPluginAdapter) StopMetrics() error {
  157. if err := a.c.Call(metricsPluginType+".StopMetrics", nil, nil); err != nil {
  158. return errors.Wrap(err, "error stopping metrics collector")
  159. }
  160. return nil
  161. }