2018-02-05 21:05:59 +00:00
package daemon // import "github.com/docker/docker/daemon"
2016-07-20 23:11:28 +00:00
2017-02-10 02:57:35 +00:00
import (
"sync"
2018-05-30 19:00:42 +00:00
"github.com/docker/docker/errdefs"
2017-04-14 01:56:50 +00:00
"github.com/docker/docker/pkg/plugingetter"
2018-04-25 01:45:00 +00:00
"github.com/docker/docker/pkg/plugins"
2019-08-05 14:37:47 +00:00
metrics "github.com/docker/go-metrics"
2017-04-14 01:56:50 +00:00
"github.com/pkg/errors"
2017-02-10 02:57:35 +00:00
"github.com/prometheus/client_golang/prometheus"
2017-07-26 21:42:13 +00:00
"github.com/sirupsen/logrus"
2017-02-10 02:57:35 +00:00
)
2016-07-20 23:11:28 +00:00
2017-04-14 01:56:50 +00:00
// metricsPluginType is the capability name passed to the plugin store when
// listing metrics plugins, and the prefix of the plugin endpoints called in
// this file ("<type>.StartMetrics", "<type>.StopMetrics").
const metricsPluginType = "MetricsCollector"
2016-07-20 23:11:28 +00:00
var (
containerActions metrics . LabeledTimer
networkActions metrics . LabeledTimer
2019-05-30 16:51:41 +00:00
hostInfoFunctions metrics . LabeledTimer
2017-04-24 11:32:01 +00:00
engineInfo metrics . LabeledGauge
2016-07-20 23:11:28 +00:00
engineCpus metrics . Gauge
engineMemory metrics . Gauge
healthChecksCounter metrics . Counter
healthChecksFailedCounter metrics . Counter
2017-02-10 02:57:35 +00:00
stateCtr * stateCounter
2016-07-20 23:11:28 +00:00
)
func init ( ) {
ns := metrics . NewNamespace ( "engine" , "daemon" , nil )
containerActions = ns . NewLabeledTimer ( "container_actions" , "The number of seconds it takes to process each container action" , "action" )
for _ , a := range [ ] string {
"start" ,
"changes" ,
"commit" ,
"create" ,
"delete" ,
} {
containerActions . WithValues ( a ) . Update ( 0 )
}
2019-05-30 16:51:41 +00:00
hostInfoFunctions = ns . NewLabeledTimer ( "host_info_functions" , "The number of seconds it takes to call functions gathering info about the host" , "function" )
2017-02-10 02:57:35 +00:00
2016-07-20 23:11:28 +00:00
networkActions = ns . NewLabeledTimer ( "network_actions" , "The number of seconds it takes to process each network action" , "action" )
2017-04-24 11:32:01 +00:00
engineInfo = ns . NewLabeledGauge ( "engine" , "The information related to the engine and the OS it is running on" , metrics . Unit ( "info" ) ,
2016-07-20 23:11:28 +00:00
"version" ,
"commit" ,
"architecture" ,
2017-04-24 11:32:01 +00:00
"graphdriver" ,
2019-05-30 16:51:41 +00:00
"kernel" ,
"os" ,
2017-04-24 11:32:01 +00:00
"os_type" ,
2019-05-30 16:51:41 +00:00
"os_version" ,
2017-04-24 11:32:01 +00:00
"daemon_id" , // ID is a randomly generated unique identifier (e.g. UUID4)
2016-07-20 23:11:28 +00:00
)
engineCpus = ns . NewGauge ( "engine_cpus" , "The number of cpus that the host system of the engine has" , metrics . Unit ( "cpus" ) )
engineMemory = ns . NewGauge ( "engine_memory" , "The number of bytes of memory that the host system of the engine has" , metrics . Bytes )
healthChecksCounter = ns . NewCounter ( "health_checks" , "The total number of health checks" )
healthChecksFailedCounter = ns . NewCounter ( "health_checks_failed" , "The total number of failed health checks" )
2017-02-10 02:57:35 +00:00
stateCtr = newStateCounter ( ns . NewDesc ( "container_states" , "The count of containers in various states" , metrics . Unit ( "containers" ) , "state" ) )
ns . Add ( stateCtr )
2016-07-20 23:11:28 +00:00
metrics . Register ( ns )
}
2017-02-10 02:57:35 +00:00
// stateCounter remembers one state label per id and reports how many ids are
// currently "running", "paused" or "stopped" via Describe and Collect.
type stateCounter struct {
	// mu guards states.
	mu sync.Mutex
	// states maps an id (presumably a container ID) to its state label.
	states map[string]string
	// desc is the descriptor sent by Describe and attached to every metric
	// produced by Collect.
	desc *prometheus.Desc
}
// newStateCounter returns a stateCounter that uses desc as its descriptor and
// starts with an empty, ready-to-write state map.
func newStateCounter(desc *prometheus.Desc) *stateCounter {
	counter := new(stateCounter)
	counter.desc = desc
	counter.states = make(map[string]string)
	return counter
}
// get returns how many tracked ids currently carry the "running", "paused"
// and "stopped" labels. Any other label is not reflected in the results.
// The state map is read under the mutex.
func (ctr *stateCounter) get() (running int, paused int, stopped int) {
	ctr.mu.Lock()
	defer ctr.mu.Unlock()

	for _, label := range ctr.states {
		switch label {
		case "running":
			running++
		case "paused":
			paused++
		case "stopped":
			stopped++
		}
	}
	return running, paused, stopped
}
// set records label as the current state for id, replacing any earlier value.
func (ctr *stateCounter) set(id, label string) {
	ctr.mu.Lock()
	defer ctr.mu.Unlock()

	ctr.states[id] = label
}
// del forgets the state recorded for id; it is a no-op for an unknown id.
func (ctr *stateCounter) del(id string) {
	ctr.mu.Lock()
	defer ctr.mu.Unlock()

	delete(ctr.states, id)
}
// Describe sends the counter's single descriptor on ch.
func (ctr *stateCounter) Describe(ch chan<- *prometheus.Desc) {
	ch <- ctr.desc
}
// Collect takes a snapshot of the per-state counts and sends one gauge metric
// per state on ch, in the order running, paused, stopped. The state name is
// the metric's label value.
func (ctr *stateCounter) Collect(ch chan<- prometheus.Metric) {
	running, paused, stopped := ctr.get()

	samples := [...]struct {
		state string
		count int
	}{
		{state: "running", count: running},
		{state: "paused", count: paused},
		{state: "stopped", count: stopped},
	}
	for _, sample := range samples {
		ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(sample.count), sample.state)
	}
}
2017-04-14 01:56:50 +00:00
2019-08-09 11:19:49 +00:00
func ( daemon * Daemon ) cleanupMetricsPlugins ( ) {
ls := daemon . PluginStore . GetAllManagedPluginsByCap ( metricsPluginType )
2017-04-14 01:56:50 +00:00
var wg sync . WaitGroup
wg . Add ( len ( ls ) )
2018-01-16 22:51:36 +00:00
for _ , plugin := range ls {
p := plugin
2017-04-14 01:56:50 +00:00
go func ( ) {
defer wg . Done ( )
2018-04-25 01:45:00 +00:00
adapter , err := makePluginAdapter ( p )
if err != nil {
2018-10-08 11:15:38 +00:00
logrus . WithError ( err ) . WithField ( "plugin" , p . Name ( ) ) . Error ( "Error creating metrics plugin adapter" )
2018-04-25 01:45:00 +00:00
return
}
if err := adapter . StopMetrics ( ) ; err != nil {
logrus . WithError ( err ) . WithField ( "plugin" , p . Name ( ) ) . Error ( "Error stopping plugin metrics collection" )
}
2017-04-14 01:56:50 +00:00
} ( )
}
wg . Wait ( )
2019-08-09 11:19:49 +00:00
if daemon . metricsPluginListener != nil {
daemon . metricsPluginListener . Close ( )
2017-04-14 01:56:50 +00:00
}
}
2018-04-25 01:45:00 +00:00
// metricsPlugin is the pair of operations this package needs from a metrics
// plugin, regardless of how the plugin is reached.
type metricsPlugin interface {
	// StartMetrics asks the plugin to start metrics collection.
	StartMetrics() error
	// StopMetrics asks the plugin to stop metrics collection.
	StopMetrics() error
}
2019-03-12 23:37:35 +00:00
func makePluginAdapter ( p plugingetter . CompatPlugin ) ( metricsPlugin , error ) {
2018-05-30 19:00:42 +00:00
if pc , ok := p . ( plugingetter . PluginWithV1Client ) ; ok {
return & metricsPluginAdapter { pc . Client ( ) , p . Name ( ) } , nil
}
2018-04-25 01:45:00 +00:00
pa , ok := p . ( plugingetter . PluginAddr )
if ! ok {
2018-05-30 19:00:42 +00:00
return nil , errdefs . System ( errors . Errorf ( "got unknown plugin type %T" , p ) )
2018-04-25 01:45:00 +00:00
}
2018-05-30 19:00:42 +00:00
2018-04-25 01:45:00 +00:00
if pa . Protocol ( ) != plugins . ProtocolSchemeHTTPV1 {
return nil , errors . Errorf ( "plugin protocol not supported: %s" , pa . Protocol ( ) )
}
addr := pa . Addr ( )
client , err := plugins . NewClientWithTimeout ( addr . Network ( ) + "://" + addr . String ( ) , nil , pa . Timeout ( ) )
if err != nil {
return nil , errors . Wrap ( err , "error creating metrics plugin client" )
}
return & metricsPluginAdapter { client , p . Name ( ) } , nil
}
// metricsPluginAdapter implements metricsPlugin on top of a plugin client.
type metricsPluginAdapter struct {
	// c is the client used to call the plugin's endpoints.
	c *plugins.Client
	// name is the plugin name, as reported by the plugin's Name method when
	// the adapter is built.
	name string
}
func ( a * metricsPluginAdapter ) StartMetrics ( ) error {
2017-04-14 01:56:50 +00:00
type metricsPluginResponse struct {
Err string
}
var res metricsPluginResponse
2018-04-25 01:45:00 +00:00
if err := a . c . Call ( metricsPluginType + ".StartMetrics" , nil , & res ) ; err != nil {
2017-04-14 01:56:50 +00:00
return errors . Wrap ( err , "could not start metrics plugin" )
}
if res . Err != "" {
return errors . New ( res . Err )
}
return nil
}
2018-04-25 01:45:00 +00:00
func ( a * metricsPluginAdapter ) StopMetrics ( ) error {
if err := a . c . Call ( metricsPluginType + ".StopMetrics" , nil , nil ) ; err != nil {
return errors . Wrap ( err , "error stopping metrics collector" )
2017-04-14 01:56:50 +00:00
}
2018-04-25 01:45:00 +00:00
return nil
2017-04-14 01:56:50 +00:00
}