|
@@ -15,15 +15,22 @@ package prometheus
|
|
|
|
|
|
import (
|
|
|
"bytes"
|
|
|
- "errors"
|
|
|
"fmt"
|
|
|
+ "io/ioutil"
|
|
|
"os"
|
|
|
+ "path/filepath"
|
|
|
+ "runtime"
|
|
|
"sort"
|
|
|
+ "strings"
|
|
|
"sync"
|
|
|
+ "unicode/utf8"
|
|
|
|
|
|
"github.com/golang/protobuf/proto"
|
|
|
+ "github.com/prometheus/common/expfmt"
|
|
|
|
|
|
dto "github.com/prometheus/client_model/go"
|
|
|
+
|
|
|
+ "github.com/prometheus/client_golang/prometheus/internal"
|
|
|
)
|
|
|
|
|
|
const (
|
|
@@ -35,13 +42,14 @@ const (
|
|
|
// DefaultRegisterer and DefaultGatherer are the implementations of the
|
|
|
// Registerer and Gatherer interface a number of convenience functions in this
|
|
|
// package act on. Initially, both variables point to the same Registry, which
|
|
|
-// has a process collector (see NewProcessCollector) and a Go collector (see
|
|
|
-// NewGoCollector) already registered. This approach to keep default instances
|
|
|
-// as global state mirrors the approach of other packages in the Go standard
|
|
|
-// library. Note that there are caveats. Change the variables with caution and
|
|
|
-// only if you understand the consequences. Users who want to avoid global state
|
|
|
-// altogether should not use the convenience function and act on custom
|
|
|
-// instances instead.
|
|
|
+// has a process collector (currently on Linux only, see NewProcessCollector)
|
|
|
+// and a Go collector (see NewGoCollector, in particular the note about
|
|
|
+// stop-the-world implication with Go versions older than 1.9) already
|
|
|
+// registered. This approach to keep default instances as global state mirrors
|
|
|
+// the approach of other packages in the Go standard library. Note that there
|
|
|
+// are caveats. Change the variables with caution and only if you understand the
|
|
|
+// consequences. Users who want to avoid global state altogether should not use
|
|
|
+// the convenience functions and act on custom instances instead.
|
|
|
var (
|
|
|
defaultRegistry = NewRegistry()
|
|
|
DefaultRegisterer Registerer = defaultRegistry
|
|
@@ -49,7 +57,7 @@ var (
|
|
|
)
|
|
|
|
|
|
func init() {
|
|
|
- MustRegister(NewProcessCollector(os.Getpid(), ""))
|
|
|
+ MustRegister(NewProcessCollector(ProcessCollectorOpts{}))
|
|
|
MustRegister(NewGoCollector())
|
|
|
}
|
|
|
|
|
@@ -65,7 +73,8 @@ func NewRegistry() *Registry {
|
|
|
|
|
|
// NewPedanticRegistry returns a registry that checks during collection if each
|
|
|
// collected Metric is consistent with its reported Desc, and if the Desc has
|
|
|
-// actually been registered with the registry.
|
|
|
+// actually been registered with the registry. Unchecked Collectors (those whose
|
|
|
+// Describe methed does not yield any descriptors) are excluded from the check.
|
|
|
//
|
|
|
// Usually, a Registry will be happy as long as the union of all collected
|
|
|
// Metrics is consistent and valid even if some metrics are not consistent with
|
|
@@ -80,7 +89,7 @@ func NewPedanticRegistry() *Registry {
|
|
|
|
|
|
// Registerer is the interface for the part of a registry in charge of
|
|
|
// registering and unregistering. Users of custom registries should use
|
|
|
-// Registerer as type for registration purposes (rather then the Registry type
|
|
|
+// Registerer as type for registration purposes (rather than the Registry type
|
|
|
// directly). In that way, they are free to use custom Registerer implementation
|
|
|
// (e.g. for testing purposes).
|
|
|
type Registerer interface {
|
|
@@ -95,8 +104,13 @@ type Registerer interface {
|
|
|
// returned error is an instance of AlreadyRegisteredError, which
|
|
|
// contains the previously registered Collector.
|
|
|
//
|
|
|
- // It is in general not safe to register the same Collector multiple
|
|
|
- // times concurrently.
|
|
|
+ // A Collector whose Describe method does not yield any Desc is treated
|
|
|
+ // as unchecked. Registration will always succeed. No check for
|
|
|
+ // re-registering (see previous paragraph) is performed. Thus, the
|
|
|
+ // caller is responsible for not double-registering the same unchecked
|
|
|
+ // Collector, and for providing a Collector that will not cause
|
|
|
+ // inconsistent metrics on collection. (This would lead to scrape
|
|
|
+ // errors.)
|
|
|
Register(Collector) error
|
|
|
// MustRegister works like Register but registers any number of
|
|
|
// Collectors and panics upon the first registration that causes an
|
|
@@ -105,7 +119,9 @@ type Registerer interface {
|
|
|
// Unregister unregisters the Collector that equals the Collector passed
|
|
|
// in as an argument. (Two Collectors are considered equal if their
|
|
|
// Describe method yields the same set of descriptors.) The function
|
|
|
- // returns whether a Collector was unregistered.
|
|
|
+ // returns whether a Collector was unregistered. Note that an unchecked
|
|
|
+ // Collector cannot be unregistered (as its Describe method does not
|
|
|
+ // yield any descriptor).
|
|
|
//
|
|
|
// Note that even after unregistering, it will not be possible to
|
|
|
// register a new Collector that is inconsistent with the unregistered
|
|
@@ -123,15 +139,23 @@ type Registerer interface {
|
|
|
type Gatherer interface {
|
|
|
// Gather calls the Collect method of the registered Collectors and then
|
|
|
// gathers the collected metrics into a lexicographically sorted slice
|
|
|
- // of MetricFamily protobufs. Even if an error occurs, Gather attempts
|
|
|
- // to gather as many metrics as possible. Hence, if a non-nil error is
|
|
|
- // returned, the returned MetricFamily slice could be nil (in case of a
|
|
|
- // fatal error that prevented any meaningful metric collection) or
|
|
|
- // contain a number of MetricFamily protobufs, some of which might be
|
|
|
- // incomplete, and some might be missing altogether. The returned error
|
|
|
- // (which might be a MultiError) explains the details. In scenarios
|
|
|
- // where complete collection is critical, the returned MetricFamily
|
|
|
- // protobufs should be disregarded if the returned error is non-nil.
|
|
|
+ // of uniquely named MetricFamily protobufs. Gather ensures that the
|
|
|
+ // returned slice is valid and self-consistent so that it can be used
|
|
|
+ // for valid exposition. As an exception to the strict consistency
|
|
|
+ // requirements described for metric.Desc, Gather will tolerate
|
|
|
+ // different sets of label names for metrics of the same metric family.
|
|
|
+ //
|
|
|
+ // Even if an error occurs, Gather attempts to gather as many metrics as
|
|
|
+ // possible. Hence, if a non-nil error is returned, the returned
|
|
|
+ // MetricFamily slice could be nil (in case of a fatal error that
|
|
|
+ // prevented any meaningful metric collection) or contain a number of
|
|
|
+ // MetricFamily protobufs, some of which might be incomplete, and some
|
|
|
+ // might be missing altogether. The returned error (which might be a
|
|
|
+ // MultiError) explains the details. Note that this is mostly useful for
|
|
|
+ // debugging purposes. If the gathered protobufs are to be used for
|
|
|
+ // exposition in actual monitoring, it is almost always better to not
|
|
|
+ // expose an incomplete result and instead disregard the returned
|
|
|
+ // MetricFamily protobufs in case the returned error is non-nil.
|
|
|
Gather() ([]*dto.MetricFamily, error)
|
|
|
}
|
|
|
|
|
@@ -152,38 +176,6 @@ func MustRegister(cs ...Collector) {
|
|
|
DefaultRegisterer.MustRegister(cs...)
|
|
|
}
|
|
|
|
|
|
-// RegisterOrGet registers the provided Collector with the DefaultRegisterer and
|
|
|
-// returns the Collector, unless an equal Collector was registered before, in
|
|
|
-// which case that Collector is returned.
|
|
|
-//
|
|
|
-// Deprecated: RegisterOrGet is merely a convenience function for the
|
|
|
-// implementation as described in the documentation for
|
|
|
-// AlreadyRegisteredError. As the use case is relatively rare, this function
|
|
|
-// will be removed in a future version of this package to clean up the
|
|
|
-// namespace.
|
|
|
-func RegisterOrGet(c Collector) (Collector, error) {
|
|
|
- if err := Register(c); err != nil {
|
|
|
- if are, ok := err.(AlreadyRegisteredError); ok {
|
|
|
- return are.ExistingCollector, nil
|
|
|
- }
|
|
|
- return nil, err
|
|
|
- }
|
|
|
- return c, nil
|
|
|
-}
|
|
|
-
|
|
|
-// MustRegisterOrGet behaves like RegisterOrGet but panics instead of returning
|
|
|
-// an error.
|
|
|
-//
|
|
|
-// Deprecated: This is deprecated for the same reason RegisterOrGet is. See
|
|
|
-// there for details.
|
|
|
-func MustRegisterOrGet(c Collector) Collector {
|
|
|
- c, err := RegisterOrGet(c)
|
|
|
- if err != nil {
|
|
|
- panic(err)
|
|
|
- }
|
|
|
- return c
|
|
|
-}
|
|
|
-
|
|
|
// Unregister removes the registration of the provided Collector from the
|
|
|
// DefaultRegisterer.
|
|
|
//
|
|
@@ -201,25 +193,6 @@ func (gf GathererFunc) Gather() ([]*dto.MetricFamily, error) {
|
|
|
return gf()
|
|
|
}
|
|
|
|
|
|
-// SetMetricFamilyInjectionHook replaces the DefaultGatherer with one that
|
|
|
-// gathers from the previous DefaultGatherers but then merges the MetricFamily
|
|
|
-// protobufs returned from the provided hook function with the MetricFamily
|
|
|
-// protobufs returned from the original DefaultGatherer.
|
|
|
-//
|
|
|
-// Deprecated: This function manipulates the DefaultGatherer variable. Consider
|
|
|
-// the implications, i.e. don't do this concurrently with any uses of the
|
|
|
-// DefaultGatherer. In the rare cases where you need to inject MetricFamily
|
|
|
-// protobufs directly, it is recommended to use a custom Registry and combine it
|
|
|
-// with a custom Gatherer using the Gatherers type (see
|
|
|
-// there). SetMetricFamilyInjectionHook only exists for compatibility reasons
|
|
|
-// with previous versions of this package.
|
|
|
-func SetMetricFamilyInjectionHook(hook func() []*dto.MetricFamily) {
|
|
|
- DefaultGatherer = Gatherers{
|
|
|
- DefaultGatherer,
|
|
|
- GathererFunc(func() ([]*dto.MetricFamily, error) { return hook(), nil }),
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
// AlreadyRegisteredError is returned by the Register method if the Collector to
|
|
|
// be registered has already been registered before, or a different Collector
|
|
|
// that collects the same metrics has been registered before. Registration fails
|
|
@@ -252,6 +225,13 @@ func (errs MultiError) Error() string {
|
|
|
return buf.String()
|
|
|
}
|
|
|
|
|
|
+// Append appends the provided error if it is not nil.
|
|
|
+func (errs *MultiError) Append(err error) {
|
|
|
+ if err != nil {
|
|
|
+ *errs = append(*errs, err)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
// MaybeUnwrap returns nil if len(errs) is 0. It returns the first and only
|
|
|
// contained error as error if len(errs is 1). In all other cases, it returns
|
|
|
// the MultiError directly. This is helpful for returning a MultiError in a way
|
|
@@ -276,6 +256,7 @@ type Registry struct {
|
|
|
collectorsByID map[uint64]Collector // ID is a hash of the descIDs.
|
|
|
descIDs map[uint64]struct{}
|
|
|
dimHashesByName map[string]uint64
|
|
|
+ uncheckedCollectors []Collector
|
|
|
pedanticChecksEnabled bool
|
|
|
}
|
|
|
|
|
@@ -293,8 +274,13 @@ func (r *Registry) Register(c Collector) error {
|
|
|
close(descChan)
|
|
|
}()
|
|
|
r.mtx.Lock()
|
|
|
- defer r.mtx.Unlock()
|
|
|
- // Coduct various tests...
|
|
|
+ defer func() {
|
|
|
+ // Drain channel in case of premature return to not leak a goroutine.
|
|
|
+ for range descChan {
|
|
|
+ }
|
|
|
+ r.mtx.Unlock()
|
|
|
+ }()
|
|
|
+ // Conduct various tests...
|
|
|
for desc := range descChan {
|
|
|
|
|
|
// Is the descriptor valid at all?
|
|
@@ -333,9 +319,10 @@ func (r *Registry) Register(c Collector) error {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- // Did anything happen at all?
|
|
|
+ // A Collector yielding no Desc at all is considered unchecked.
|
|
|
if len(newDescIDs) == 0 {
|
|
|
- return errors.New("collector has no descriptors")
|
|
|
+ r.uncheckedCollectors = append(r.uncheckedCollectors, c)
|
|
|
+ return nil
|
|
|
}
|
|
|
if existing, exists := r.collectorsByID[collectorID]; exists {
|
|
|
return AlreadyRegisteredError{
|
|
@@ -409,31 +396,25 @@ func (r *Registry) MustRegister(cs ...Collector) {
|
|
|
// Gather implements Gatherer.
|
|
|
func (r *Registry) Gather() ([]*dto.MetricFamily, error) {
|
|
|
var (
|
|
|
- metricChan = make(chan Metric, capMetricChan)
|
|
|
- metricHashes = map[uint64]struct{}{}
|
|
|
- dimHashes = map[string]uint64{}
|
|
|
- wg sync.WaitGroup
|
|
|
- errs MultiError // The collected errors to return in the end.
|
|
|
- registeredDescIDs map[uint64]struct{} // Only used for pedantic checks
|
|
|
+ checkedMetricChan = make(chan Metric, capMetricChan)
|
|
|
+ uncheckedMetricChan = make(chan Metric, capMetricChan)
|
|
|
+ metricHashes = map[uint64]struct{}{}
|
|
|
+ wg sync.WaitGroup
|
|
|
+ errs MultiError // The collected errors to return in the end.
|
|
|
+ registeredDescIDs map[uint64]struct{} // Only used for pedantic checks
|
|
|
)
|
|
|
|
|
|
r.mtx.RLock()
|
|
|
+ goroutineBudget := len(r.collectorsByID) + len(r.uncheckedCollectors)
|
|
|
metricFamiliesByName := make(map[string]*dto.MetricFamily, len(r.dimHashesByName))
|
|
|
-
|
|
|
- // Scatter.
|
|
|
- // (Collectors could be complex and slow, so we call them all at once.)
|
|
|
- wg.Add(len(r.collectorsByID))
|
|
|
- go func() {
|
|
|
- wg.Wait()
|
|
|
- close(metricChan)
|
|
|
- }()
|
|
|
+ checkedCollectors := make(chan Collector, len(r.collectorsByID))
|
|
|
+ uncheckedCollectors := make(chan Collector, len(r.uncheckedCollectors))
|
|
|
for _, collector := range r.collectorsByID {
|
|
|
- go func(collector Collector) {
|
|
|
- defer wg.Done()
|
|
|
- collector.Collect(metricChan)
|
|
|
- }(collector)
|
|
|
+ checkedCollectors <- collector
|
|
|
+ }
|
|
|
+ for _, collector := range r.uncheckedCollectors {
|
|
|
+ uncheckedCollectors <- collector
|
|
|
}
|
|
|
-
|
|
|
// In case pedantic checks are enabled, we have to copy the map before
|
|
|
// giving up the RLock.
|
|
|
if r.pedanticChecksEnabled {
|
|
@@ -442,133 +423,264 @@ func (r *Registry) Gather() ([]*dto.MetricFamily, error) {
|
|
|
registeredDescIDs[id] = struct{}{}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
r.mtx.RUnlock()
|
|
|
|
|
|
- // Drain metricChan in case of premature return.
|
|
|
+ wg.Add(goroutineBudget)
|
|
|
+
|
|
|
+ collectWorker := func() {
|
|
|
+ for {
|
|
|
+ select {
|
|
|
+ case collector := <-checkedCollectors:
|
|
|
+ collector.Collect(checkedMetricChan)
|
|
|
+ case collector := <-uncheckedCollectors:
|
|
|
+ collector.Collect(uncheckedMetricChan)
|
|
|
+ default:
|
|
|
+ return
|
|
|
+ }
|
|
|
+ wg.Done()
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // Start the first worker now to make sure at least one is running.
|
|
|
+ go collectWorker()
|
|
|
+ goroutineBudget--
|
|
|
+
|
|
|
+ // Close checkedMetricChan and uncheckedMetricChan once all collectors
|
|
|
+ // are collected.
|
|
|
+ go func() {
|
|
|
+ wg.Wait()
|
|
|
+ close(checkedMetricChan)
|
|
|
+ close(uncheckedMetricChan)
|
|
|
+ }()
|
|
|
+
|
|
|
+ // Drain checkedMetricChan and uncheckedMetricChan in case of premature return.
|
|
|
defer func() {
|
|
|
- for _ = range metricChan {
|
|
|
+ if checkedMetricChan != nil {
|
|
|
+ for range checkedMetricChan {
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if uncheckedMetricChan != nil {
|
|
|
+ for range uncheckedMetricChan {
|
|
|
+ }
|
|
|
}
|
|
|
}()
|
|
|
|
|
|
- // Gather.
|
|
|
- for metric := range metricChan {
|
|
|
- // This could be done concurrently, too, but it required locking
|
|
|
- // of metricFamiliesByName (and of metricHashes if checks are
|
|
|
- // enabled). Most likely not worth it.
|
|
|
- desc := metric.Desc()
|
|
|
- dtoMetric := &dto.Metric{}
|
|
|
- if err := metric.Write(dtoMetric); err != nil {
|
|
|
- errs = append(errs, fmt.Errorf(
|
|
|
- "error collecting metric %v: %s", desc, err,
|
|
|
+ // Copy the channel references so we can nil them out later to remove
|
|
|
+ // them from the select statements below.
|
|
|
+ cmc := checkedMetricChan
|
|
|
+ umc := uncheckedMetricChan
|
|
|
+
|
|
|
+ for {
|
|
|
+ select {
|
|
|
+ case metric, ok := <-cmc:
|
|
|
+ if !ok {
|
|
|
+ cmc = nil
|
|
|
+ break
|
|
|
+ }
|
|
|
+ errs.Append(processMetric(
|
|
|
+ metric, metricFamiliesByName,
|
|
|
+ metricHashes,
|
|
|
+ registeredDescIDs,
|
|
|
))
|
|
|
- continue
|
|
|
- }
|
|
|
- metricFamily, ok := metricFamiliesByName[desc.fqName]
|
|
|
- if ok {
|
|
|
- if metricFamily.GetHelp() != desc.help {
|
|
|
- errs = append(errs, fmt.Errorf(
|
|
|
- "collected metric %s %s has help %q but should have %q",
|
|
|
- desc.fqName, dtoMetric, desc.help, metricFamily.GetHelp(),
|
|
|
- ))
|
|
|
- continue
|
|
|
+ case metric, ok := <-umc:
|
|
|
+ if !ok {
|
|
|
+ umc = nil
|
|
|
+ break
|
|
|
}
|
|
|
- // TODO(beorn7): Simplify switch once Desc has type.
|
|
|
- switch metricFamily.GetType() {
|
|
|
- case dto.MetricType_COUNTER:
|
|
|
- if dtoMetric.Counter == nil {
|
|
|
- errs = append(errs, fmt.Errorf(
|
|
|
- "collected metric %s %s should be a Counter",
|
|
|
- desc.fqName, dtoMetric,
|
|
|
- ))
|
|
|
- continue
|
|
|
- }
|
|
|
- case dto.MetricType_GAUGE:
|
|
|
- if dtoMetric.Gauge == nil {
|
|
|
- errs = append(errs, fmt.Errorf(
|
|
|
- "collected metric %s %s should be a Gauge",
|
|
|
- desc.fqName, dtoMetric,
|
|
|
- ))
|
|
|
- continue
|
|
|
- }
|
|
|
- case dto.MetricType_SUMMARY:
|
|
|
- if dtoMetric.Summary == nil {
|
|
|
- errs = append(errs, fmt.Errorf(
|
|
|
- "collected metric %s %s should be a Summary",
|
|
|
- desc.fqName, dtoMetric,
|
|
|
- ))
|
|
|
- continue
|
|
|
- }
|
|
|
- case dto.MetricType_UNTYPED:
|
|
|
- if dtoMetric.Untyped == nil {
|
|
|
- errs = append(errs, fmt.Errorf(
|
|
|
- "collected metric %s %s should be Untyped",
|
|
|
- desc.fqName, dtoMetric,
|
|
|
+ errs.Append(processMetric(
|
|
|
+ metric, metricFamiliesByName,
|
|
|
+ metricHashes,
|
|
|
+ nil,
|
|
|
+ ))
|
|
|
+ default:
|
|
|
+ if goroutineBudget <= 0 || len(checkedCollectors)+len(uncheckedCollectors) == 0 {
|
|
|
+ // All collectors are already being worked on or
|
|
|
+ // we have already as many goroutines started as
|
|
|
+ // there are collectors. Do the same as above,
|
|
|
+ // just without the default.
|
|
|
+ select {
|
|
|
+ case metric, ok := <-cmc:
|
|
|
+ if !ok {
|
|
|
+ cmc = nil
|
|
|
+ break
|
|
|
+ }
|
|
|
+ errs.Append(processMetric(
|
|
|
+ metric, metricFamiliesByName,
|
|
|
+ metricHashes,
|
|
|
+ registeredDescIDs,
|
|
|
))
|
|
|
- continue
|
|
|
- }
|
|
|
- case dto.MetricType_HISTOGRAM:
|
|
|
- if dtoMetric.Histogram == nil {
|
|
|
- errs = append(errs, fmt.Errorf(
|
|
|
- "collected metric %s %s should be a Histogram",
|
|
|
- desc.fqName, dtoMetric,
|
|
|
+ case metric, ok := <-umc:
|
|
|
+ if !ok {
|
|
|
+ umc = nil
|
|
|
+ break
|
|
|
+ }
|
|
|
+ errs.Append(processMetric(
|
|
|
+ metric, metricFamiliesByName,
|
|
|
+ metricHashes,
|
|
|
+ nil,
|
|
|
))
|
|
|
- continue
|
|
|
}
|
|
|
- default:
|
|
|
- panic("encountered MetricFamily with invalid type")
|
|
|
+ break
|
|
|
}
|
|
|
- } else {
|
|
|
- metricFamily = &dto.MetricFamily{}
|
|
|
- metricFamily.Name = proto.String(desc.fqName)
|
|
|
- metricFamily.Help = proto.String(desc.help)
|
|
|
- // TODO(beorn7): Simplify switch once Desc has type.
|
|
|
- switch {
|
|
|
- case dtoMetric.Gauge != nil:
|
|
|
- metricFamily.Type = dto.MetricType_GAUGE.Enum()
|
|
|
- case dtoMetric.Counter != nil:
|
|
|
- metricFamily.Type = dto.MetricType_COUNTER.Enum()
|
|
|
- case dtoMetric.Summary != nil:
|
|
|
- metricFamily.Type = dto.MetricType_SUMMARY.Enum()
|
|
|
- case dtoMetric.Untyped != nil:
|
|
|
- metricFamily.Type = dto.MetricType_UNTYPED.Enum()
|
|
|
- case dtoMetric.Histogram != nil:
|
|
|
- metricFamily.Type = dto.MetricType_HISTOGRAM.Enum()
|
|
|
- default:
|
|
|
- errs = append(errs, fmt.Errorf(
|
|
|
- "empty metric collected: %s", dtoMetric,
|
|
|
- ))
|
|
|
- continue
|
|
|
+ // Start more workers.
|
|
|
+ go collectWorker()
|
|
|
+ goroutineBudget--
|
|
|
+ runtime.Gosched()
|
|
|
+ }
|
|
|
+ // Once both checkedMetricChan and uncheckdMetricChan are closed
|
|
|
+ // and drained, the contraption above will nil out cmc and umc,
|
|
|
+ // and then we can leave the collect loop here.
|
|
|
+ if cmc == nil && umc == nil {
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return internal.NormalizeMetricFamilies(metricFamiliesByName), errs.MaybeUnwrap()
|
|
|
+}
|
|
|
+
|
|
|
+// WriteToTextfile calls Gather on the provided Gatherer, encodes the result in the
|
|
|
+// Prometheus text format, and writes it to a temporary file. Upon success, the
|
|
|
+// temporary file is renamed to the provided filename.
|
|
|
+//
|
|
|
+// This is intended for use with the textfile collector of the node exporter.
|
|
|
+// Note that the node exporter expects the filename to be suffixed with ".prom".
|
|
|
+func WriteToTextfile(filename string, g Gatherer) error {
|
|
|
+ tmp, err := ioutil.TempFile(filepath.Dir(filename), filepath.Base(filename))
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ defer os.Remove(tmp.Name())
|
|
|
+
|
|
|
+ mfs, err := g.Gather()
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ for _, mf := range mfs {
|
|
|
+ if _, err := expfmt.MetricFamilyToText(tmp, mf); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if err := tmp.Close(); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+
|
|
|
+ if err := os.Chmod(tmp.Name(), 0644); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ return os.Rename(tmp.Name(), filename)
|
|
|
+}
|
|
|
+
|
|
|
+// processMetric is an internal helper method only used by the Gather method.
|
|
|
+func processMetric(
|
|
|
+ metric Metric,
|
|
|
+ metricFamiliesByName map[string]*dto.MetricFamily,
|
|
|
+ metricHashes map[uint64]struct{},
|
|
|
+ registeredDescIDs map[uint64]struct{},
|
|
|
+) error {
|
|
|
+ desc := metric.Desc()
|
|
|
+ // Wrapped metrics collected by an unchecked Collector can have an
|
|
|
+ // invalid Desc.
|
|
|
+ if desc.err != nil {
|
|
|
+ return desc.err
|
|
|
+ }
|
|
|
+ dtoMetric := &dto.Metric{}
|
|
|
+ if err := metric.Write(dtoMetric); err != nil {
|
|
|
+ return fmt.Errorf("error collecting metric %v: %s", desc, err)
|
|
|
+ }
|
|
|
+ metricFamily, ok := metricFamiliesByName[desc.fqName]
|
|
|
+ if ok { // Existing name.
|
|
|
+ if metricFamily.GetHelp() != desc.help {
|
|
|
+ return fmt.Errorf(
|
|
|
+ "collected metric %s %s has help %q but should have %q",
|
|
|
+ desc.fqName, dtoMetric, desc.help, metricFamily.GetHelp(),
|
|
|
+ )
|
|
|
+ }
|
|
|
+ // TODO(beorn7): Simplify switch once Desc has type.
|
|
|
+ switch metricFamily.GetType() {
|
|
|
+ case dto.MetricType_COUNTER:
|
|
|
+ if dtoMetric.Counter == nil {
|
|
|
+ return fmt.Errorf(
|
|
|
+ "collected metric %s %s should be a Counter",
|
|
|
+ desc.fqName, dtoMetric,
|
|
|
+ )
|
|
|
}
|
|
|
- metricFamiliesByName[desc.fqName] = metricFamily
|
|
|
- }
|
|
|
- if err := checkMetricConsistency(metricFamily, dtoMetric, metricHashes, dimHashes); err != nil {
|
|
|
- errs = append(errs, err)
|
|
|
- continue
|
|
|
- }
|
|
|
- if r.pedanticChecksEnabled {
|
|
|
- // Is the desc registered at all?
|
|
|
- if _, exist := registeredDescIDs[desc.id]; !exist {
|
|
|
- errs = append(errs, fmt.Errorf(
|
|
|
- "collected metric %s %s with unregistered descriptor %s",
|
|
|
- metricFamily.GetName(), dtoMetric, desc,
|
|
|
- ))
|
|
|
- continue
|
|
|
+ case dto.MetricType_GAUGE:
|
|
|
+ if dtoMetric.Gauge == nil {
|
|
|
+ return fmt.Errorf(
|
|
|
+ "collected metric %s %s should be a Gauge",
|
|
|
+ desc.fqName, dtoMetric,
|
|
|
+ )
|
|
|
}
|
|
|
- if err := checkDescConsistency(metricFamily, dtoMetric, desc); err != nil {
|
|
|
- errs = append(errs, err)
|
|
|
- continue
|
|
|
+ case dto.MetricType_SUMMARY:
|
|
|
+ if dtoMetric.Summary == nil {
|
|
|
+ return fmt.Errorf(
|
|
|
+ "collected metric %s %s should be a Summary",
|
|
|
+ desc.fqName, dtoMetric,
|
|
|
+ )
|
|
|
}
|
|
|
+ case dto.MetricType_UNTYPED:
|
|
|
+ if dtoMetric.Untyped == nil {
|
|
|
+ return fmt.Errorf(
|
|
|
+ "collected metric %s %s should be Untyped",
|
|
|
+ desc.fqName, dtoMetric,
|
|
|
+ )
|
|
|
+ }
|
|
|
+ case dto.MetricType_HISTOGRAM:
|
|
|
+ if dtoMetric.Histogram == nil {
|
|
|
+ return fmt.Errorf(
|
|
|
+ "collected metric %s %s should be a Histogram",
|
|
|
+ desc.fqName, dtoMetric,
|
|
|
+ )
|
|
|
+ }
|
|
|
+ default:
|
|
|
+ panic("encountered MetricFamily with invalid type")
|
|
|
+ }
|
|
|
+ } else { // New name.
|
|
|
+ metricFamily = &dto.MetricFamily{}
|
|
|
+ metricFamily.Name = proto.String(desc.fqName)
|
|
|
+ metricFamily.Help = proto.String(desc.help)
|
|
|
+ // TODO(beorn7): Simplify switch once Desc has type.
|
|
|
+ switch {
|
|
|
+ case dtoMetric.Gauge != nil:
|
|
|
+ metricFamily.Type = dto.MetricType_GAUGE.Enum()
|
|
|
+ case dtoMetric.Counter != nil:
|
|
|
+ metricFamily.Type = dto.MetricType_COUNTER.Enum()
|
|
|
+ case dtoMetric.Summary != nil:
|
|
|
+ metricFamily.Type = dto.MetricType_SUMMARY.Enum()
|
|
|
+ case dtoMetric.Untyped != nil:
|
|
|
+ metricFamily.Type = dto.MetricType_UNTYPED.Enum()
|
|
|
+ case dtoMetric.Histogram != nil:
|
|
|
+ metricFamily.Type = dto.MetricType_HISTOGRAM.Enum()
|
|
|
+ default:
|
|
|
+ return fmt.Errorf("empty metric collected: %s", dtoMetric)
|
|
|
+ }
|
|
|
+ if err := checkSuffixCollisions(metricFamily, metricFamiliesByName); err != nil {
|
|
|
+ return err
|
|
|
}
|
|
|
- metricFamily.Metric = append(metricFamily.Metric, dtoMetric)
|
|
|
+ metricFamiliesByName[desc.fqName] = metricFamily
|
|
|
}
|
|
|
- return normalizeMetricFamilies(metricFamiliesByName), errs.MaybeUnwrap()
|
|
|
+ if err := checkMetricConsistency(metricFamily, dtoMetric, metricHashes); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ if registeredDescIDs != nil {
|
|
|
+ // Is the desc registered at all?
|
|
|
+ if _, exist := registeredDescIDs[desc.id]; !exist {
|
|
|
+ return fmt.Errorf(
|
|
|
+ "collected metric %s %s with unregistered descriptor %s",
|
|
|
+ metricFamily.GetName(), dtoMetric, desc,
|
|
|
+ )
|
|
|
+ }
|
|
|
+ if err := checkDescConsistency(metricFamily, dtoMetric, desc); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ }
|
|
|
+ metricFamily.Metric = append(metricFamily.Metric, dtoMetric)
|
|
|
+ return nil
|
|
|
}
|
|
|
|
|
|
// Gatherers is a slice of Gatherer instances that implements the Gatherer
|
|
|
// interface itself. Its Gather method calls Gather on all Gatherers in the
|
|
|
// slice in order and returns the merged results. Errors returned from the
|
|
|
-// Gather calles are all returned in a flattened MultiError. Duplicate and
|
|
|
+// Gather calls are all returned in a flattened MultiError. Duplicate and
|
|
|
// inconsistent Metrics are skipped (first occurrence in slice order wins) and
|
|
|
// reported in the returned error.
|
|
|
//
|
|
@@ -588,7 +700,6 @@ func (gs Gatherers) Gather() ([]*dto.MetricFamily, error) {
|
|
|
var (
|
|
|
metricFamiliesByName = map[string]*dto.MetricFamily{}
|
|
|
metricHashes = map[uint64]struct{}{}
|
|
|
- dimHashes = map[string]uint64{}
|
|
|
errs MultiError // The collected errors to return in the end.
|
|
|
)
|
|
|
|
|
@@ -625,10 +736,14 @@ func (gs Gatherers) Gather() ([]*dto.MetricFamily, error) {
|
|
|
existingMF.Name = mf.Name
|
|
|
existingMF.Help = mf.Help
|
|
|
existingMF.Type = mf.Type
|
|
|
+ if err := checkSuffixCollisions(existingMF, metricFamiliesByName); err != nil {
|
|
|
+ errs = append(errs, err)
|
|
|
+ continue
|
|
|
+ }
|
|
|
metricFamiliesByName[mf.GetName()] = existingMF
|
|
|
}
|
|
|
for _, m := range mf.Metric {
|
|
|
- if err := checkMetricConsistency(existingMF, m, metricHashes, dimHashes); err != nil {
|
|
|
+ if err := checkMetricConsistency(existingMF, m, metricHashes); err != nil {
|
|
|
errs = append(errs, err)
|
|
|
continue
|
|
|
}
|
|
@@ -636,88 +751,80 @@ func (gs Gatherers) Gather() ([]*dto.MetricFamily, error) {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- return normalizeMetricFamilies(metricFamiliesByName), errs.MaybeUnwrap()
|
|
|
-}
|
|
|
-
|
|
|
-// metricSorter is a sortable slice of *dto.Metric.
|
|
|
-type metricSorter []*dto.Metric
|
|
|
-
|
|
|
-func (s metricSorter) Len() int {
|
|
|
- return len(s)
|
|
|
-}
|
|
|
-
|
|
|
-func (s metricSorter) Swap(i, j int) {
|
|
|
- s[i], s[j] = s[j], s[i]
|
|
|
-}
|
|
|
-
|
|
|
-func (s metricSorter) Less(i, j int) bool {
|
|
|
- if len(s[i].Label) != len(s[j].Label) {
|
|
|
- // This should not happen. The metrics are
|
|
|
- // inconsistent. However, we have to deal with the fact, as
|
|
|
- // people might use custom collectors or metric family injection
|
|
|
- // to create inconsistent metrics. So let's simply compare the
|
|
|
- // number of labels in this case. That will still yield
|
|
|
- // reproducible sorting.
|
|
|
- return len(s[i].Label) < len(s[j].Label)
|
|
|
- }
|
|
|
- for n, lp := range s[i].Label {
|
|
|
- vi := lp.GetValue()
|
|
|
- vj := s[j].Label[n].GetValue()
|
|
|
- if vi != vj {
|
|
|
- return vi < vj
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- // We should never arrive here. Multiple metrics with the same
|
|
|
- // label set in the same scrape will lead to undefined ingestion
|
|
|
- // behavior. However, as above, we have to provide stable sorting
|
|
|
- // here, even for inconsistent metrics. So sort equal metrics
|
|
|
- // by their timestamp, with missing timestamps (implying "now")
|
|
|
- // coming last.
|
|
|
- if s[i].TimestampMs == nil {
|
|
|
- return false
|
|
|
- }
|
|
|
- if s[j].TimestampMs == nil {
|
|
|
- return true
|
|
|
- }
|
|
|
- return s[i].GetTimestampMs() < s[j].GetTimestampMs()
|
|
|
+ return internal.NormalizeMetricFamilies(metricFamiliesByName), errs.MaybeUnwrap()
|
|
|
}
|
|
|
|
|
|
-// normalizeMetricFamilies returns a MetricFamily slice whith empty
|
|
|
-// MetricFamilies pruned and the remaining MetricFamilies sorted by name within
|
|
|
-// the slice, with the contained Metrics sorted within each MetricFamily.
|
|
|
-func normalizeMetricFamilies(metricFamiliesByName map[string]*dto.MetricFamily) []*dto.MetricFamily {
|
|
|
- for _, mf := range metricFamiliesByName {
|
|
|
- sort.Sort(metricSorter(mf.Metric))
|
|
|
+// checkSuffixCollisions checks for collisions with the “magic” suffixes the
|
|
|
+// Prometheus text format and the internal metric representation of the
|
|
|
+// Prometheus server add while flattening Summaries and Histograms.
|
|
|
+func checkSuffixCollisions(mf *dto.MetricFamily, mfs map[string]*dto.MetricFamily) error {
|
|
|
+ var (
|
|
|
+ newName = mf.GetName()
|
|
|
+ newType = mf.GetType()
|
|
|
+ newNameWithoutSuffix = ""
|
|
|
+ )
|
|
|
+ switch {
|
|
|
+ case strings.HasSuffix(newName, "_count"):
|
|
|
+ newNameWithoutSuffix = newName[:len(newName)-6]
|
|
|
+ case strings.HasSuffix(newName, "_sum"):
|
|
|
+ newNameWithoutSuffix = newName[:len(newName)-4]
|
|
|
+ case strings.HasSuffix(newName, "_bucket"):
|
|
|
+ newNameWithoutSuffix = newName[:len(newName)-7]
|
|
|
+ }
|
|
|
+ if newNameWithoutSuffix != "" {
|
|
|
+ if existingMF, ok := mfs[newNameWithoutSuffix]; ok {
|
|
|
+ switch existingMF.GetType() {
|
|
|
+ case dto.MetricType_SUMMARY:
|
|
|
+ if !strings.HasSuffix(newName, "_bucket") {
|
|
|
+ return fmt.Errorf(
|
|
|
+ "collected metric named %q collides with previously collected summary named %q",
|
|
|
+ newName, newNameWithoutSuffix,
|
|
|
+ )
|
|
|
+ }
|
|
|
+ case dto.MetricType_HISTOGRAM:
|
|
|
+ return fmt.Errorf(
|
|
|
+ "collected metric named %q collides with previously collected histogram named %q",
|
|
|
+ newName, newNameWithoutSuffix,
|
|
|
+ )
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
- names := make([]string, 0, len(metricFamiliesByName))
|
|
|
- for name, mf := range metricFamiliesByName {
|
|
|
- if len(mf.Metric) > 0 {
|
|
|
- names = append(names, name)
|
|
|
+ if newType == dto.MetricType_SUMMARY || newType == dto.MetricType_HISTOGRAM {
|
|
|
+ if _, ok := mfs[newName+"_count"]; ok {
|
|
|
+ return fmt.Errorf(
|
|
|
+ "collected histogram or summary named %q collides with previously collected metric named %q",
|
|
|
+ newName, newName+"_count",
|
|
|
+ )
|
|
|
+ }
|
|
|
+ if _, ok := mfs[newName+"_sum"]; ok {
|
|
|
+ return fmt.Errorf(
|
|
|
+ "collected histogram or summary named %q collides with previously collected metric named %q",
|
|
|
+ newName, newName+"_sum",
|
|
|
+ )
|
|
|
}
|
|
|
}
|
|
|
- sort.Strings(names)
|
|
|
- result := make([]*dto.MetricFamily, 0, len(names))
|
|
|
- for _, name := range names {
|
|
|
- result = append(result, metricFamiliesByName[name])
|
|
|
+ if newType == dto.MetricType_HISTOGRAM {
|
|
|
+ if _, ok := mfs[newName+"_bucket"]; ok {
|
|
|
+ return fmt.Errorf(
|
|
|
+ "collected histogram named %q collides with previously collected metric named %q",
|
|
|
+ newName, newName+"_bucket",
|
|
|
+ )
|
|
|
+ }
|
|
|
}
|
|
|
- return result
|
|
|
+ return nil
|
|
|
}
|
|
|
|
|
|
// checkMetricConsistency checks if the provided Metric is consistent with the
|
|
|
-// provided MetricFamily. It also hashed the Metric labels and the MetricFamily
|
|
|
-// name. If the resulting hash is alread in the provided metricHashes, an error
|
|
|
-// is returned. If not, it is added to metricHashes. The provided dimHashes maps
|
|
|
-// MetricFamily names to their dimHash (hashed sorted label names). If dimHashes
|
|
|
-// doesn't yet contain a hash for the provided MetricFamily, it is
|
|
|
-// added. Otherwise, an error is returned if the existing dimHashes in not equal
|
|
|
-// the calculated dimHash.
|
|
|
+// provided MetricFamily. It also hashes the Metric labels and the MetricFamily
|
|
|
+// name. If the resulting hash is already in the provided metricHashes, an error
|
|
|
+// is returned. If not, it is added to metricHashes.
|
|
|
func checkMetricConsistency(
|
|
|
metricFamily *dto.MetricFamily,
|
|
|
dtoMetric *dto.Metric,
|
|
|
metricHashes map[uint64]struct{},
|
|
|
- dimHashes map[string]uint64,
|
|
|
) error {
|
|
|
+ name := metricFamily.GetName()
|
|
|
+
|
|
|
// Type consistency with metric family.
|
|
|
if metricFamily.GetType() == dto.MetricType_GAUGE && dtoMetric.Gauge == nil ||
|
|
|
metricFamily.GetType() == dto.MetricType_COUNTER && dtoMetric.Counter == nil ||
|
|
@@ -725,41 +832,65 @@ func checkMetricConsistency(
|
|
|
metricFamily.GetType() == dto.MetricType_HISTOGRAM && dtoMetric.Histogram == nil ||
|
|
|
metricFamily.GetType() == dto.MetricType_UNTYPED && dtoMetric.Untyped == nil {
|
|
|
return fmt.Errorf(
|
|
|
- "collected metric %s %s is not a %s",
|
|
|
- metricFamily.GetName(), dtoMetric, metricFamily.GetType(),
|
|
|
+ "collected metric %q { %s} is not a %s",
|
|
|
+ name, dtoMetric, metricFamily.GetType(),
|
|
|
)
|
|
|
}
|
|
|
|
|
|
- // Is the metric unique (i.e. no other metric with the same name and the same label values)?
|
|
|
+ previousLabelName := ""
|
|
|
+ for _, labelPair := range dtoMetric.GetLabel() {
|
|
|
+ labelName := labelPair.GetName()
|
|
|
+ if labelName == previousLabelName {
|
|
|
+ return fmt.Errorf(
|
|
|
+ "collected metric %q { %s} has two or more labels with the same name: %s",
|
|
|
+ name, dtoMetric, labelName,
|
|
|
+ )
|
|
|
+ }
|
|
|
+ if !checkLabelName(labelName) {
|
|
|
+ return fmt.Errorf(
|
|
|
+ "collected metric %q { %s} has a label with an invalid name: %s",
|
|
|
+ name, dtoMetric, labelName,
|
|
|
+ )
|
|
|
+ }
|
|
|
+ if dtoMetric.Summary != nil && labelName == quantileLabel {
|
|
|
+ return fmt.Errorf(
|
|
|
+ "collected metric %q { %s} must not have an explicit %q label",
|
|
|
+ name, dtoMetric, quantileLabel,
|
|
|
+ )
|
|
|
+ }
|
|
|
+ if !utf8.ValidString(labelPair.GetValue()) {
|
|
|
+ return fmt.Errorf(
|
|
|
+ "collected metric %q { %s} has a label named %q whose value is not utf8: %#v",
|
|
|
+ name, dtoMetric, labelName, labelPair.GetValue())
|
|
|
+ }
|
|
|
+ previousLabelName = labelName
|
|
|
+ }
|
|
|
+
|
|
|
+ // Is the metric unique (i.e. no other metric with the same name and the same labels)?
|
|
|
h := hashNew()
|
|
|
- h = hashAdd(h, metricFamily.GetName())
|
|
|
+ h = hashAdd(h, name)
|
|
|
h = hashAddByte(h, separatorByte)
|
|
|
- dh := hashNew()
|
|
|
// Make sure label pairs are sorted. We depend on it for the consistency
|
|
|
// check.
|
|
|
- sort.Sort(LabelPairSorter(dtoMetric.Label))
|
|
|
+ if !sort.IsSorted(labelPairSorter(dtoMetric.Label)) {
|
|
|
+ // We cannot sort dtoMetric.Label in place as it is immutable by contract.
|
|
|
+ copiedLabels := make([]*dto.LabelPair, len(dtoMetric.Label))
|
|
|
+ copy(copiedLabels, dtoMetric.Label)
|
|
|
+ sort.Sort(labelPairSorter(copiedLabels))
|
|
|
+ dtoMetric.Label = copiedLabels
|
|
|
+ }
|
|
|
for _, lp := range dtoMetric.Label {
|
|
|
+ h = hashAdd(h, lp.GetName())
|
|
|
+ h = hashAddByte(h, separatorByte)
|
|
|
h = hashAdd(h, lp.GetValue())
|
|
|
h = hashAddByte(h, separatorByte)
|
|
|
- dh = hashAdd(dh, lp.GetName())
|
|
|
- dh = hashAddByte(dh, separatorByte)
|
|
|
}
|
|
|
if _, exists := metricHashes[h]; exists {
|
|
|
return fmt.Errorf(
|
|
|
- "collected metric %s %s was collected before with the same name and label values",
|
|
|
- metricFamily.GetName(), dtoMetric,
|
|
|
+ "collected metric %q { %s} was collected before with the same name and label values",
|
|
|
+ name, dtoMetric,
|
|
|
)
|
|
|
}
|
|
|
- if dimHash, ok := dimHashes[metricFamily.GetName()]; ok {
|
|
|
- if dimHash != dh {
|
|
|
- return fmt.Errorf(
|
|
|
- "collected metric %s %s has label dimensions inconsistent with previously collected metrics in the same metric family",
|
|
|
- metricFamily.GetName(), dtoMetric,
|
|
|
- )
|
|
|
- }
|
|
|
- } else {
|
|
|
- dimHashes[metricFamily.GetName()] = dh
|
|
|
- }
|
|
|
metricHashes[h] = struct{}{}
|
|
|
return nil
|
|
|
}
|
|
@@ -778,8 +909,8 @@ func checkDescConsistency(
|
|
|
}
|
|
|
|
|
|
// Is the desc consistent with the content of the metric?
|
|
|
- lpsFromDesc := make([]*dto.LabelPair, 0, len(dtoMetric.Label))
|
|
|
- lpsFromDesc = append(lpsFromDesc, desc.constLabelPairs...)
|
|
|
+ lpsFromDesc := make([]*dto.LabelPair, len(desc.constLabelPairs), len(dtoMetric.Label))
|
|
|
+ copy(lpsFromDesc, desc.constLabelPairs)
|
|
|
for _, l := range desc.variableLabels {
|
|
|
lpsFromDesc = append(lpsFromDesc, &dto.LabelPair{
|
|
|
Name: proto.String(l),
|
|
@@ -791,7 +922,7 @@ func checkDescConsistency(
|
|
|
metricFamily.GetName(), dtoMetric, desc,
|
|
|
)
|
|
|
}
|
|
|
- sort.Sort(LabelPairSorter(lpsFromDesc))
|
|
|
+ sort.Sort(labelPairSorter(lpsFromDesc))
|
|
|
for i, lpFromDesc := range lpsFromDesc {
|
|
|
lpFromMetric := dtoMetric.Label[i]
|
|
|
if lpFromDesc.GetName() != lpFromMetric.GetName() ||
|