Merge pull request #44145 from neersighted/bump_memberlist_20.10

[20.10] vendor: github.com/hashicorp/memberlist v0.4.0
This commit is contained in:
Sebastiaan van Stijn 2022-09-21 17:51:18 +02:00 committed by GitHub
commit 668ab11a67
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
32 changed files with 3591 additions and 473 deletions

View file

@ -50,9 +50,10 @@ github.com/grpc-ecosystem/go-grpc-middleware 3c51f7f332123e8be5a157c0802a
github.com/docker/libnetwork 0dde5c895075df6e3630e76f750a447cf63f4789
github.com/docker/go-events e31b211e4f1cd09aa76fe4ac244571fab96ae47f
github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80
github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
github.com/armon/go-metrics f0300d1749da6fa982027e449ec0c7a145510c3c # v0.4.1
github.com/hashicorp/go-msgpack 71c2886f5a673a35f909803f38ece5810165097b
github.com/hashicorp/memberlist 3d8438da9589e7b608a83ffac1ef8211486bcb7c
github.com/hashicorp/memberlist e6ff9b2d87a3f0f3f04abb5672ada3ac2a640223 # v0.4.0
github.com/google/btree 4030bb1f1f0c35b30ca7009e9ebd06849dd45306 # v1.1.2
github.com/sean-/seed e2103e2c35297fb7e17febb81e49b312087a2372
github.com/hashicorp/errwrap 8a6fb523712970c966eefc6b39ed2c5e74880354 # v1.0.0
github.com/hashicorp/go-sockaddr c7188e74f6acae5a989bdc959aa779f8b9f42faf # v1.0.2

View file

@ -4,14 +4,17 @@ go-metrics
This library provides a `metrics` package which can be used to instrument code,
expose application metrics, and profile runtime performance in a flexible manner.
Current API: [![GoDoc](https://godoc.org/github.com/armon/go-metrics?status.svg)](https://godoc.org/github.com/armon/go-metrics)
Sinks
=====
-----
The `metrics` package makes use of a `MetricSink` interface to support delivery
to any type of backend. Currently the following sinks are provided:
* StatsiteSink : Sinks to a statsite instance (TCP)
* StatsdSink: Sinks to a statsd / statsite instance (UDP)
* StatsiteSink : Sinks to a [statsite](https://github.com/armon/statsite/) instance (TCP)
* StatsdSink: Sinks to a [StatsD](https://github.com/etsy/statsd/) / statsite instance (UDP)
* PrometheusSink: Sinks to a [Prometheus](http://prometheus.io/) metrics endpoint (exposed via HTTP for scrapes)
* InmemSink : Provides in-memory aggregation, can be used to export stats
* FanoutSink : Sinks to multiple sinks. Enables writing to multiple statsite instances for example.
* BlackholeSink : Sinks to nowhere
@ -20,49 +23,69 @@ In addition to the sinks, the `InmemSignal` can be used to catch a signal,
and dump a formatted output of recent metrics. For example, when a process gets
a SIGUSR1, it can dump to stderr recent performance metrics for debugging.
Labels
------
Most metrics do have an equivalent ending with `WithLabels`, such methods
allow to push metrics with labels and use some features of underlying Sinks
(ex: translated into Prometheus labels).
Since some of these labels may increase greatly cardinality of metrics, the
library allow to filter labels using a blacklist/whitelist filtering system
which is global to all metrics.
* If `Config.AllowedLabels` is not nil, then only labels specified in this value will be sent to underlying Sink, otherwise, all labels are sent by default.
* If `Config.BlockedLabels` is not nil, any label specified in this value will not be sent to underlying Sinks.
By default, both `Config.AllowedLabels` and `Config.BlockedLabels` are nil, meaning that
no tags are filetered at all, but it allow to a user to globally block some tags with high
cardinality at application level.
Examples
========
--------
Here is an example of using the package:
func SlowMethod() {
// Profiling the runtime of a method
defer metrics.MeasureSince([]string{"SlowMethod"}, time.Now())
}
```go
func SlowMethod() {
// Profiling the runtime of a method
defer metrics.MeasureSince([]string{"SlowMethod"}, time.Now())
}
// Configure a statsite sink as the global metrics sink
sink, _ := metrics.NewStatsiteSink("statsite:8125")
metrics.NewGlobal(metrics.DefaultConfig("service-name"), sink)
// Configure a statsite sink as the global metrics sink
sink, _ := metrics.NewStatsiteSink("statsite:8125")
metrics.NewGlobal(metrics.DefaultConfig("service-name"), sink)
// Emit a Key/Value pair
metrics.EmitKey([]string{"questions", "meaning of life"}, 42)
// Emit a Key/Value pair
metrics.EmitKey([]string{"questions", "meaning of life"}, 42)
```
Here is an example of setting up a signal handler:
Here is an example of setting up an signal handler:
```go
// Setup the inmem sink and signal handler
inm := metrics.NewInmemSink(10*time.Second, time.Minute)
sig := metrics.DefaultInmemSignal(inm)
metrics.NewGlobal(metrics.DefaultConfig("service-name"), inm)
// Setup the inmem sink and signal handler
inm := NewInmemSink(10*time.Second, time.Minute)
sig := DefaultInmemSignal(inm)
metrics.NewGlobal(metrics.DefaultConfig("service-name"), inm)
// Run some code
inm.SetGauge([]string{"foo"}, 42)
inm.EmitKey([]string{"bar"}, 30)
// Run some code
inm.SetGauge([]string{"foo"}, 42)
inm.EmitKey([]string{"bar"}, 30)
inm.IncrCounter([]string{"baz"}, 42)
inm.IncrCounter([]string{"baz"}, 1)
inm.IncrCounter([]string{"baz"}, 80)
inm.IncrCounter([]string{"baz"}, 42)
inm.IncrCounter([]string{"baz"}, 1)
inm.IncrCounter([]string{"baz"}, 80)
inm.AddSample([]string{"method", "wow"}, 42)
inm.AddSample([]string{"method", "wow"}, 100)
inm.AddSample([]string{"method", "wow"}, 22)
inm.AddSample([]string{"method", "wow"}, 42)
inm.AddSample([]string{"method", "wow"}, 100)
inm.AddSample([]string{"method", "wow"}, 22)
....
....
```
When a signal comes in, output like the following will be dumped to stderr:
[2014-01-28 14:57:33.04 -0800 PST][G] 'foo': 42.000
[2014-01-28 14:57:33.04 -0800 PST][P] 'bar': 30.000
[2014-01-28 14:57:33.04 -0800 PST][C] 'baz': Count: 3 Min: 1.000 Mean: 41.000 Max: 80.000 Stddev: 39.509
[2014-01-28 14:57:33.04 -0800 PST][S] 'method.wow': Count: 3 Min: 22.000 Mean: 54.667 Max: 100.000 Stddev: 40.513
[2014-01-28 14:57:33.04 -0800 PST][C] 'baz': Count: 3 Min: 1.000 Mean: 41.000 Max: 80.000 Stddev: 39.509
[2014-01-28 14:57:33.04 -0800 PST][S] 'method.wow': Count: 3 Min: 22.000 Mean: 54.667 Max: 100.000 Stddev: 40.513

View file

@ -1,8 +1,10 @@
package metrics
import (
"bytes"
"fmt"
"math"
"net/url"
"strings"
"sync"
"time"
@ -25,6 +27,8 @@ type InmemSink struct {
// intervals is a slice of the retained intervals
intervals []*IntervalMetrics
intervalLock sync.RWMutex
rateDenom float64
}
// IntervalMetrics stores the aggregated metrics
@ -36,7 +40,7 @@ type IntervalMetrics struct {
Interval time.Time
// Gauges maps the key to the last set value
Gauges map[string]float32
Gauges map[string]GaugeValue
// Points maps the string to the list of emitted values
// from EmitKey
@ -44,32 +48,34 @@ type IntervalMetrics struct {
// Counters maps the string key to a sum of the counter
// values
Counters map[string]*AggregateSample
Counters map[string]SampledValue
// Samples maps the key to an AggregateSample,
// which has the rolled up view of a sample
Samples map[string]*AggregateSample
Samples map[string]SampledValue
}
// NewIntervalMetrics creates a new IntervalMetrics for a given interval
func NewIntervalMetrics(intv time.Time) *IntervalMetrics {
return &IntervalMetrics{
Interval: intv,
Gauges: make(map[string]float32),
Gauges: make(map[string]GaugeValue),
Points: make(map[string][]float32),
Counters: make(map[string]*AggregateSample),
Samples: make(map[string]*AggregateSample),
Counters: make(map[string]SampledValue),
Samples: make(map[string]SampledValue),
}
}
// AggregateSample is used to hold aggregate metrics
// about a sample
type AggregateSample struct {
Count int // The count of emitted pairs
Sum float64 // The sum of values
SumSq float64 // The sum of squared values
Min float64 // Minimum value
Max float64 // Maximum value
Count int // The count of emitted pairs
Rate float64 // The values rate per time unit (usually 1 second)
Sum float64 // The sum of values
SumSq float64 `json:"-"` // The sum of squared values
Min float64 // Minimum value
Max float64 // Maximum value
LastUpdated time.Time `json:"-"` // When value was last updated
}
// Computes a Stddev of the values
@ -91,7 +97,7 @@ func (a *AggregateSample) Mean() float64 {
}
// Ingest is used to update a sample
func (a *AggregateSample) Ingest(v float64) {
func (a *AggregateSample) Ingest(v float64, rateDenom float64) {
a.Count++
a.Sum += v
a.SumSq += (v * v)
@ -101,38 +107,64 @@ func (a *AggregateSample) Ingest(v float64) {
if v > a.Max || a.Count == 1 {
a.Max = v
}
a.Rate = float64(a.Sum) / rateDenom
a.LastUpdated = time.Now()
}
func (a *AggregateSample) String() string {
if a.Count == 0 {
return "Count: 0"
} else if a.Stddev() == 0 {
return fmt.Sprintf("Count: %d Sum: %0.3f", a.Count, a.Sum)
return fmt.Sprintf("Count: %d Sum: %0.3f LastUpdated: %s", a.Count, a.Sum, a.LastUpdated)
} else {
return fmt.Sprintf("Count: %d Min: %0.3f Mean: %0.3f Max: %0.3f Stddev: %0.3f Sum: %0.3f",
a.Count, a.Min, a.Mean(), a.Max, a.Stddev(), a.Sum)
return fmt.Sprintf("Count: %d Min: %0.3f Mean: %0.3f Max: %0.3f Stddev: %0.3f Sum: %0.3f LastUpdated: %s",
a.Count, a.Min, a.Mean(), a.Max, a.Stddev(), a.Sum, a.LastUpdated)
}
}
// NewInmemSinkFromURL creates an InmemSink from a URL. It is used
// (and tested) from NewMetricSinkFromURL.
func NewInmemSinkFromURL(u *url.URL) (MetricSink, error) {
params := u.Query()
interval, err := time.ParseDuration(params.Get("interval"))
if err != nil {
return nil, fmt.Errorf("Bad 'interval' param: %s", err)
}
retain, err := time.ParseDuration(params.Get("retain"))
if err != nil {
return nil, fmt.Errorf("Bad 'retain' param: %s", err)
}
return NewInmemSink(interval, retain), nil
}
// NewInmemSink is used to construct a new in-memory sink.
// Uses an aggregation interval and maximum retention period.
func NewInmemSink(interval, retain time.Duration) *InmemSink {
rateTimeUnit := time.Second
i := &InmemSink{
interval: interval,
retain: retain,
maxIntervals: int(retain / interval),
rateDenom: float64(interval.Nanoseconds()) / float64(rateTimeUnit.Nanoseconds()),
}
i.intervals = make([]*IntervalMetrics, 0, i.maxIntervals)
return i
}
func (i *InmemSink) SetGauge(key []string, val float32) {
k := i.flattenKey(key)
i.SetGaugeWithLabels(key, val, nil)
}
func (i *InmemSink) SetGaugeWithLabels(key []string, val float32, labels []Label) {
k, name := i.flattenKeyLabels(key, labels)
intv := i.getInterval()
intv.Lock()
defer intv.Unlock()
intv.Gauges[k] = val
intv.Gauges[k] = GaugeValue{Name: name, Value: val, Labels: labels}
}
func (i *InmemSink) EmitKey(key []string, val float32) {
@ -146,33 +178,49 @@ func (i *InmemSink) EmitKey(key []string, val float32) {
}
func (i *InmemSink) IncrCounter(key []string, val float32) {
k := i.flattenKey(key)
i.IncrCounterWithLabels(key, val, nil)
}
func (i *InmemSink) IncrCounterWithLabels(key []string, val float32, labels []Label) {
k, name := i.flattenKeyLabels(key, labels)
intv := i.getInterval()
intv.Lock()
defer intv.Unlock()
agg := intv.Counters[k]
if agg == nil {
agg = &AggregateSample{}
agg, ok := intv.Counters[k]
if !ok {
agg = SampledValue{
Name: name,
AggregateSample: &AggregateSample{},
Labels: labels,
}
intv.Counters[k] = agg
}
agg.Ingest(float64(val))
agg.Ingest(float64(val), i.rateDenom)
}
func (i *InmemSink) AddSample(key []string, val float32) {
k := i.flattenKey(key)
i.AddSampleWithLabels(key, val, nil)
}
func (i *InmemSink) AddSampleWithLabels(key []string, val float32, labels []Label) {
k, name := i.flattenKeyLabels(key, labels)
intv := i.getInterval()
intv.Lock()
defer intv.Unlock()
agg := intv.Samples[k]
if agg == nil {
agg = &AggregateSample{}
agg, ok := intv.Samples[k]
if !ok {
agg = SampledValue{
Name: name,
AggregateSample: &AggregateSample{},
Labels: labels,
}
intv.Samples[k] = agg
}
agg.Ingest(float64(val))
agg.Ingest(float64(val), i.rateDenom)
}
// Data is used to retrieve all the aggregated metrics
@ -184,8 +232,37 @@ func (i *InmemSink) Data() []*IntervalMetrics {
i.intervalLock.RLock()
defer i.intervalLock.RUnlock()
intervals := make([]*IntervalMetrics, len(i.intervals))
copy(intervals, i.intervals)
n := len(i.intervals)
intervals := make([]*IntervalMetrics, n)
copy(intervals[:n-1], i.intervals[:n-1])
current := i.intervals[n-1]
// make its own copy for current interval
intervals[n-1] = &IntervalMetrics{}
copyCurrent := intervals[n-1]
current.RLock()
*copyCurrent = *current
copyCurrent.Gauges = make(map[string]GaugeValue, len(current.Gauges))
for k, v := range current.Gauges {
copyCurrent.Gauges[k] = v
}
// saved values will be not change, just copy its link
copyCurrent.Points = make(map[string][]float32, len(current.Points))
for k, v := range current.Points {
copyCurrent.Points[k] = v
}
copyCurrent.Counters = make(map[string]SampledValue, len(current.Counters))
for k, v := range current.Counters {
copyCurrent.Counters[k] = v
}
copyCurrent.Samples = make(map[string]SampledValue, len(current.Samples))
for k, v := range current.Samples {
copyCurrent.Samples[k] = v
}
current.RUnlock()
return intervals
}
@ -234,6 +311,38 @@ func (i *InmemSink) getInterval() *IntervalMetrics {
// Flattens the key for formatting, removes spaces
func (i *InmemSink) flattenKey(parts []string) string {
joined := strings.Join(parts, ".")
return strings.Replace(joined, " ", "_", -1)
buf := &bytes.Buffer{}
replacer := strings.NewReplacer(" ", "_")
if len(parts) > 0 {
replacer.WriteString(buf, parts[0])
}
for _, part := range parts[1:] {
replacer.WriteString(buf, ".")
replacer.WriteString(buf, part)
}
return buf.String()
}
// Flattens the key for formatting along with its labels, removes spaces
func (i *InmemSink) flattenKeyLabels(parts []string, labels []Label) (string, string) {
buf := &bytes.Buffer{}
replacer := strings.NewReplacer(" ", "_")
if len(parts) > 0 {
replacer.WriteString(buf, parts[0])
}
for _, part := range parts[1:] {
replacer.WriteString(buf, ".")
replacer.WriteString(buf, part)
}
key := buf.String()
for _, label := range labels {
replacer.WriteString(buf, fmt.Sprintf(";%s=%s", label.Name, label.Value))
}
return buf.String(), key
}

118
vendor/github.com/armon/go-metrics/inmem_endpoint.go generated vendored Normal file
View file

@ -0,0 +1,118 @@
package metrics
import (
"fmt"
"net/http"
"sort"
"time"
)
// MetricsSummary holds a roll-up of metrics info for a given interval
type MetricsSummary struct {
Timestamp string
Gauges []GaugeValue
Points []PointValue
Counters []SampledValue
Samples []SampledValue
}
type GaugeValue struct {
Name string
Hash string `json:"-"`
Value float32
Labels []Label `json:"-"`
DisplayLabels map[string]string `json:"Labels"`
}
type PointValue struct {
Name string
Points []float32
}
type SampledValue struct {
Name string
Hash string `json:"-"`
*AggregateSample
Mean float64
Stddev float64
Labels []Label `json:"-"`
DisplayLabels map[string]string `json:"Labels"`
}
// DisplayMetrics returns a summary of the metrics from the most recent finished interval.
func (i *InmemSink) DisplayMetrics(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
data := i.Data()
var interval *IntervalMetrics
n := len(data)
switch {
case n == 0:
return nil, fmt.Errorf("no metric intervals have been initialized yet")
case n == 1:
// Show the current interval if it's all we have
interval = i.intervals[0]
default:
// Show the most recent finished interval if we have one
interval = i.intervals[n-2]
}
summary := MetricsSummary{
Timestamp: interval.Interval.Round(time.Second).UTC().String(),
Gauges: make([]GaugeValue, 0, len(interval.Gauges)),
Points: make([]PointValue, 0, len(interval.Points)),
}
// Format and sort the output of each metric type, so it gets displayed in a
// deterministic order.
for name, points := range interval.Points {
summary.Points = append(summary.Points, PointValue{name, points})
}
sort.Slice(summary.Points, func(i, j int) bool {
return summary.Points[i].Name < summary.Points[j].Name
})
for hash, value := range interval.Gauges {
value.Hash = hash
value.DisplayLabels = make(map[string]string)
for _, label := range value.Labels {
value.DisplayLabels[label.Name] = label.Value
}
value.Labels = nil
summary.Gauges = append(summary.Gauges, value)
}
sort.Slice(summary.Gauges, func(i, j int) bool {
return summary.Gauges[i].Hash < summary.Gauges[j].Hash
})
summary.Counters = formatSamples(interval.Counters)
summary.Samples = formatSamples(interval.Samples)
return summary, nil
}
func formatSamples(source map[string]SampledValue) []SampledValue {
output := make([]SampledValue, 0, len(source))
for hash, sample := range source {
displayLabels := make(map[string]string)
for _, label := range sample.Labels {
displayLabels[label.Name] = label.Value
}
output = append(output, SampledValue{
Name: sample.Name,
Hash: hash,
AggregateSample: sample.AggregateSample,
Mean: sample.AggregateSample.Mean(),
Stddev: sample.AggregateSample.Stddev(),
DisplayLabels: displayLabels,
})
}
sort.Slice(output, func(i, j int) bool {
return output[i].Hash < output[j].Hash
})
return output
}

View file

@ -6,6 +6,7 @@ import (
"io"
"os"
"os/signal"
"strings"
"sync"
"syscall"
)
@ -75,22 +76,25 @@ func (i *InmemSignal) dumpStats() {
data := i.inm.Data()
// Skip the last period which is still being aggregated
for i := 0; i < len(data)-1; i++ {
intv := data[i]
for j := 0; j < len(data)-1; j++ {
intv := data[j]
intv.RLock()
for name, val := range intv.Gauges {
fmt.Fprintf(buf, "[%v][G] '%s': %0.3f\n", intv.Interval, name, val)
for _, val := range intv.Gauges {
name := i.flattenLabels(val.Name, val.Labels)
fmt.Fprintf(buf, "[%v][G] '%s': %0.3f\n", intv.Interval, name, val.Value)
}
for name, vals := range intv.Points {
for _, val := range vals {
fmt.Fprintf(buf, "[%v][P] '%s': %0.3f\n", intv.Interval, name, val)
}
}
for name, agg := range intv.Counters {
fmt.Fprintf(buf, "[%v][C] '%s': %s\n", intv.Interval, name, agg)
for _, agg := range intv.Counters {
name := i.flattenLabels(agg.Name, agg.Labels)
fmt.Fprintf(buf, "[%v][C] '%s': %s\n", intv.Interval, name, agg.AggregateSample)
}
for name, agg := range intv.Samples {
fmt.Fprintf(buf, "[%v][S] '%s': %s\n", intv.Interval, name, agg)
for _, agg := range intv.Samples {
name := i.flattenLabels(agg.Name, agg.Labels)
fmt.Fprintf(buf, "[%v][S] '%s': %s\n", intv.Interval, name, agg.AggregateSample)
}
intv.RUnlock()
}
@ -98,3 +102,16 @@ func (i *InmemSignal) dumpStats() {
// Write out the bytes
i.w.Write(buf.Bytes())
}
// Flattens the key for formatting along with its labels, removes spaces
func (i *InmemSignal) flattenLabels(name string, labels []Label) string {
buf := bytes.NewBufferString(name)
replacer := strings.NewReplacer(" ", "_", ":", "_")
for _, label := range labels {
replacer.WriteString(buf, ".")
replacer.WriteString(buf, label.Value)
}
return buf.String()
}

183
vendor/github.com/armon/go-metrics/metrics.go generated vendored Executable file → Normal file
View file

@ -2,20 +2,44 @@ package metrics
import (
"runtime"
"strings"
"time"
"github.com/hashicorp/go-immutable-radix"
)
type Label struct {
Name string
Value string
}
func (m *Metrics) SetGauge(key []string, val float32) {
if m.HostName != "" && m.EnableHostname {
key = insert(0, m.HostName, key)
m.SetGaugeWithLabels(key, val, nil)
}
func (m *Metrics) SetGaugeWithLabels(key []string, val float32, labels []Label) {
if m.HostName != "" {
if m.EnableHostnameLabel {
labels = append(labels, Label{"host", m.HostName})
} else if m.EnableHostname {
key = insert(0, m.HostName, key)
}
}
if m.EnableTypePrefix {
key = insert(0, "gauge", key)
}
if m.ServiceName != "" {
key = insert(0, m.ServiceName, key)
if m.EnableServiceLabel {
labels = append(labels, Label{"service", m.ServiceName})
} else {
key = insert(0, m.ServiceName, key)
}
}
m.sink.SetGauge(key, val)
allowed, labelsFiltered := m.allowMetric(key, labels)
if !allowed {
return
}
m.sink.SetGaugeWithLabels(key, val, labelsFiltered)
}
func (m *Metrics) EmitKey(key []string, val float32) {
@ -25,40 +49,179 @@ func (m *Metrics) EmitKey(key []string, val float32) {
if m.ServiceName != "" {
key = insert(0, m.ServiceName, key)
}
allowed, _ := m.allowMetric(key, nil)
if !allowed {
return
}
m.sink.EmitKey(key, val)
}
func (m *Metrics) IncrCounter(key []string, val float32) {
m.IncrCounterWithLabels(key, val, nil)
}
func (m *Metrics) IncrCounterWithLabels(key []string, val float32, labels []Label) {
if m.HostName != "" && m.EnableHostnameLabel {
labels = append(labels, Label{"host", m.HostName})
}
if m.EnableTypePrefix {
key = insert(0, "counter", key)
}
if m.ServiceName != "" {
key = insert(0, m.ServiceName, key)
if m.EnableServiceLabel {
labels = append(labels, Label{"service", m.ServiceName})
} else {
key = insert(0, m.ServiceName, key)
}
}
m.sink.IncrCounter(key, val)
allowed, labelsFiltered := m.allowMetric(key, labels)
if !allowed {
return
}
m.sink.IncrCounterWithLabels(key, val, labelsFiltered)
}
func (m *Metrics) AddSample(key []string, val float32) {
m.AddSampleWithLabels(key, val, nil)
}
func (m *Metrics) AddSampleWithLabels(key []string, val float32, labels []Label) {
if m.HostName != "" && m.EnableHostnameLabel {
labels = append(labels, Label{"host", m.HostName})
}
if m.EnableTypePrefix {
key = insert(0, "sample", key)
}
if m.ServiceName != "" {
key = insert(0, m.ServiceName, key)
if m.EnableServiceLabel {
labels = append(labels, Label{"service", m.ServiceName})
} else {
key = insert(0, m.ServiceName, key)
}
}
m.sink.AddSample(key, val)
allowed, labelsFiltered := m.allowMetric(key, labels)
if !allowed {
return
}
m.sink.AddSampleWithLabels(key, val, labelsFiltered)
}
func (m *Metrics) MeasureSince(key []string, start time.Time) {
m.MeasureSinceWithLabels(key, start, nil)
}
func (m *Metrics) MeasureSinceWithLabels(key []string, start time.Time, labels []Label) {
if m.HostName != "" && m.EnableHostnameLabel {
labels = append(labels, Label{"host", m.HostName})
}
if m.EnableTypePrefix {
key = insert(0, "timer", key)
}
if m.ServiceName != "" {
key = insert(0, m.ServiceName, key)
if m.EnableServiceLabel {
labels = append(labels, Label{"service", m.ServiceName})
} else {
key = insert(0, m.ServiceName, key)
}
}
allowed, labelsFiltered := m.allowMetric(key, labels)
if !allowed {
return
}
now := time.Now()
elapsed := now.Sub(start)
msec := float32(elapsed.Nanoseconds()) / float32(m.TimerGranularity)
m.sink.AddSample(key, msec)
m.sink.AddSampleWithLabels(key, msec, labelsFiltered)
}
// UpdateFilter overwrites the existing filter with the given rules.
func (m *Metrics) UpdateFilter(allow, block []string) {
m.UpdateFilterAndLabels(allow, block, m.AllowedLabels, m.BlockedLabels)
}
// UpdateFilterAndLabels overwrites the existing filter with the given rules.
func (m *Metrics) UpdateFilterAndLabels(allow, block, allowedLabels, blockedLabels []string) {
m.filterLock.Lock()
defer m.filterLock.Unlock()
m.AllowedPrefixes = allow
m.BlockedPrefixes = block
if allowedLabels == nil {
// Having a white list means we take only elements from it
m.allowedLabels = nil
} else {
m.allowedLabels = make(map[string]bool)
for _, v := range allowedLabels {
m.allowedLabels[v] = true
}
}
m.blockedLabels = make(map[string]bool)
for _, v := range blockedLabels {
m.blockedLabels[v] = true
}
m.AllowedLabels = allowedLabels
m.BlockedLabels = blockedLabels
m.filter = iradix.New()
for _, prefix := range m.AllowedPrefixes {
m.filter, _, _ = m.filter.Insert([]byte(prefix), true)
}
for _, prefix := range m.BlockedPrefixes {
m.filter, _, _ = m.filter.Insert([]byte(prefix), false)
}
}
// labelIsAllowed return true if a should be included in metric
// the caller should lock m.filterLock while calling this method
func (m *Metrics) labelIsAllowed(label *Label) bool {
labelName := (*label).Name
if m.blockedLabels != nil {
_, ok := m.blockedLabels[labelName]
if ok {
// If present, let's remove this label
return false
}
}
if m.allowedLabels != nil {
_, ok := m.allowedLabels[labelName]
return ok
}
// Allow by default
return true
}
// filterLabels return only allowed labels
// the caller should lock m.filterLock while calling this method
func (m *Metrics) filterLabels(labels []Label) []Label {
if labels == nil {
return nil
}
toReturn := labels[:0]
for _, label := range labels {
if m.labelIsAllowed(&label) {
toReturn = append(toReturn, label)
}
}
return toReturn
}
// Returns whether the metric should be allowed based on configured prefix filters
// Also return the applicable labels
func (m *Metrics) allowMetric(key []string, labels []Label) (bool, []Label) {
m.filterLock.RLock()
defer m.filterLock.RUnlock()
if m.filter == nil || m.filter.Len() == 0 {
return m.Config.FilterDefault, m.filterLabels(labels)
}
_, allowed, ok := m.filter.Root().LongestPrefix([]byte(strings.Join(key, ".")))
if !ok {
return m.Config.FilterDefault, m.filterLabels(labels)
}
return allowed.(bool), m.filterLabels(labels)
}
// Periodically collects runtime stats to publish

77
vendor/github.com/armon/go-metrics/sink.go generated vendored Executable file → Normal file
View file

@ -1,35 +1,50 @@
package metrics
import (
"fmt"
"net/url"
)
// The MetricSink interface is used to transmit metrics information
// to an external system
type MetricSink interface {
// A Gauge should retain the last value it is set to
SetGauge(key []string, val float32)
SetGaugeWithLabels(key []string, val float32, labels []Label)
// Should emit a Key/Value pair for each call
EmitKey(key []string, val float32)
// Counters should accumulate values
IncrCounter(key []string, val float32)
IncrCounterWithLabels(key []string, val float32, labels []Label)
// Samples are for timing information, where quantiles are used
AddSample(key []string, val float32)
AddSampleWithLabels(key []string, val float32, labels []Label)
}
// BlackholeSink is used to just blackhole messages
type BlackholeSink struct{}
func (*BlackholeSink) SetGauge(key []string, val float32) {}
func (*BlackholeSink) EmitKey(key []string, val float32) {}
func (*BlackholeSink) IncrCounter(key []string, val float32) {}
func (*BlackholeSink) AddSample(key []string, val float32) {}
func (*BlackholeSink) SetGauge(key []string, val float32) {}
func (*BlackholeSink) SetGaugeWithLabels(key []string, val float32, labels []Label) {}
func (*BlackholeSink) EmitKey(key []string, val float32) {}
func (*BlackholeSink) IncrCounter(key []string, val float32) {}
func (*BlackholeSink) IncrCounterWithLabels(key []string, val float32, labels []Label) {}
func (*BlackholeSink) AddSample(key []string, val float32) {}
func (*BlackholeSink) AddSampleWithLabels(key []string, val float32, labels []Label) {}
// FanoutSink is used to sink to fanout values to multiple sinks
type FanoutSink []MetricSink
func (fh FanoutSink) SetGauge(key []string, val float32) {
fh.SetGaugeWithLabels(key, val, nil)
}
func (fh FanoutSink) SetGaugeWithLabels(key []string, val float32, labels []Label) {
for _, s := range fh {
s.SetGauge(key, val)
s.SetGaugeWithLabels(key, val, labels)
}
}
@ -40,13 +55,61 @@ func (fh FanoutSink) EmitKey(key []string, val float32) {
}
func (fh FanoutSink) IncrCounter(key []string, val float32) {
fh.IncrCounterWithLabels(key, val, nil)
}
func (fh FanoutSink) IncrCounterWithLabels(key []string, val float32, labels []Label) {
for _, s := range fh {
s.IncrCounter(key, val)
s.IncrCounterWithLabels(key, val, labels)
}
}
func (fh FanoutSink) AddSample(key []string, val float32) {
fh.AddSampleWithLabels(key, val, nil)
}
func (fh FanoutSink) AddSampleWithLabels(key []string, val float32, labels []Label) {
for _, s := range fh {
s.AddSample(key, val)
s.AddSampleWithLabels(key, val, labels)
}
}
// sinkURLFactoryFunc is an generic interface around the *SinkFromURL() function provided
// by each sink type
type sinkURLFactoryFunc func(*url.URL) (MetricSink, error)
// sinkRegistry supports the generic NewMetricSink function by mapping URL
// schemes to metric sink factory functions
var sinkRegistry = map[string]sinkURLFactoryFunc{
"statsd": NewStatsdSinkFromURL,
"statsite": NewStatsiteSinkFromURL,
"inmem": NewInmemSinkFromURL,
}
// NewMetricSinkFromURL allows a generic URL input to configure any of the
// supported sinks. The scheme of the URL identifies the type of the sink, the
// and query parameters are used to set options.
//
// "statsd://" - Initializes a StatsdSink. The host and port are passed through
// as the "addr" of the sink
//
// "statsite://" - Initializes a StatsiteSink. The host and port become the
// "addr" of the sink
//
// "inmem://" - Initializes an InmemSink. The host and port are ignored. The
// "interval" and "duration" query parameters must be specified with valid
// durations, see NewInmemSink for details.
func NewMetricSinkFromURL(urlStr string) (MetricSink, error) {
u, err := url.Parse(urlStr)
if err != nil {
return nil, err
}
sinkURLFactoryFunc := sinkRegistry[u.Scheme]
if sinkURLFactoryFunc == nil {
return nil, fmt.Errorf(
"cannot create metric sink, unrecognized sink name: %q", u.Scheme)
}
return sinkURLFactoryFunc(u)
}

68
vendor/github.com/armon/go-metrics/start.go generated vendored Executable file → Normal file
View file

@ -2,34 +2,50 @@ package metrics
import (
"os"
"sync"
"sync/atomic"
"time"
"github.com/hashicorp/go-immutable-radix"
)
// Config is used to configure metrics settings
type Config struct {
ServiceName string // Prefixed with keys to seperate services
ServiceName string // Prefixed with keys to separate services
HostName string // Hostname to use. If not provided and EnableHostname, it will be os.Hostname
EnableHostname bool // Enable prefixing gauge values with hostname
EnableHostnameLabel bool // Enable adding hostname to labels
EnableServiceLabel bool // Enable adding service to labels
EnableRuntimeMetrics bool // Enables profiling of runtime metrics (GC, Goroutines, Memory)
EnableTypePrefix bool // Prefixes key with a type ("counter", "gauge", "timer")
TimerGranularity time.Duration // Granularity of timers.
ProfileInterval time.Duration // Interval to profile runtime metrics
AllowedPrefixes []string // A list of metric prefixes to allow, with '.' as the separator
BlockedPrefixes []string // A list of metric prefixes to block, with '.' as the separator
AllowedLabels []string // A list of metric labels to allow, with '.' as the separator
BlockedLabels []string // A list of metric labels to block, with '.' as the separator
FilterDefault bool // Whether to allow metrics by default
}
// Metrics represents an instance of a metrics sink that can
// be used to emit
type Metrics struct {
Config
lastNumGC uint32
sink MetricSink
lastNumGC uint32
sink MetricSink
filter *iradix.Tree
allowedLabels map[string]bool
blockedLabels map[string]bool
filterLock sync.RWMutex // Lock filters and allowedLabels/blockedLabels access
}
// Shared global metrics instance
var globalMetrics *Metrics
var globalMetrics atomic.Value // *Metrics
func init() {
// Initialize to a blackhole sink to avoid errors
globalMetrics = &Metrics{sink: &BlackholeSink{}}
globalMetrics.Store(&Metrics{sink: &BlackholeSink{}})
}
// DefaultConfig provides a sane default configuration
@ -42,6 +58,7 @@ func DefaultConfig(serviceName string) *Config {
EnableTypePrefix: false, // Disable type prefix
TimerGranularity: time.Millisecond, // Timers are in milliseconds
ProfileInterval: time.Second, // Poll runtime every second
FilterDefault: true, // Don't filter metrics by default
}
// Try to get the hostname
@ -55,6 +72,7 @@ func New(conf *Config, sink MetricSink) (*Metrics, error) {
met := &Metrics{}
met.Config = *conf
met.sink = sink
met.UpdateFilterAndLabels(conf.AllowedPrefixes, conf.BlockedPrefixes, conf.AllowedLabels, conf.BlockedLabels)
// Start the runtime collector
if conf.EnableRuntimeMetrics {
@ -68,28 +86,56 @@ func New(conf *Config, sink MetricSink) (*Metrics, error) {
func NewGlobal(conf *Config, sink MetricSink) (*Metrics, error) {
metrics, err := New(conf, sink)
if err == nil {
globalMetrics = metrics
globalMetrics.Store(metrics)
}
return metrics, err
}
// Proxy all the methods to the globalMetrics instance
func SetGauge(key []string, val float32) {
globalMetrics.SetGauge(key, val)
globalMetrics.Load().(*Metrics).SetGauge(key, val)
}
func SetGaugeWithLabels(key []string, val float32, labels []Label) {
globalMetrics.Load().(*Metrics).SetGaugeWithLabels(key, val, labels)
}
func EmitKey(key []string, val float32) {
globalMetrics.EmitKey(key, val)
globalMetrics.Load().(*Metrics).EmitKey(key, val)
}
func IncrCounter(key []string, val float32) {
globalMetrics.IncrCounter(key, val)
globalMetrics.Load().(*Metrics).IncrCounter(key, val)
}
func IncrCounterWithLabels(key []string, val float32, labels []Label) {
globalMetrics.Load().(*Metrics).IncrCounterWithLabels(key, val, labels)
}
func AddSample(key []string, val float32) {
globalMetrics.AddSample(key, val)
globalMetrics.Load().(*Metrics).AddSample(key, val)
}
func AddSampleWithLabels(key []string, val float32, labels []Label) {
globalMetrics.Load().(*Metrics).AddSampleWithLabels(key, val, labels)
}
func MeasureSince(key []string, start time.Time) {
globalMetrics.MeasureSince(key, start)
globalMetrics.Load().(*Metrics).MeasureSince(key, start)
}
func MeasureSinceWithLabels(key []string, start time.Time, labels []Label) {
globalMetrics.Load().(*Metrics).MeasureSinceWithLabels(key, start, labels)
}
func UpdateFilter(allow, block []string) {
globalMetrics.Load().(*Metrics).UpdateFilter(allow, block)
}
// UpdateFilterAndLabels set allow/block prefixes of metrics while allowedLabels
// and blockedLabels - when not nil - allow filtering of labels in order to
// block/allow globally labels (especially useful when having large number of
// values for a given label). See README.md for more information about usage.
func UpdateFilterAndLabels(allow, block, allowedLabels, blockedLabels []string) {
globalMetrics.Load().(*Metrics).UpdateFilterAndLabels(allow, block, allowedLabels, blockedLabels)
}

View file

@ -5,6 +5,7 @@ import (
"fmt"
"log"
"net"
"net/url"
"strings"
"time"
)
@ -23,6 +24,12 @@ type StatsdSink struct {
metricQueue chan string
}
// NewStatsdSinkFromURL creates an StatsdSink from a URL. It is used
// (and tested) from NewMetricSinkFromURL.
func NewStatsdSinkFromURL(u *url.URL) (MetricSink, error) {
return NewStatsdSink(u.Host)
}
// NewStatsdSink is used to create a new StatsdSink
func NewStatsdSink(addr string) (*StatsdSink, error) {
s := &StatsdSink{
@ -43,6 +50,11 @@ func (s *StatsdSink) SetGauge(key []string, val float32) {
s.pushMetric(fmt.Sprintf("%s:%f|g\n", flatKey, val))
}
func (s *StatsdSink) SetGaugeWithLabels(key []string, val float32, labels []Label) {
flatKey := s.flattenKeyLabels(key, labels)
s.pushMetric(fmt.Sprintf("%s:%f|g\n", flatKey, val))
}
func (s *StatsdSink) EmitKey(key []string, val float32) {
flatKey := s.flattenKey(key)
s.pushMetric(fmt.Sprintf("%s:%f|kv\n", flatKey, val))
@ -53,11 +65,21 @@ func (s *StatsdSink) IncrCounter(key []string, val float32) {
s.pushMetric(fmt.Sprintf("%s:%f|c\n", flatKey, val))
}
func (s *StatsdSink) IncrCounterWithLabels(key []string, val float32, labels []Label) {
flatKey := s.flattenKeyLabels(key, labels)
s.pushMetric(fmt.Sprintf("%s:%f|c\n", flatKey, val))
}
func (s *StatsdSink) AddSample(key []string, val float32) {
flatKey := s.flattenKey(key)
s.pushMetric(fmt.Sprintf("%s:%f|ms\n", flatKey, val))
}
func (s *StatsdSink) AddSampleWithLabels(key []string, val float32, labels []Label) {
flatKey := s.flattenKeyLabels(key, labels)
s.pushMetric(fmt.Sprintf("%s:%f|ms\n", flatKey, val))
}
// Flattens the key for formatting, removes spaces
func (s *StatsdSink) flattenKey(parts []string) string {
joined := strings.Join(parts, ".")
@ -73,6 +95,14 @@ func (s *StatsdSink) flattenKey(parts []string) string {
}, joined)
}
// Flattens the key along with labels for formatting, removes spaces
func (s *StatsdSink) flattenKeyLabels(parts []string, labels []Label) string {
for _, label := range labels {
parts = append(parts, label.Value)
}
return s.flattenKey(parts)
}
// Does a non-blocking push to the metrics queue
func (s *StatsdSink) pushMetric(m string) {
select {

30
vendor/github.com/armon/go-metrics/statsite.go generated vendored Executable file → Normal file
View file

@ -5,6 +5,7 @@ import (
"fmt"
"log"
"net"
"net/url"
"strings"
"time"
)
@ -16,6 +17,12 @@ const (
flushInterval = 100 * time.Millisecond
)
// NewStatsiteSinkFromURL creates an StatsiteSink from a URL. It is used
// (and tested) from NewMetricSinkFromURL.
func NewStatsiteSinkFromURL(u *url.URL) (MetricSink, error) {
return NewStatsiteSink(u.Host)
}
// StatsiteSink provides a MetricSink that can be used with a
// statsite metrics server
type StatsiteSink struct {
@ -43,6 +50,11 @@ func (s *StatsiteSink) SetGauge(key []string, val float32) {
s.pushMetric(fmt.Sprintf("%s:%f|g\n", flatKey, val))
}
func (s *StatsiteSink) SetGaugeWithLabels(key []string, val float32, labels []Label) {
flatKey := s.flattenKeyLabels(key, labels)
s.pushMetric(fmt.Sprintf("%s:%f|g\n", flatKey, val))
}
func (s *StatsiteSink) EmitKey(key []string, val float32) {
flatKey := s.flattenKey(key)
s.pushMetric(fmt.Sprintf("%s:%f|kv\n", flatKey, val))
@ -53,11 +65,21 @@ func (s *StatsiteSink) IncrCounter(key []string, val float32) {
s.pushMetric(fmt.Sprintf("%s:%f|c\n", flatKey, val))
}
func (s *StatsiteSink) IncrCounterWithLabels(key []string, val float32, labels []Label) {
flatKey := s.flattenKeyLabels(key, labels)
s.pushMetric(fmt.Sprintf("%s:%f|c\n", flatKey, val))
}
func (s *StatsiteSink) AddSample(key []string, val float32) {
flatKey := s.flattenKey(key)
s.pushMetric(fmt.Sprintf("%s:%f|ms\n", flatKey, val))
}
func (s *StatsiteSink) AddSampleWithLabels(key []string, val float32, labels []Label) {
flatKey := s.flattenKeyLabels(key, labels)
s.pushMetric(fmt.Sprintf("%s:%f|ms\n", flatKey, val))
}
// Flattens the key for formatting, removes spaces
func (s *StatsiteSink) flattenKey(parts []string) string {
joined := strings.Join(parts, ".")
@ -73,6 +95,14 @@ func (s *StatsiteSink) flattenKey(parts []string) string {
}, joined)
}
// Flattens the key along with labels for formatting, removes spaces
func (s *StatsiteSink) flattenKeyLabels(parts []string, labels []Label) string {
for _, label := range labels {
parts = append(parts, label.Value)
}
return s.flattenKey(parts)
}
// Does a non-blocking push to the metrics queue
func (s *StatsiteSink) pushMetric(m string) {
select {

202
vendor/github.com/google/btree/LICENSE generated vendored Normal file
View file

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

12
vendor/github.com/google/btree/README.md generated vendored Normal file
View file

@ -0,0 +1,12 @@
# BTree implementation for Go
![Travis CI Build Status](https://api.travis-ci.org/google/btree.svg?branch=master)
This package provides an in-memory B-Tree implementation for Go, useful as
an ordered, mutable data structure.
The API is based off of the wonderful
http://godoc.org/github.com/petar/GoLLRB/llrb, and is meant to allow btree to
act as a drop-in replacement for gollrb trees.
See http://godoc.org/github.com/google/btree for documentation.

890
vendor/github.com/google/btree/btree.go generated vendored Normal file
View file

@ -0,0 +1,890 @@
// Copyright 2014 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package btree implements in-memory B-Trees of arbitrary degree.
//
// btree implements an in-memory B-Tree for use as an ordered data structure.
// It is not meant for persistent storage solutions.
//
// It has a flatter structure than an equivalent red-black or other binary tree,
// which in some cases yields better memory usage and/or performance.
// See some discussion on the matter here:
// http://google-opensource.blogspot.com/2013/01/c-containers-that-save-memory-and-time.html
// Note, though, that this project is in no way related to the C++ B-Tree
// implementation written about there.
//
// Within this tree, each node contains a slice of items and a (possibly nil)
// slice of children. For basic numeric values or raw structs, this can cause
// efficiency differences when compared to equivalent C++ template code that
// stores values in arrays within the node:
// * Due to the overhead of storing values as interfaces (each
// value needs to be stored as the value itself, then 2 words for the
// interface pointing to that value and its type), resulting in higher
// memory use.
// * Since interfaces can point to values anywhere in memory, values are
// most likely not stored in contiguous blocks, resulting in a higher
// number of cache misses.
// These issues don't tend to matter, though, when working with strings or other
// heap-allocated structures, since C++-equivalent structures also must store
// pointers and also distribute their values across the heap.
//
// This implementation is designed to be a drop-in replacement to gollrb.LLRB
// trees, (http://github.com/petar/gollrb), an excellent and probably the most
// widely used ordered tree implementation in the Go ecosystem currently.
// Its functions, therefore, exactly mirror those of
// llrb.LLRB where possible. Unlike gollrb, though, we currently don't
// support storing multiple equivalent values.
package btree
import (
"fmt"
"io"
"sort"
"strings"
"sync"
)
// Item represents a single object in the tree.
type Item interface {
// Less tests whether the current item is less than the given argument.
//
// This must provide a strict weak ordering.
// If !a.Less(b) && !b.Less(a), we treat this to mean a == b (i.e. we can only
// hold one of either a or b in the tree).
Less(than Item) bool
}
const (
DefaultFreeListSize = 32
)
var (
nilItems = make(items, 16)
nilChildren = make(children, 16)
)
// FreeList represents a free list of btree nodes. By default each
// BTree has its own FreeList, but multiple BTrees can share the same
// FreeList.
// Two Btrees using the same freelist are safe for concurrent write access.
type FreeList struct {
mu sync.Mutex
freelist []*node
}
// NewFreeList creates a new free list.
// size is the maximum size of the returned free list.
func NewFreeList(size int) *FreeList {
return &FreeList{freelist: make([]*node, 0, size)}
}
func (f *FreeList) newNode() (n *node) {
f.mu.Lock()
index := len(f.freelist) - 1
if index < 0 {
f.mu.Unlock()
return new(node)
}
n = f.freelist[index]
f.freelist[index] = nil
f.freelist = f.freelist[:index]
f.mu.Unlock()
return
}
// freeNode adds the given node to the list, returning true if it was added
// and false if it was discarded.
func (f *FreeList) freeNode(n *node) (out bool) {
f.mu.Lock()
if len(f.freelist) < cap(f.freelist) {
f.freelist = append(f.freelist, n)
out = true
}
f.mu.Unlock()
return
}
// ItemIterator allows callers of Ascend* to iterate in-order over portions of
// the tree. When this function returns false, iteration will stop and the
// associated Ascend* function will immediately return.
type ItemIterator func(i Item) bool
// New creates a new B-Tree with the given degree.
//
// New(2), for example, will create a 2-3-4 tree (each node contains 1-3 items
// and 2-4 children).
func New(degree int) *BTree {
return NewWithFreeList(degree, NewFreeList(DefaultFreeListSize))
}
// NewWithFreeList creates a new B-Tree that uses the given node free list.
func NewWithFreeList(degree int, f *FreeList) *BTree {
if degree <= 1 {
panic("bad degree")
}
return &BTree{
degree: degree,
cow: &copyOnWriteContext{freelist: f},
}
}
// items stores items in a node.
type items []Item
// insertAt inserts a value into the given index, pushing all subsequent values
// forward.
func (s *items) insertAt(index int, item Item) {
*s = append(*s, nil)
if index < len(*s) {
copy((*s)[index+1:], (*s)[index:])
}
(*s)[index] = item
}
// removeAt removes a value at a given index, pulling all subsequent values
// back.
func (s *items) removeAt(index int) Item {
item := (*s)[index]
copy((*s)[index:], (*s)[index+1:])
(*s)[len(*s)-1] = nil
*s = (*s)[:len(*s)-1]
return item
}
// pop removes and returns the last element in the list.
func (s *items) pop() (out Item) {
index := len(*s) - 1
out = (*s)[index]
(*s)[index] = nil
*s = (*s)[:index]
return
}
// truncate truncates this instance at index so that it contains only the
// first index items. index must be less than or equal to length.
func (s *items) truncate(index int) {
var toClear items
*s, toClear = (*s)[:index], (*s)[index:]
for len(toClear) > 0 {
toClear = toClear[copy(toClear, nilItems):]
}
}
// find returns the index where the given item should be inserted into this
// list. 'found' is true if the item already exists in the list at the given
// index.
func (s items) find(item Item) (index int, found bool) {
i := sort.Search(len(s), func(i int) bool {
return item.Less(s[i])
})
if i > 0 && !s[i-1].Less(item) {
return i - 1, true
}
return i, false
}
// children stores child nodes in a node.
type children []*node
// insertAt inserts a value into the given index, pushing all subsequent values
// forward.
func (s *children) insertAt(index int, n *node) {
*s = append(*s, nil)
if index < len(*s) {
copy((*s)[index+1:], (*s)[index:])
}
(*s)[index] = n
}
// removeAt removes a value at a given index, pulling all subsequent values
// back.
func (s *children) removeAt(index int) *node {
n := (*s)[index]
copy((*s)[index:], (*s)[index+1:])
(*s)[len(*s)-1] = nil
*s = (*s)[:len(*s)-1]
return n
}
// pop removes and returns the last element in the list.
func (s *children) pop() (out *node) {
index := len(*s) - 1
out = (*s)[index]
(*s)[index] = nil
*s = (*s)[:index]
return
}
// truncate truncates this instance at index so that it contains only the
// first index children. index must be less than or equal to length.
func (s *children) truncate(index int) {
var toClear children
*s, toClear = (*s)[:index], (*s)[index:]
for len(toClear) > 0 {
toClear = toClear[copy(toClear, nilChildren):]
}
}
// node is an internal node in a tree.
//
// It must at all times maintain the invariant that either
// * len(children) == 0, len(items) unconstrained
// * len(children) == len(items) + 1
type node struct {
items items
children children
cow *copyOnWriteContext
}
func (n *node) mutableFor(cow *copyOnWriteContext) *node {
if n.cow == cow {
return n
}
out := cow.newNode()
if cap(out.items) >= len(n.items) {
out.items = out.items[:len(n.items)]
} else {
out.items = make(items, len(n.items), cap(n.items))
}
copy(out.items, n.items)
// Copy children
if cap(out.children) >= len(n.children) {
out.children = out.children[:len(n.children)]
} else {
out.children = make(children, len(n.children), cap(n.children))
}
copy(out.children, n.children)
return out
}
func (n *node) mutableChild(i int) *node {
c := n.children[i].mutableFor(n.cow)
n.children[i] = c
return c
}
// split splits the given node at the given index. The current node shrinks,
// and this function returns the item that existed at that index and a new node
// containing all items/children after it.
func (n *node) split(i int) (Item, *node) {
item := n.items[i]
next := n.cow.newNode()
next.items = append(next.items, n.items[i+1:]...)
n.items.truncate(i)
if len(n.children) > 0 {
next.children = append(next.children, n.children[i+1:]...)
n.children.truncate(i + 1)
}
return item, next
}
// maybeSplitChild checks if a child should be split, and if so splits it.
// Returns whether or not a split occurred.
func (n *node) maybeSplitChild(i, maxItems int) bool {
if len(n.children[i].items) < maxItems {
return false
}
first := n.mutableChild(i)
item, second := first.split(maxItems / 2)
n.items.insertAt(i, item)
n.children.insertAt(i+1, second)
return true
}
// insert inserts an item into the subtree rooted at this node, making sure
// no nodes in the subtree exceed maxItems items. Should an equivalent item be
// be found/replaced by insert, it will be returned.
func (n *node) insert(item Item, maxItems int) Item {
i, found := n.items.find(item)
if found {
out := n.items[i]
n.items[i] = item
return out
}
if len(n.children) == 0 {
n.items.insertAt(i, item)
return nil
}
if n.maybeSplitChild(i, maxItems) {
inTree := n.items[i]
switch {
case item.Less(inTree):
// no change, we want first split node
case inTree.Less(item):
i++ // we want second split node
default:
out := n.items[i]
n.items[i] = item
return out
}
}
return n.mutableChild(i).insert(item, maxItems)
}
// get finds the given key in the subtree and returns it.
func (n *node) get(key Item) Item {
i, found := n.items.find(key)
if found {
return n.items[i]
} else if len(n.children) > 0 {
return n.children[i].get(key)
}
return nil
}
// min returns the first item in the subtree.
func min(n *node) Item {
if n == nil {
return nil
}
for len(n.children) > 0 {
n = n.children[0]
}
if len(n.items) == 0 {
return nil
}
return n.items[0]
}
// max returns the last item in the subtree.
func max(n *node) Item {
if n == nil {
return nil
}
for len(n.children) > 0 {
n = n.children[len(n.children)-1]
}
if len(n.items) == 0 {
return nil
}
return n.items[len(n.items)-1]
}
// toRemove details what item to remove in a node.remove call.
type toRemove int
const (
removeItem toRemove = iota // removes the given item
removeMin // removes smallest item in the subtree
removeMax // removes largest item in the subtree
)
// remove removes an item from the subtree rooted at this node.
func (n *node) remove(item Item, minItems int, typ toRemove) Item {
var i int
var found bool
switch typ {
case removeMax:
if len(n.children) == 0 {
return n.items.pop()
}
i = len(n.items)
case removeMin:
if len(n.children) == 0 {
return n.items.removeAt(0)
}
i = 0
case removeItem:
i, found = n.items.find(item)
if len(n.children) == 0 {
if found {
return n.items.removeAt(i)
}
return nil
}
default:
panic("invalid type")
}
// If we get to here, we have children.
if len(n.children[i].items) <= minItems {
return n.growChildAndRemove(i, item, minItems, typ)
}
child := n.mutableChild(i)
// Either we had enough items to begin with, or we've done some
// merging/stealing, because we've got enough now and we're ready to return
// stuff.
if found {
// The item exists at index 'i', and the child we've selected can give us a
// predecessor, since if we've gotten here it's got > minItems items in it.
out := n.items[i]
// We use our special-case 'remove' call with typ=maxItem to pull the
// predecessor of item i (the rightmost leaf of our immediate left child)
// and set it into where we pulled the item from.
n.items[i] = child.remove(nil, minItems, removeMax)
return out
}
// Final recursive call. Once we're here, we know that the item isn't in this
// node and that the child is big enough to remove from.
return child.remove(item, minItems, typ)
}
// growChildAndRemove grows child 'i' to make sure it's possible to remove an
// item from it while keeping it at minItems, then calls remove to actually
// remove it.
//
// Most documentation says we have to do two sets of special casing:
// 1) item is in this node
// 2) item is in child
// In both cases, we need to handle the two subcases:
// A) node has enough values that it can spare one
// B) node doesn't have enough values
// For the latter, we have to check:
// a) left sibling has node to spare
// b) right sibling has node to spare
// c) we must merge
// To simplify our code here, we handle cases #1 and #2 the same:
// If a node doesn't have enough items, we make sure it does (using a,b,c).
// We then simply redo our remove call, and the second time (regardless of
// whether we're in case 1 or 2), we'll have enough items and can guarantee
// that we hit case A.
func (n *node) growChildAndRemove(i int, item Item, minItems int, typ toRemove) Item {
if i > 0 && len(n.children[i-1].items) > minItems {
// Steal from left child
child := n.mutableChild(i)
stealFrom := n.mutableChild(i - 1)
stolenItem := stealFrom.items.pop()
child.items.insertAt(0, n.items[i-1])
n.items[i-1] = stolenItem
if len(stealFrom.children) > 0 {
child.children.insertAt(0, stealFrom.children.pop())
}
} else if i < len(n.items) && len(n.children[i+1].items) > minItems {
// steal from right child
child := n.mutableChild(i)
stealFrom := n.mutableChild(i + 1)
stolenItem := stealFrom.items.removeAt(0)
child.items = append(child.items, n.items[i])
n.items[i] = stolenItem
if len(stealFrom.children) > 0 {
child.children = append(child.children, stealFrom.children.removeAt(0))
}
} else {
if i >= len(n.items) {
i--
}
child := n.mutableChild(i)
// merge with right child
mergeItem := n.items.removeAt(i)
mergeChild := n.children.removeAt(i + 1)
child.items = append(child.items, mergeItem)
child.items = append(child.items, mergeChild.items...)
child.children = append(child.children, mergeChild.children...)
n.cow.freeNode(mergeChild)
}
return n.remove(item, minItems, typ)
}
type direction int
const (
descend = direction(-1)
ascend = direction(+1)
)
// iterate provides a simple method for iterating over elements in the tree.
//
// When ascending, the 'start' should be less than 'stop' and when descending,
// the 'start' should be greater than 'stop'. Setting 'includeStart' to true
// will force the iterator to include the first item when it equals 'start',
// thus creating a "greaterOrEqual" or "lessThanEqual" rather than just a
// "greaterThan" or "lessThan" queries.
func (n *node) iterate(dir direction, start, stop Item, includeStart bool, hit bool, iter ItemIterator) (bool, bool) {
var ok, found bool
var index int
switch dir {
case ascend:
if start != nil {
index, _ = n.items.find(start)
}
for i := index; i < len(n.items); i++ {
if len(n.children) > 0 {
if hit, ok = n.children[i].iterate(dir, start, stop, includeStart, hit, iter); !ok {
return hit, false
}
}
if !includeStart && !hit && start != nil && !start.Less(n.items[i]) {
hit = true
continue
}
hit = true
if stop != nil && !n.items[i].Less(stop) {
return hit, false
}
if !iter(n.items[i]) {
return hit, false
}
}
if len(n.children) > 0 {
if hit, ok = n.children[len(n.children)-1].iterate(dir, start, stop, includeStart, hit, iter); !ok {
return hit, false
}
}
case descend:
if start != nil {
index, found = n.items.find(start)
if !found {
index = index - 1
}
} else {
index = len(n.items) - 1
}
for i := index; i >= 0; i-- {
if start != nil && !n.items[i].Less(start) {
if !includeStart || hit || start.Less(n.items[i]) {
continue
}
}
if len(n.children) > 0 {
if hit, ok = n.children[i+1].iterate(dir, start, stop, includeStart, hit, iter); !ok {
return hit, false
}
}
if stop != nil && !stop.Less(n.items[i]) {
return hit, false // continue
}
hit = true
if !iter(n.items[i]) {
return hit, false
}
}
if len(n.children) > 0 {
if hit, ok = n.children[0].iterate(dir, start, stop, includeStart, hit, iter); !ok {
return hit, false
}
}
}
return hit, true
}
// Used for testing/debugging purposes.
func (n *node) print(w io.Writer, level int) {
fmt.Fprintf(w, "%sNODE:%v\n", strings.Repeat(" ", level), n.items)
for _, c := range n.children {
c.print(w, level+1)
}
}
// BTree is an implementation of a B-Tree.
//
// BTree stores Item instances in an ordered structure, allowing easy insertion,
// removal, and iteration.
//
// Write operations are not safe for concurrent mutation by multiple
// goroutines, but Read operations are.
type BTree struct {
degree int
length int
root *node
cow *copyOnWriteContext
}
// copyOnWriteContext pointers determine node ownership... a tree with a write
// context equivalent to a node's write context is allowed to modify that node.
// A tree whose write context does not match a node's is not allowed to modify
// it, and must create a new, writable copy (IE: it's a Clone).
//
// When doing any write operation, we maintain the invariant that the current
// node's context is equal to the context of the tree that requested the write.
// We do this by, before we descend into any node, creating a copy with the
// correct context if the contexts don't match.
//
// Since the node we're currently visiting on any write has the requesting
// tree's context, that node is modifiable in place. Children of that node may
// not share context, but before we descend into them, we'll make a mutable
// copy.
type copyOnWriteContext struct {
freelist *FreeList
}
// Clone clones the btree, lazily. Clone should not be called concurrently,
// but the original tree (t) and the new tree (t2) can be used concurrently
// once the Clone call completes.
//
// The internal tree structure of b is marked read-only and shared between t and
// t2. Writes to both t and t2 use copy-on-write logic, creating new nodes
// whenever one of b's original nodes would have been modified. Read operations
// should have no performance degredation. Write operations for both t and t2
// will initially experience minor slow-downs caused by additional allocs and
// copies due to the aforementioned copy-on-write logic, but should converge to
// the original performance characteristics of the original tree.
func (t *BTree) Clone() (t2 *BTree) {
// Create two entirely new copy-on-write contexts.
// This operation effectively creates three trees:
// the original, shared nodes (old b.cow)
// the new b.cow nodes
// the new out.cow nodes
cow1, cow2 := *t.cow, *t.cow
out := *t
t.cow = &cow1
out.cow = &cow2
return &out
}
// maxItems returns the max number of items to allow per node.
func (t *BTree) maxItems() int {
return t.degree*2 - 1
}
// minItems returns the min number of items to allow per node (ignored for the
// root node).
func (t *BTree) minItems() int {
return t.degree - 1
}
func (c *copyOnWriteContext) newNode() (n *node) {
n = c.freelist.newNode()
n.cow = c
return
}
type freeType int
const (
ftFreelistFull freeType = iota // node was freed (available for GC, not stored in freelist)
ftStored // node was stored in the freelist for later use
ftNotOwned // node was ignored by COW, since it's owned by another one
)
// freeNode frees a node within a given COW context, if it's owned by that
// context. It returns what happened to the node (see freeType const
// documentation).
func (c *copyOnWriteContext) freeNode(n *node) freeType {
if n.cow == c {
// clear to allow GC
n.items.truncate(0)
n.children.truncate(0)
n.cow = nil
if c.freelist.freeNode(n) {
return ftStored
} else {
return ftFreelistFull
}
} else {
return ftNotOwned
}
}
// ReplaceOrInsert adds the given item to the tree. If an item in the tree
// already equals the given one, it is removed from the tree and returned.
// Otherwise, nil is returned.
//
// nil cannot be added to the tree (will panic).
func (t *BTree) ReplaceOrInsert(item Item) Item {
if item == nil {
panic("nil item being added to BTree")
}
if t.root == nil {
t.root = t.cow.newNode()
t.root.items = append(t.root.items, item)
t.length++
return nil
} else {
t.root = t.root.mutableFor(t.cow)
if len(t.root.items) >= t.maxItems() {
item2, second := t.root.split(t.maxItems() / 2)
oldroot := t.root
t.root = t.cow.newNode()
t.root.items = append(t.root.items, item2)
t.root.children = append(t.root.children, oldroot, second)
}
}
out := t.root.insert(item, t.maxItems())
if out == nil {
t.length++
}
return out
}
// Delete removes an item equal to the passed in item from the tree, returning
// it. If no such item exists, returns nil.
func (t *BTree) Delete(item Item) Item {
return t.deleteItem(item, removeItem)
}
// DeleteMin removes the smallest item in the tree and returns it.
// If no such item exists, returns nil.
func (t *BTree) DeleteMin() Item {
return t.deleteItem(nil, removeMin)
}
// DeleteMax removes the largest item in the tree and returns it.
// If no such item exists, returns nil.
func (t *BTree) DeleteMax() Item {
return t.deleteItem(nil, removeMax)
}
func (t *BTree) deleteItem(item Item, typ toRemove) Item {
if t.root == nil || len(t.root.items) == 0 {
return nil
}
t.root = t.root.mutableFor(t.cow)
out := t.root.remove(item, t.minItems(), typ)
if len(t.root.items) == 0 && len(t.root.children) > 0 {
oldroot := t.root
t.root = t.root.children[0]
t.cow.freeNode(oldroot)
}
if out != nil {
t.length--
}
return out
}
// AscendRange calls the iterator for every value in the tree within the range
// [greaterOrEqual, lessThan), until iterator returns false.
func (t *BTree) AscendRange(greaterOrEqual, lessThan Item, iterator ItemIterator) {
if t.root == nil {
return
}
t.root.iterate(ascend, greaterOrEqual, lessThan, true, false, iterator)
}
// AscendLessThan calls the iterator for every value in the tree within the range
// [first, pivot), until iterator returns false.
func (t *BTree) AscendLessThan(pivot Item, iterator ItemIterator) {
if t.root == nil {
return
}
t.root.iterate(ascend, nil, pivot, false, false, iterator)
}
// AscendGreaterOrEqual calls the iterator for every value in the tree within
// the range [pivot, last], until iterator returns false.
func (t *BTree) AscendGreaterOrEqual(pivot Item, iterator ItemIterator) {
if t.root == nil {
return
}
t.root.iterate(ascend, pivot, nil, true, false, iterator)
}
// Ascend calls the iterator for every value in the tree within the range
// [first, last], until iterator returns false.
func (t *BTree) Ascend(iterator ItemIterator) {
if t.root == nil {
return
}
t.root.iterate(ascend, nil, nil, false, false, iterator)
}
// DescendRange calls the iterator for every value in the tree within the range
// [lessOrEqual, greaterThan), until iterator returns false.
func (t *BTree) DescendRange(lessOrEqual, greaterThan Item, iterator ItemIterator) {
if t.root == nil {
return
}
t.root.iterate(descend, lessOrEqual, greaterThan, true, false, iterator)
}
// DescendLessOrEqual calls the iterator for every value in the tree within the range
// [pivot, first], until iterator returns false.
func (t *BTree) DescendLessOrEqual(pivot Item, iterator ItemIterator) {
if t.root == nil {
return
}
t.root.iterate(descend, pivot, nil, true, false, iterator)
}
// DescendGreaterThan calls the iterator for every value in the tree within
// the range (pivot, last], until iterator returns false.
func (t *BTree) DescendGreaterThan(pivot Item, iterator ItemIterator) {
if t.root == nil {
return
}
t.root.iterate(descend, nil, pivot, false, false, iterator)
}
// Descend calls the iterator for every value in the tree within the range
// [last, first], until iterator returns false.
func (t *BTree) Descend(iterator ItemIterator) {
if t.root == nil {
return
}
t.root.iterate(descend, nil, nil, false, false, iterator)
}
// Get looks for the key item in the tree, returning it. It returns nil if
// unable to find that item.
func (t *BTree) Get(key Item) Item {
if t.root == nil {
return nil
}
return t.root.get(key)
}
// Min returns the smallest item in the tree, or nil if the tree is empty.
func (t *BTree) Min() Item {
return min(t.root)
}
// Max returns the largest item in the tree, or nil if the tree is empty.
func (t *BTree) Max() Item {
return max(t.root)
}
// Has returns true if the given key is in the tree.
func (t *BTree) Has(key Item) bool {
return t.Get(key) != nil
}
// Len returns the number of items currently in the tree.
func (t *BTree) Len() int {
return t.length
}
// Clear removes all items from the btree. If addNodesToFreelist is true,
// t's nodes are added to its freelist as part of this call, until the freelist
// is full. Otherwise, the root node is simply dereferenced and the subtree
// left to Go's normal GC processes.
//
// This can be much faster
// than calling Delete on all elements, because that requires finding/removing
// each element in the tree and updating the tree accordingly. It also is
// somewhat faster than creating a new tree to replace the old one, because
// nodes from the old tree are reclaimed into the freelist for use by the new
// one, instead of being lost to the garbage collector.
//
// This call takes:
// O(1): when addNodesToFreelist is false, this is a single operation.
// O(1): when the freelist is already full, it breaks out immediately
// O(freelist size): when the freelist is empty and the nodes are all owned
// by this tree, nodes are added to the freelist until full.
// O(tree size): when all nodes are owned by another tree, all nodes are
// iterated over looking for nodes to add to the freelist, and due to
// ownership, none are.
func (t *BTree) Clear(addNodesToFreelist bool) {
if t.root != nil && addNodesToFreelist {
t.root.reset(t.cow)
}
t.root, t.length = nil, 0
}
// reset returns a subtree to the freelist. It breaks out immediately if the
// freelist is full, since the only benefit of iterating is to fill that
// freelist up. Returns true if parent reset call should continue.
func (n *node) reset(c *copyOnWriteContext) bool {
for _, child := range n.children {
if !child.reset(c) {
return false
}
}
return c.freeNode(n) != ftFreelistFull
}
// Int implements the Item interface for integers.
type Int int
// Less returns true if int(a) < int(b).
func (a Int) Less(b Item) bool {
return a < b.(Int)
}

View file

@ -1,4 +1,4 @@
# memberlist [![GoDoc](https://godoc.org/github.com/hashicorp/memberlist?status.png)](https://godoc.org/github.com/hashicorp/memberlist)
# memberlist [![GoDoc](https://godoc.org/github.com/hashicorp/memberlist?status.png)](https://godoc.org/github.com/hashicorp/memberlist) [![CircleCI](https://circleci.com/gh/hashicorp/memberlist.svg?style=svg)](https://circleci.com/gh/hashicorp/memberlist)
memberlist is a [Go](http://www.golang.org) library that manages cluster
membership and member failure detection using a gossip based protocol.
@ -23,8 +23,6 @@ Please check your installation with:
go version
```
Run `make deps` to fetch dependencies before building
## Usage
Memberlist is surprisingly simple to use. An example is shown below:
@ -65,7 +63,7 @@ For complete documentation, see the associated [Godoc](http://godoc.org/github.c
## Protocol
memberlist is based on ["SWIM: Scalable Weakly-consistent Infection-style Process Group Membership Protocol"](http://www.cs.cornell.edu/~asdas/research/dsn02-swim.pdf). However, we extend the protocol in a number of ways:
memberlist is based on ["SWIM: Scalable Weakly-consistent Infection-style Process Group Membership Protocol"](http://ieeexplore.ieee.org/document/1028914/). However, we extend the protocol in a number of ways:
* Several extensions are made to increase propagation speed and
convergence rate.

View file

@ -7,8 +7,8 @@ package memberlist
// a node out and prevent it from being considered a peer
// using application specific logic.
type AliveDelegate interface {
// NotifyMerge is invoked when a merge could take place.
// Provides a list of the nodes known by the peer. If
// the return value is non-nil, the merge is canceled.
// NotifyAlive is invoked when a message about a live
// node is received from the network. Returning a non-nil
// error prevents the node from being considered a peer.
NotifyAlive(peer *Node) error
}

View file

@ -21,13 +21,17 @@ type awareness struct {
// score is the current awareness score. Lower values are healthier and
// zero is the minimum value.
score int
// metricLabels is the slice of labels to put on all emitted metrics
metricLabels []metrics.Label
}
// newAwareness returns a new awareness object.
func newAwareness(max int) *awareness {
func newAwareness(max int, metricLabels []metrics.Label) *awareness {
return &awareness{
max: max,
score: 0,
max: max,
score: 0,
metricLabels: metricLabels,
}
}
@ -47,7 +51,7 @@ func (a *awareness) ApplyDelta(delta int) {
a.Unlock()
if initial != final {
metrics.SetGauge([]string{"memberlist", "health", "score"}, float32(final))
metrics.SetGaugeWithLabels([]string{"memberlist", "health", "score"}, float32(final), a.metricLabels)
}
}

View file

@ -29,6 +29,11 @@ func (b *memberlistBroadcast) Invalidates(other Broadcast) bool {
return b.node == mb.node
}
// memberlist.NamedBroadcast optional interface
func (b *memberlistBroadcast) Name() string {
return b.node
}
func (b *memberlistBroadcast) Message() []byte {
return b.msg
}

View file

@ -1,10 +1,16 @@
package memberlist
import (
"fmt"
"io"
"log"
"net"
"os"
"strings"
"time"
"github.com/armon/go-metrics"
multierror "github.com/hashicorp/go-multierror"
)
type Config struct {
@ -16,6 +22,17 @@ type Config struct {
// make a NetTransport using BindAddr and BindPort from this structure.
Transport Transport
// Label is an optional set of bytes to include on the outside of each
// packet and stream.
//
// If gossip encryption is enabled and this is set it is treated as GCM
// authenticated data.
Label string
// SkipInboundLabelCheck skips the check that inbound packets and gossip
// streams need to be label prefixed.
SkipInboundLabelCheck bool
// Configuration related to what address to bind to and ports to
// listen on. The port is used for both UDP and TCP gossip. It is
// assumed other nodes are running on this port, but they do not need
@ -116,6 +133,10 @@ type Config struct {
// indirect UDP pings.
DisableTcpPings bool
// DisableTcpPingsForNode is like DisableTcpPings, but lets you control
// whether to perform TCP pings on a node-by-node basis.
DisableTcpPingsForNode func(nodeName string) bool
// AwarenessMaxMultiplier will increase the probe interval if the node
// becomes aware that it might be degraded and not meeting the soft real
// time requirements to reliably probe other nodes.
@ -215,6 +236,48 @@ type Config struct {
// This is a legacy name for backward compatibility but should really be
// called PacketBufferSize now that we have generalized the transport.
UDPBufferSize int
// DeadNodeReclaimTime controls the time before a dead node's name can be
// reclaimed by one with a different address or port. By default, this is 0,
// meaning nodes cannot be reclaimed this way.
DeadNodeReclaimTime time.Duration
// RequireNodeNames controls if the name of a node is required when sending
// a message to that node.
RequireNodeNames bool
// CIDRsAllowed If nil, allow any connection (default), otherwise specify all networks
// allowed to connect (you must specify IPv6/IPv4 separately)
// Using [] will block all connections.
CIDRsAllowed []net.IPNet
// MetricLabels is a map of optional labels to apply to all metrics emitted.
MetricLabels []metrics.Label
}
// ParseCIDRs return a possible empty list of all Network that have been parsed
// In case of error, it returns succesfully parsed CIDRs and the last error found
func ParseCIDRs(v []string) ([]net.IPNet, error) {
nets := make([]net.IPNet, 0)
if v == nil {
return nets, nil
}
var errs error
hasErrors := false
for _, p := range v {
_, net, err := net.ParseCIDR(strings.TrimSpace(p))
if err != nil {
err = fmt.Errorf("invalid cidr: %s", p)
errs = multierror.Append(errs, err)
hasErrors = true
} else {
nets = append(nets, *net)
}
}
if !hasErrors {
errs = nil
}
return nets, errs
}
// DefaultLANConfig returns a sane set of configurations for Memberlist.
@ -258,6 +321,7 @@ func DefaultLANConfig() *Config {
HandoffQueueDepth: 1024,
UDPBufferSize: 1400,
CIDRsAllowed: nil, // same as allow all
}
}
@ -277,6 +341,24 @@ func DefaultWANConfig() *Config {
return conf
}
// IPMustBeChecked return true if IPAllowed must be called
func (c *Config) IPMustBeChecked() bool {
return len(c.CIDRsAllowed) > 0
}
// IPAllowed return an error if access to memberlist is denied
func (c *Config) IPAllowed(ip net.IP) error {
if !c.IPMustBeChecked() {
return nil
}
for _, n := range c.CIDRsAllowed {
if n.Contains(ip) {
return nil
}
}
return fmt.Errorf("%s is not allowed", ip)
}
// DefaultLocalConfig works like DefaultConfig, however it returns a configuration
// that is optimized for a local loopback environments. The default configuration is
// still very conservative and errs on the side of caution.

View file

@ -49,13 +49,16 @@ type NodeEvent struct {
}
func (c *ChannelEventDelegate) NotifyJoin(n *Node) {
c.Ch <- NodeEvent{NodeJoin, n}
node := *n
c.Ch <- NodeEvent{NodeJoin, &node}
}
func (c *ChannelEventDelegate) NotifyLeave(n *Node) {
c.Ch <- NodeEvent{NodeLeave, n}
node := *n
c.Ch <- NodeEvent{NodeLeave, &node}
}
func (c *ChannelEventDelegate) NotifyUpdate(n *Node) {
c.Ch <- NodeEvent{NodeUpdate, n}
node := *n
c.Ch <- NodeEvent{NodeUpdate, &node}
}

19
vendor/github.com/hashicorp/memberlist/go.mod generated vendored Normal file
View file

@ -0,0 +1,19 @@
module github.com/hashicorp/memberlist
go 1.12
require (
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c
github.com/hashicorp/go-immutable-radix v1.0.0 // indirect
github.com/hashicorp/go-msgpack v0.5.3
github.com/hashicorp/go-multierror v1.0.0
github.com/hashicorp/go-sockaddr v1.0.0
github.com/miekg/dns v1.1.26
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529
github.com/stretchr/testify v1.2.2
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10 // indirect
)

178
vendor/github.com/hashicorp/memberlist/label.go generated vendored Normal file
View file

@ -0,0 +1,178 @@
package memberlist
import (
"bufio"
"fmt"
"io"
"net"
)
// General approach is to prefix all packets and streams with the same structure:
//
// magic type byte (244): uint8
// length of label name: uint8 (because labels can't be longer than 255 bytes)
// label name: []uint8
// LabelMaxSize is the maximum length of a packet or stream label.
const LabelMaxSize = 255
// AddLabelHeaderToPacket prefixes outgoing packets with the correct header if
// the label is not empty.
func AddLabelHeaderToPacket(buf []byte, label string) ([]byte, error) {
if label == "" {
return buf, nil
}
if len(label) > LabelMaxSize {
return nil, fmt.Errorf("label %q is too long", label)
}
return makeLabelHeader(label, buf), nil
}
// RemoveLabelHeaderFromPacket removes any label header from the provided
// packet and returns it along with the remaining packet contents.
func RemoveLabelHeaderFromPacket(buf []byte) (newBuf []byte, label string, err error) {
if len(buf) == 0 {
return buf, "", nil // can't possibly be labeled
}
// [type:byte] [size:byte] [size bytes]
msgType := messageType(buf[0])
if msgType != hasLabelMsg {
return buf, "", nil
}
if len(buf) < 2 {
return nil, "", fmt.Errorf("cannot decode label; packet has been truncated")
}
size := int(buf[1])
if size < 1 {
return nil, "", fmt.Errorf("label header cannot be empty when present")
}
if len(buf) < 2+size {
return nil, "", fmt.Errorf("cannot decode label; packet has been truncated")
}
label = string(buf[2 : 2+size])
newBuf = buf[2+size:]
return newBuf, label, nil
}
// AddLabelHeaderToStream prefixes outgoing streams with the correct header if
// the label is not empty.
func AddLabelHeaderToStream(conn net.Conn, label string) error {
if label == "" {
return nil
}
if len(label) > LabelMaxSize {
return fmt.Errorf("label %q is too long", label)
}
header := makeLabelHeader(label, nil)
_, err := conn.Write(header)
return err
}
// RemoveLabelHeaderFromStream removes any label header from the beginning of
// the stream if present and returns it along with an updated conn with that
// header removed.
//
// Note that on error it is the caller's responsibility to close the
// connection.
func RemoveLabelHeaderFromStream(conn net.Conn) (net.Conn, string, error) {
br := bufio.NewReader(conn)
// First check for the type byte.
peeked, err := br.Peek(1)
if err != nil {
if err == io.EOF {
// It is safe to return the original net.Conn at this point because
// it never contained any data in the first place so we don't have
// to splice the buffer into the conn because both are empty.
return conn, "", nil
}
return nil, "", err
}
msgType := messageType(peeked[0])
if msgType != hasLabelMsg {
conn, err = newPeekedConnFromBufferedReader(conn, br, 0)
return conn, "", err
}
// We are guaranteed to get a size byte as well.
peeked, err = br.Peek(2)
if err != nil {
if err == io.EOF {
return nil, "", fmt.Errorf("cannot decode label; stream has been truncated")
}
return nil, "", err
}
size := int(peeked[1])
if size < 1 {
return nil, "", fmt.Errorf("label header cannot be empty when present")
}
// NOTE: we don't have to check this against LabelMaxSize because a byte
// already has a max value of 255.
// Once we know the size we can peek the label as well. Note that since we
// are using the default bufio.Reader size of 4096, the entire label header
// fits in the initial buffer fill so this should be free.
peeked, err = br.Peek(2 + size)
if err != nil {
if err == io.EOF {
return nil, "", fmt.Errorf("cannot decode label; stream has been truncated")
}
return nil, "", err
}
label := string(peeked[2 : 2+size])
conn, err = newPeekedConnFromBufferedReader(conn, br, 2+size)
if err != nil {
return nil, "", err
}
return conn, label, nil
}
// newPeekedConnFromBufferedReader will splice the buffer contents after the
// offset into the provided net.Conn and return the result so that the rest of
// the buffer contents are returned first when reading from the returned
// peekedConn before moving on to the unbuffered conn contents.
func newPeekedConnFromBufferedReader(conn net.Conn, br *bufio.Reader, offset int) (*peekedConn, error) {
// Extract any of the readahead buffer.
peeked, err := br.Peek(br.Buffered())
if err != nil {
return nil, err
}
return &peekedConn{
Peeked: peeked[offset:],
Conn: conn,
}, nil
}
func makeLabelHeader(label string, rest []byte) []byte {
newBuf := make([]byte, 2, 2+len(label)+len(rest))
newBuf[0] = byte(hasLabelMsg)
newBuf[1] = byte(len(label))
newBuf = append(newBuf, []byte(label)...)
if len(rest) > 0 {
newBuf = append(newBuf, []byte(rest)...)
}
return newBuf
}
func labelOverhead(label string) int {
if label == "" {
return 0
}
return 2 + len(label)
}

View file

@ -13,6 +13,14 @@ func LogAddress(addr net.Addr) string {
return fmt.Sprintf("from=%s", addr.String())
}
func LogStringAddress(addr string) string {
if addr == "" {
return "from=<unknown address>"
}
return fmt.Sprintf("from=%s", addr)
}
func LogConn(conn net.Conn) string {
if conn == nil {
return LogAddress(nil)

View file

@ -15,6 +15,8 @@ multiple routes.
package memberlist
import (
"container/list"
"errors"
"fmt"
"log"
"net"
@ -25,15 +27,23 @@ import (
"sync/atomic"
"time"
"github.com/armon/go-metrics"
multierror "github.com/hashicorp/go-multierror"
sockaddr "github.com/hashicorp/go-sockaddr"
"github.com/miekg/dns"
)
var errNodeNamesAreRequired = errors.New("memberlist: node names are required by configuration but one was not provided")
type Memberlist struct {
sequenceNum uint32 // Local sequence number
incarnation uint32 // Local incarnation number
numNodes uint32 // Number of known nodes (estimate)
pushPullReq uint32 // Number of push/pull requests
advertiseLock sync.RWMutex
advertiseAddr net.IP
advertisePort uint16
config *Config
shutdown int32 // Used as an atomic boolean value
@ -44,13 +54,17 @@ type Memberlist struct {
shutdownLock sync.Mutex // Serializes calls to Shutdown
leaveLock sync.Mutex // Serializes calls to Leave
transport Transport
handoff chan msgHandoff
transport NodeAwareTransport
handoffCh chan struct{}
highPriorityMsgQueue *list.List
lowPriorityMsgQueue *list.List
msgQueueLock sync.Mutex
nodeLock sync.RWMutex
nodes []*nodeState // Known nodes
nodeMap map[string]*nodeState // Maps Addr.String() -> NodeState
nodeTimers map[string]*suspicion // Maps Addr.String() -> suspicion timer
nodeMap map[string]*nodeState // Maps Node.Name -> NodeState
nodeTimers map[string]*suspicion // Maps Node.Name -> suspicion timer
awareness *awareness
tickerLock sync.Mutex
@ -64,6 +78,18 @@ type Memberlist struct {
broadcasts *TransmitLimitedQueue
logger *log.Logger
// metricLabels is the slice of labels to put on all emitted metrics
metricLabels []metrics.Label
}
// BuildVsnArray creates the array of Vsn
func (conf *Config) BuildVsnArray() []uint8 {
return []uint8{
ProtocolVersionMin, ProtocolVersionMax, conf.ProtocolVersion,
conf.DelegateProtocolMin, conf.DelegateProtocolMax,
conf.DelegateProtocolVersion,
}
}
// newMemberlist creates the network listeners.
@ -113,9 +139,10 @@ func newMemberlist(conf *Config) (*Memberlist, error) {
transport := conf.Transport
if transport == nil {
nc := &NetTransportConfig{
BindAddrs: []string{conf.BindAddr},
BindPort: conf.BindPort,
Logger: logger,
BindAddrs: []string{conf.BindAddr},
BindPort: conf.BindPort,
Logger: logger,
MetricLabels: conf.MetricLabels,
}
// See comment below for details about the retry in here.
@ -159,22 +186,50 @@ func newMemberlist(conf *Config) (*Memberlist, error) {
transport = nt
}
nodeAwareTransport, ok := transport.(NodeAwareTransport)
if !ok {
logger.Printf("[DEBUG] memberlist: configured Transport is not a NodeAwareTransport and some features may not work as desired")
nodeAwareTransport = &shimNodeAwareTransport{transport}
}
if len(conf.Label) > LabelMaxSize {
return nil, fmt.Errorf("could not use %q as a label: too long", conf.Label)
}
if conf.Label != "" {
nodeAwareTransport = &labelWrappedTransport{
label: conf.Label,
NodeAwareTransport: nodeAwareTransport,
}
}
m := &Memberlist{
config: conf,
shutdownCh: make(chan struct{}),
leaveBroadcast: make(chan struct{}, 1),
transport: transport,
handoff: make(chan msgHandoff, conf.HandoffQueueDepth),
nodeMap: make(map[string]*nodeState),
nodeTimers: make(map[string]*suspicion),
awareness: newAwareness(conf.AwarenessMaxMultiplier),
ackHandlers: make(map[uint32]*ackHandler),
broadcasts: &TransmitLimitedQueue{RetransmitMult: conf.RetransmitMult},
logger: logger,
config: conf,
shutdownCh: make(chan struct{}),
leaveBroadcast: make(chan struct{}, 1),
transport: nodeAwareTransport,
handoffCh: make(chan struct{}, 1),
highPriorityMsgQueue: list.New(),
lowPriorityMsgQueue: list.New(),
nodeMap: make(map[string]*nodeState),
nodeTimers: make(map[string]*suspicion),
awareness: newAwareness(conf.AwarenessMaxMultiplier, conf.MetricLabels),
ackHandlers: make(map[uint32]*ackHandler),
broadcasts: &TransmitLimitedQueue{RetransmitMult: conf.RetransmitMult},
logger: logger,
metricLabels: conf.MetricLabels,
}
m.broadcasts.NumNodes = func() int {
return m.estNumNodes()
}
// Get the final advertise address from the transport, which may need
// to see which address we bound to. We'll refresh this each time we
// send out an alive message.
if _, _, err := m.refreshAdvertise(); err != nil {
return nil, err
}
go m.streamListen()
go m.packetListen()
go m.packetHandler()
@ -222,8 +277,9 @@ func (m *Memberlist) Join(existing []string) (int, error) {
for _, addr := range addrs {
hp := joinHostPort(addr.ip.String(), addr.port)
if err := m.pushPullNode(hp, true); err != nil {
err = fmt.Errorf("Failed to join %s: %v", addr.ip, err)
a := Address{Addr: hp, Name: addr.nodeName}
if err := m.pushPullNode(a, true); err != nil {
err = fmt.Errorf("Failed to join %s: %v", a.Addr, err)
errs = multierror.Append(errs, err)
m.logger.Printf("[DEBUG] memberlist: %v", err)
continue
@ -240,8 +296,9 @@ func (m *Memberlist) Join(existing []string) (int, error) {
// ipPort holds information about a node we want to try to join.
type ipPort struct {
ip net.IP
port uint16
ip net.IP
port uint16
nodeName string // optional
}
// tcpLookupIP is a helper to initiate a TCP-based DNS lookup for the given host.
@ -250,7 +307,7 @@ type ipPort struct {
// Consul's. By doing the TCP lookup directly, we get the best chance for the
// largest list of hosts to join. Since joins are relatively rare events, it's ok
// to do this rather expensive operation.
func (m *Memberlist) tcpLookupIP(host string, defaultPort uint16) ([]ipPort, error) {
func (m *Memberlist) tcpLookupIP(host string, defaultPort uint16, nodeName string) ([]ipPort, error) {
// Don't attempt any TCP lookups against non-fully qualified domain
// names, since those will likely come from the resolv.conf file.
if !strings.Contains(host, ".") {
@ -292,9 +349,9 @@ func (m *Memberlist) tcpLookupIP(host string, defaultPort uint16) ([]ipPort, err
for _, r := range in.Answer {
switch rr := r.(type) {
case (*dns.A):
ips = append(ips, ipPort{rr.A, defaultPort})
ips = append(ips, ipPort{ip: rr.A, port: defaultPort, nodeName: nodeName})
case (*dns.AAAA):
ips = append(ips, ipPort{rr.AAAA, defaultPort})
ips = append(ips, ipPort{ip: rr.AAAA, port: defaultPort, nodeName: nodeName})
case (*dns.CNAME):
m.logger.Printf("[DEBUG] memberlist: Ignoring CNAME RR in TCP-first answer for '%s'", host)
}
@ -308,6 +365,16 @@ func (m *Memberlist) tcpLookupIP(host string, defaultPort uint16) ([]ipPort, err
// resolveAddr is used to resolve the address into an address,
// port, and error. If no port is given, use the default
func (m *Memberlist) resolveAddr(hostStr string) ([]ipPort, error) {
// First peel off any leading node name. This is optional.
nodeName := ""
if slashIdx := strings.Index(hostStr, "/"); slashIdx >= 0 {
if slashIdx == 0 {
return nil, fmt.Errorf("empty node name provided")
}
nodeName = hostStr[0:slashIdx]
hostStr = hostStr[slashIdx+1:]
}
// This captures the supplied port, or the default one.
hostStr = ensurePort(hostStr, m.config.BindPort)
host, sport, err := net.SplitHostPort(hostStr)
@ -324,13 +391,15 @@ func (m *Memberlist) resolveAddr(hostStr string) ([]ipPort, error) {
// will make sure the host part is in good shape for parsing, even for
// IPv6 addresses.
if ip := net.ParseIP(host); ip != nil {
return []ipPort{ipPort{ip, port}}, nil
return []ipPort{
ipPort{ip: ip, port: port, nodeName: nodeName},
}, nil
}
// First try TCP so we have the best chance for the largest list of
// hosts to join. If this fails it's not fatal since this isn't a standard
// way to query DNS, and we have a fallback below.
ips, err := m.tcpLookupIP(host, port)
ips, err := m.tcpLookupIP(host, port, nodeName)
if err != nil {
m.logger.Printf("[DEBUG] memberlist: TCP-first lookup failed for '%s', falling back to UDP: %s", hostStr, err)
}
@ -347,7 +416,7 @@ func (m *Memberlist) resolveAddr(hostStr string) ([]ipPort, error) {
}
ips = make([]ipPort, 0, len(ans))
for _, ip := range ans {
ips = append(ips, ipPort{ip, port})
ips = append(ips, ipPort{ip: ip, port: port, nodeName: nodeName})
}
return ips, nil
}
@ -358,10 +427,9 @@ func (m *Memberlist) resolveAddr(hostStr string) ([]ipPort, error) {
func (m *Memberlist) setAlive() error {
// Get the final advertise address from the transport, which may need
// to see which address we bound to.
addr, port, err := m.transport.FinalAdvertiseAddr(
m.config.AdvertiseAddr, m.config.AdvertisePort)
addr, port, err := m.refreshAdvertise()
if err != nil {
return fmt.Errorf("Failed to get final advertise address: %v", err)
return err
}
// Check if this is a public address without encryption
@ -394,16 +462,36 @@ func (m *Memberlist) setAlive() error {
Addr: addr,
Port: uint16(port),
Meta: meta,
Vsn: []uint8{
ProtocolVersionMin, ProtocolVersionMax, m.config.ProtocolVersion,
m.config.DelegateProtocolMin, m.config.DelegateProtocolMax,
m.config.DelegateProtocolVersion,
},
Vsn: m.config.BuildVsnArray(),
}
m.aliveNode(&a, nil, true)
return nil
}
func (m *Memberlist) getAdvertise() (net.IP, uint16) {
m.advertiseLock.RLock()
defer m.advertiseLock.RUnlock()
return m.advertiseAddr, m.advertisePort
}
func (m *Memberlist) setAdvertise(addr net.IP, port int) {
m.advertiseLock.Lock()
defer m.advertiseLock.Unlock()
m.advertiseAddr = addr
m.advertisePort = uint16(port)
}
func (m *Memberlist) refreshAdvertise() (net.IP, int, error) {
addr, port, err := m.transport.FinalAdvertiseAddr(
m.config.AdvertiseAddr, m.config.AdvertisePort)
if err != nil {
return nil, 0, fmt.Errorf("Failed to get final advertise address: %v", err)
}
m.setAdvertise(addr, port)
return addr, port, nil
}
// LocalNode is used to return the local Node
func (m *Memberlist) LocalNode() *Node {
m.nodeLock.RLock()
@ -439,11 +527,7 @@ func (m *Memberlist) UpdateNode(timeout time.Duration) error {
Addr: state.Addr,
Port: state.Port,
Meta: meta,
Vsn: []uint8{
ProtocolVersionMin, ProtocolVersionMax, m.config.ProtocolVersion,
m.config.DelegateProtocolMin, m.config.DelegateProtocolMax,
m.config.DelegateProtocolVersion,
},
Vsn: m.config.BuildVsnArray(),
}
notifyCh := make(chan struct{})
m.aliveNode(&a, notifyCh, true)
@ -463,24 +547,29 @@ func (m *Memberlist) UpdateNode(timeout time.Duration) error {
return nil
}
// SendTo is deprecated in favor of SendBestEffort, which requires a node to
// target.
// Deprecated: SendTo is deprecated in favor of SendBestEffort, which requires a node to
// target. If you don't have a node then use SendToAddress.
func (m *Memberlist) SendTo(to net.Addr, msg []byte) error {
a := Address{Addr: to.String(), Name: ""}
return m.SendToAddress(a, msg)
}
func (m *Memberlist) SendToAddress(a Address, msg []byte) error {
// Encode as a user message
buf := make([]byte, 1, len(msg)+1)
buf[0] = byte(userMsg)
buf = append(buf, msg...)
// Send the message
return m.rawSendMsgPacket(to.String(), nil, buf)
return m.rawSendMsgPacket(a, nil, buf)
}
// SendToUDP is deprecated in favor of SendBestEffort.
// Deprecated: SendToUDP is deprecated in favor of SendBestEffort.
func (m *Memberlist) SendToUDP(to *Node, msg []byte) error {
return m.SendBestEffort(to, msg)
}
// SendToTCP is deprecated in favor of SendReliable.
// Deprecated: SendToTCP is deprecated in favor of SendReliable.
func (m *Memberlist) SendToTCP(to *Node, msg []byte) error {
return m.SendReliable(to, msg)
}
@ -496,7 +585,8 @@ func (m *Memberlist) SendBestEffort(to *Node, msg []byte) error {
buf = append(buf, msg...)
// Send the message
return m.rawSendMsgPacket(to.Address(), to, buf)
a := Address{Addr: to.Address(), Name: to.Name}
return m.rawSendMsgPacket(a, to, buf)
}
// SendReliable uses the reliable stream-oriented interface of the transport to
@ -504,7 +594,7 @@ func (m *Memberlist) SendBestEffort(to *Node, msg []byte) error {
// mechanism). Delivery is guaranteed if no error is returned, and there is no
// limit on the size of the message.
func (m *Memberlist) SendReliable(to *Node, msg []byte) error {
return m.sendUserMsg(to.Address(), msg)
return m.sendUserMsg(to.FullAddress(), msg)
}
// Members returns a list of all known live nodes. The node structures
@ -516,7 +606,7 @@ func (m *Memberlist) Members() []*Node {
nodes := make([]*Node, 0, len(m.nodes))
for _, n := range m.nodes {
if n.State != stateDead {
if !n.DeadOrLeft() {
nodes = append(nodes, &n.Node)
}
}
@ -533,7 +623,7 @@ func (m *Memberlist) NumMembers() (alive int) {
defer m.nodeLock.RUnlock()
for _, n := range m.nodes {
if n.State != stateDead {
if !n.DeadOrLeft() {
alive++
}
}
@ -570,9 +660,14 @@ func (m *Memberlist) Leave(timeout time.Duration) error {
return nil
}
// This dead message is special, because Node and From are the
// same. This helps other nodes figure out that a node left
// intentionally. When Node equals From, other nodes know for
// sure this node is gone.
d := dead{
Incarnation: state.Incarnation,
Node: state.Name,
From: state.Name,
}
m.deadNode(&d)
@ -598,7 +693,7 @@ func (m *Memberlist) anyAlive() bool {
m.nodeLock.RLock()
defer m.nodeLock.RUnlock()
for _, n := range m.nodes {
if n.State != stateDead && n.Name != m.config.Name {
if !n.DeadOrLeft() && n.Name != m.config.Name {
return true
}
}
@ -621,7 +716,7 @@ func (m *Memberlist) ProtocolVersion() uint8 {
return m.config.ProtocolVersion
}
// Shutdown will stop any background maintanence of network activity
// Shutdown will stop any background maintenance of network activity
// for this memberlist, causing it to appear "dead". A leave message
// will not be broadcasted prior, so the cluster being left will have
// to detect this node's shutdown using probing. If you wish to more
@ -657,3 +752,27 @@ func (m *Memberlist) hasShutdown() bool {
func (m *Memberlist) hasLeft() bool {
return atomic.LoadInt32(&m.leave) == 1
}
func (m *Memberlist) getNodeState(addr string) NodeStateType {
m.nodeLock.RLock()
defer m.nodeLock.RUnlock()
n := m.nodeMap[addr]
return n.State
}
func (m *Memberlist) getNodeStateChange(addr string) time.Time {
m.nodeLock.RLock()
defer m.nodeLock.RUnlock()
n := m.nodeMap[addr]
return n.StateChange
}
func (m *Memberlist) changeNode(addr string, f func(*nodeState)) {
m.nodeLock.Lock()
defer m.nodeLock.Unlock()
n := m.nodeMap[addr]
f(n)
}

View file

@ -1,7 +1,9 @@
package memberlist
import (
"bytes"
"fmt"
"io"
"net"
"strconv"
"time"
@ -10,26 +12,33 @@ import (
// MockNetwork is used as a factory that produces MockTransport instances which
// are uniquely addressed and wired up to talk to each other.
type MockNetwork struct {
transports map[string]*MockTransport
port int
transportsByAddr map[string]*MockTransport
transportsByName map[string]*MockTransport
port int
}
// NewTransport returns a new MockTransport with a unique address, wired up to
// talk to the other transports in the MockNetwork.
func (n *MockNetwork) NewTransport() *MockTransport {
func (n *MockNetwork) NewTransport(name string) *MockTransport {
n.port += 1
addr := fmt.Sprintf("127.0.0.1:%d", n.port)
transport := &MockTransport{
net: n,
addr: &MockAddress{addr},
addr: &MockAddress{addr, name},
packetCh: make(chan *Packet),
streamCh: make(chan net.Conn),
}
if n.transports == nil {
n.transports = make(map[string]*MockTransport)
if n.transportsByAddr == nil {
n.transportsByAddr = make(map[string]*MockTransport)
}
n.transports[addr] = transport
n.transportsByAddr[addr] = transport
if n.transportsByName == nil {
n.transportsByName = make(map[string]*MockTransport)
}
n.transportsByName[name] = transport
return transport
}
@ -37,6 +46,7 @@ func (n *MockNetwork) NewTransport() *MockTransport {
// address scheme.
type MockAddress struct {
addr string
name string
}
// See net.Addr.
@ -57,6 +67,8 @@ type MockTransport struct {
streamCh chan net.Conn
}
var _ NodeAwareTransport = (*MockTransport)(nil)
// See Transport.
func (t *MockTransport) FinalAdvertiseAddr(string, int) (net.IP, int, error) {
host, portStr, err := net.SplitHostPort(t.addr.String())
@ -79,9 +91,15 @@ func (t *MockTransport) FinalAdvertiseAddr(string, int) (net.IP, int, error) {
// See Transport.
func (t *MockTransport) WriteTo(b []byte, addr string) (time.Time, error) {
dest, ok := t.net.transports[addr]
if !ok {
return time.Time{}, fmt.Errorf("No route to %q", addr)
a := Address{Addr: addr, Name: ""}
return t.WriteToAddress(b, a)
}
// See NodeAwareTransport.
func (t *MockTransport) WriteToAddress(b []byte, a Address) (time.Time, error) {
dest, err := t.getPeer(a)
if err != nil {
return time.Time{}, err
}
now := time.Now()
@ -98,11 +116,45 @@ func (t *MockTransport) PacketCh() <-chan *Packet {
return t.packetCh
}
// See NodeAwareTransport.
func (t *MockTransport) IngestPacket(conn net.Conn, addr net.Addr, now time.Time, shouldClose bool) error {
if shouldClose {
defer conn.Close()
}
// Copy everything from the stream into packet buffer.
var buf bytes.Buffer
if _, err := io.Copy(&buf, conn); err != nil {
return fmt.Errorf("failed to read packet: %v", err)
}
// Check the length - it needs to have at least one byte to be a proper
// message. This is checked elsewhere for writes coming in directly from
// the UDP socket.
if n := buf.Len(); n < 1 {
return fmt.Errorf("packet too short (%d bytes) %s", n, LogAddress(addr))
}
// Inject the packet.
t.packetCh <- &Packet{
Buf: buf.Bytes(),
From: addr,
Timestamp: now,
}
return nil
}
// See Transport.
func (t *MockTransport) DialTimeout(addr string, timeout time.Duration) (net.Conn, error) {
dest, ok := t.net.transports[addr]
if !ok {
return nil, fmt.Errorf("No route to %q", addr)
a := Address{Addr: addr, Name: ""}
return t.DialAddressTimeout(a, timeout)
}
// See NodeAwareTransport.
func (t *MockTransport) DialAddressTimeout(a Address, timeout time.Duration) (net.Conn, error) {
dest, err := t.getPeer(a)
if err != nil {
return nil, err
}
p1, p2 := net.Pipe()
@ -115,7 +167,29 @@ func (t *MockTransport) StreamCh() <-chan net.Conn {
return t.streamCh
}
// See NodeAwareTransport.
func (t *MockTransport) IngestStream(conn net.Conn) error {
t.streamCh <- conn
return nil
}
// See Transport.
func (t *MockTransport) Shutdown() error {
return nil
}
func (t *MockTransport) getPeer(a Address) (*MockTransport, error) {
var (
dest *MockTransport
ok bool
)
if a.Name != "" {
dest, ok = t.net.transportsByName[a.Name]
} else {
dest, ok = t.net.transportsByAddr[a.Addr]
}
if !ok {
return nil, fmt.Errorf("No route to %s", a)
}
return dest, nil
}

View file

@ -7,10 +7,12 @@ import (
"fmt"
"hash/crc32"
"io"
"math"
"net"
"sync/atomic"
"time"
"github.com/armon/go-metrics"
metrics "github.com/armon/go-metrics"
"github.com/hashicorp/go-msgpack/codec"
)
@ -41,6 +43,9 @@ const (
type messageType uint8
// The list of available message types.
//
// WARNING: ONLY APPEND TO THIS LIST! The numeric values are part of the
// protocol itself.
const (
pingMsg messageType = iota
indirectPingMsg
@ -58,6 +63,13 @@ const (
errMsg
)
const (
// hasLabelMsg has a deliberately high value so that you can disambiguate
// it from the encryptionVersion header which is either 0/1 right now and
// also any of the existing messageTypes
hasLabelMsg messageType = 244
)
// compressionType is used to specify the compression algorithm
type compressionType uint8
@ -71,7 +83,8 @@ const (
compoundOverhead = 2 // Assumed overhead per entry in compoundHeader
userMsgOverhead = 1
blockingWarning = 10 * time.Millisecond // Warn if a UDP packet takes this long to process
maxPushStateBytes = 10 * 1024 * 1024
maxPushStateBytes = 20 * 1024 * 1024
maxPushPullRequests = 128 // Maximum number of concurrent push/pull requests
)
// ping request sent directly to node
@ -82,15 +95,28 @@ type ping struct {
// the intended recipient. This is to protect again an agent
// restart with a new name.
Node string
SourceAddr []byte `codec:",omitempty"` // Source address, used for a direct reply
SourcePort uint16 `codec:",omitempty"` // Source port, used for a direct reply
SourceNode string `codec:",omitempty"` // Source name, used for a direct reply
}
// indirect ping sent to an indirect ndoe
// indirect ping sent to an indirect node
type indirectPingReq struct {
SeqNo uint32
Target []byte
Port uint16
Node string
Nack bool // true if we'd like a nack back
// Node is sent so the target can verify they are
// the intended recipient. This is to protect against an agent
// restart with a new name.
Node string
Nack bool // true if we'd like a nack back
SourceAddr []byte `codec:",omitempty"` // Source address, used for a direct reply
SourcePort uint16 `codec:",omitempty"` // Source port, used for a direct reply
SourceNode string `codec:",omitempty"` // Source name, used for a direct reply
}
// ack response is sent for a ping
@ -161,7 +187,7 @@ type pushNodeState struct {
Port uint16
Meta []byte
Incarnation uint32
State nodeStateType
State NodeStateType
Vsn []uint8 // Protocol versions
}
@ -205,13 +231,38 @@ func (m *Memberlist) streamListen() {
// handleConn handles a single incoming stream connection from the transport.
func (m *Memberlist) handleConn(conn net.Conn) {
defer conn.Close()
m.logger.Printf("[DEBUG] memberlist: Stream connection %s", LogConn(conn))
defer conn.Close()
metrics.IncrCounter([]string{"memberlist", "tcp", "accept"}, 1)
metrics.IncrCounterWithLabels([]string{"memberlist", "tcp", "accept"}, 1, m.metricLabels)
conn.SetDeadline(time.Now().Add(m.config.TCPTimeout))
msgType, bufConn, dec, err := m.readStream(conn)
var (
streamLabel string
err error
)
conn, streamLabel, err = RemoveLabelHeaderFromStream(conn)
if err != nil {
m.logger.Printf("[ERR] memberlist: failed to receive and remove the stream label header: %s %s", err, LogConn(conn))
return
}
if m.config.SkipInboundLabelCheck {
if streamLabel != "" {
m.logger.Printf("[ERR] memberlist: unexpected double stream label header: %s", LogConn(conn))
return
}
// Set this from config so that the auth data assertions work below.
streamLabel = m.config.Label
}
if m.config.Label != streamLabel {
m.logger.Printf("[ERR] memberlist: discarding stream with unacceptable label %q: %s", streamLabel, LogConn(conn))
return
}
msgType, bufConn, dec, err := m.readStream(conn, streamLabel)
if err != nil {
if err != io.EOF {
m.logger.Printf("[ERR] memberlist: failed to receive: %s %s", err, LogConn(conn))
@ -223,7 +274,7 @@ func (m *Memberlist) handleConn(conn net.Conn) {
return
}
err = m.rawSendMsgStream(conn, out.Bytes())
err = m.rawSendMsgStream(conn, out.Bytes(), streamLabel)
if err != nil {
m.logger.Printf("[ERR] memberlist: Failed to send error: %s %s", err, LogConn(conn))
return
@ -238,13 +289,23 @@ func (m *Memberlist) handleConn(conn net.Conn) {
m.logger.Printf("[ERR] memberlist: Failed to receive user message: %s %s", err, LogConn(conn))
}
case pushPullMsg:
// Increment counter of pending push/pulls
numConcurrent := atomic.AddUint32(&m.pushPullReq, 1)
defer atomic.AddUint32(&m.pushPullReq, ^uint32(0))
// Check if we have too many open push/pull requests
if numConcurrent >= maxPushPullRequests {
m.logger.Printf("[ERR] memberlist: Too many pending push/pull requests")
return
}
join, remoteNodes, userState, err := m.readRemoteState(bufConn, dec)
if err != nil {
m.logger.Printf("[ERR] memberlist: Failed to read remote state: %s %s", err, LogConn(conn))
return
}
if err := m.sendLocalState(conn, join); err != nil {
if err := m.sendLocalState(conn, join, streamLabel); err != nil {
m.logger.Printf("[ERR] memberlist: Failed to push local state: %s %s", err, LogConn(conn))
return
}
@ -272,7 +333,7 @@ func (m *Memberlist) handleConn(conn net.Conn) {
return
}
err = m.rawSendMsgStream(conn, out.Bytes())
err = m.rawSendMsgStream(conn, out.Bytes(), streamLabel)
if err != nil {
m.logger.Printf("[ERR] memberlist: Failed to send ack: %s %s", err, LogConn(conn))
return
@ -297,10 +358,35 @@ func (m *Memberlist) packetListen() {
}
func (m *Memberlist) ingestPacket(buf []byte, from net.Addr, timestamp time.Time) {
var (
packetLabel string
err error
)
buf, packetLabel, err = RemoveLabelHeaderFromPacket(buf)
if err != nil {
m.logger.Printf("[ERR] memberlist: %v %s", err, LogAddress(from))
return
}
if m.config.SkipInboundLabelCheck {
if packetLabel != "" {
m.logger.Printf("[ERR] memberlist: unexpected double packet label header: %s", LogAddress(from))
return
}
// Set this from config so that the auth data assertions work below.
packetLabel = m.config.Label
}
if m.config.Label != packetLabel {
m.logger.Printf("[ERR] memberlist: discarding packet with unacceptable label %q: %s", packetLabel, LogAddress(from))
return
}
// Check if encryption is enabled
if m.config.EncryptionEnabled() {
// Decrypt the payload
plain, err := decryptPayload(m.config.Keyring.GetKeys(), buf, nil)
authData := []byte(packetLabel)
plain, err := decryptPayload(m.config.Keyring.GetKeys(), buf, authData)
if err != nil {
if !m.config.GossipVerifyIncoming {
// Treat the message as plaintext
@ -330,6 +416,10 @@ func (m *Memberlist) ingestPacket(buf []byte, from net.Addr, timestamp time.Time
}
func (m *Memberlist) handleCommand(buf []byte, from net.Addr, timestamp time.Time) {
if len(buf) < 1 {
m.logger.Printf("[ERR] memberlist: missing message type byte %s", LogAddress(from))
return
}
// Decode the message type
msgType := messageType(buf[0])
buf = buf[1:]
@ -357,10 +447,25 @@ func (m *Memberlist) handleCommand(buf []byte, from net.Addr, timestamp time.Tim
case deadMsg:
fallthrough
case userMsg:
select {
case m.handoff <- msgHandoff{msgType, buf, from}:
default:
// Determine the message queue, prioritize alive
queue := m.lowPriorityMsgQueue
if msgType == aliveMsg {
queue = m.highPriorityMsgQueue
}
// Check for overflow and append if not full
m.msgQueueLock.Lock()
if queue.Len() >= m.config.HandoffQueueDepth {
m.logger.Printf("[WARN] memberlist: handler queue full, dropping message (%d) %s", msgType, LogAddress(from))
} else {
queue.PushBack(msgHandoff{msgType, buf, from})
}
m.msgQueueLock.Unlock()
// Notify of pending message
select {
case m.handoffCh <- struct{}{}:
default:
}
default:
@ -368,28 +473,51 @@ func (m *Memberlist) handleCommand(buf []byte, from net.Addr, timestamp time.Tim
}
}
// getNextMessage returns the next message to process in priority order, using LIFO
func (m *Memberlist) getNextMessage() (msgHandoff, bool) {
m.msgQueueLock.Lock()
defer m.msgQueueLock.Unlock()
if el := m.highPriorityMsgQueue.Back(); el != nil {
m.highPriorityMsgQueue.Remove(el)
msg := el.Value.(msgHandoff)
return msg, true
} else if el := m.lowPriorityMsgQueue.Back(); el != nil {
m.lowPriorityMsgQueue.Remove(el)
msg := el.Value.(msgHandoff)
return msg, true
}
return msgHandoff{}, false
}
// packetHandler is a long running goroutine that processes messages received
// over the packet interface, but is decoupled from the listener to avoid
// blocking the listener which may cause ping/ack messages to be delayed.
func (m *Memberlist) packetHandler() {
for {
select {
case msg := <-m.handoff:
msgType := msg.msgType
buf := msg.buf
from := msg.from
case <-m.handoffCh:
for {
msg, ok := m.getNextMessage()
if !ok {
break
}
msgType := msg.msgType
buf := msg.buf
from := msg.from
switch msgType {
case suspectMsg:
m.handleSuspect(buf, from)
case aliveMsg:
m.handleAlive(buf, from)
case deadMsg:
m.handleDead(buf, from)
case userMsg:
m.handleUser(buf, from)
default:
m.logger.Printf("[ERR] memberlist: Message type (%d) not supported %s (packet handler)", msgType, LogAddress(from))
switch msgType {
case suspectMsg:
m.handleSuspect(buf, from)
case aliveMsg:
m.handleAlive(buf, from)
case deadMsg:
m.handleDead(buf, from)
case userMsg:
m.handleUser(buf, from)
default:
m.logger.Printf("[ERR] memberlist: Message type (%d) not supported %s (packet handler)", msgType, LogAddress(from))
}
}
case <-m.shutdownCh:
@ -433,7 +561,19 @@ func (m *Memberlist) handlePing(buf []byte, from net.Addr) {
if m.config.Ping != nil {
ack.Payload = m.config.Ping.AckPayload()
}
if err := m.encodeAndSendMsg(from.String(), ackRespMsg, &ack); err != nil {
addr := ""
if len(p.SourceAddr) > 0 && p.SourcePort > 0 {
addr = joinHostPort(net.IP(p.SourceAddr).String(), p.SourcePort)
} else {
addr = from.String()
}
a := Address{
Addr: addr,
Name: p.SourceNode,
}
if err := m.encodeAndSendMsg(a, ackRespMsg, &ack); err != nil {
m.logger.Printf("[ERR] memberlist: Failed to send ack: %s %s", err, LogAddress(from))
}
}
@ -453,7 +593,25 @@ func (m *Memberlist) handleIndirectPing(buf []byte, from net.Addr) {
// Send a ping to the correct host.
localSeqNo := m.nextSeqNo()
ping := ping{SeqNo: localSeqNo, Node: ind.Node}
selfAddr, selfPort := m.getAdvertise()
ping := ping{
SeqNo: localSeqNo,
Node: ind.Node,
// The outbound message is addressed FROM us.
SourceAddr: selfAddr,
SourcePort: selfPort,
SourceNode: m.config.Name,
}
// Forward the ack back to the requestor. If the request encodes an origin
// use that otherwise assume that the other end of the UDP socket is
// usable.
indAddr := ""
if len(ind.SourceAddr) > 0 && ind.SourcePort > 0 {
indAddr = joinHostPort(net.IP(ind.SourceAddr).String(), ind.SourcePort)
} else {
indAddr = from.String()
}
// Setup a response handler to relay the ack
cancelCh := make(chan struct{})
@ -461,18 +619,25 @@ func (m *Memberlist) handleIndirectPing(buf []byte, from net.Addr) {
// Try to prevent the nack if we've caught it in time.
close(cancelCh)
// Forward the ack back to the requestor.
ack := ackResp{ind.SeqNo, nil}
if err := m.encodeAndSendMsg(from.String(), ackRespMsg, &ack); err != nil {
m.logger.Printf("[ERR] memberlist: Failed to forward ack: %s %s", err, LogAddress(from))
a := Address{
Addr: indAddr,
Name: ind.SourceNode,
}
if err := m.encodeAndSendMsg(a, ackRespMsg, &ack); err != nil {
m.logger.Printf("[ERR] memberlist: Failed to forward ack: %s %s", err, LogStringAddress(indAddr))
}
}
m.setAckHandler(localSeqNo, respHandler, m.config.ProbeTimeout)
// Send the ping.
addr := joinHostPort(net.IP(ind.Target).String(), ind.Port)
if err := m.encodeAndSendMsg(addr, pingMsg, &ping); err != nil {
m.logger.Printf("[ERR] memberlist: Failed to send ping: %s %s", err, LogAddress(from))
a := Address{
Addr: addr,
Name: ind.Node,
}
if err := m.encodeAndSendMsg(a, pingMsg, &ping); err != nil {
m.logger.Printf("[ERR] memberlist: Failed to send indirect ping: %s %s", err, LogStringAddress(indAddr))
}
// Setup a timer to fire off a nack if no ack is seen in time.
@ -483,8 +648,12 @@ func (m *Memberlist) handleIndirectPing(buf []byte, from net.Addr) {
return
case <-time.After(m.config.ProbeTimeout):
nack := nackResp{ind.SeqNo}
if err := m.encodeAndSendMsg(from.String(), nackRespMsg, &nack); err != nil {
m.logger.Printf("[ERR] memberlist: Failed to send nack: %s %s", err, LogAddress(from))
a := Address{
Addr: indAddr,
Name: ind.SourceNode,
}
if err := m.encodeAndSendMsg(a, nackRespMsg, &nack); err != nil {
m.logger.Printf("[ERR] memberlist: Failed to send nack: %s %s", err, LogStringAddress(indAddr))
}
}
}()
@ -518,12 +687,47 @@ func (m *Memberlist) handleSuspect(buf []byte, from net.Addr) {
m.suspectNode(&sus)
}
// ensureCanConnect return the IP from a RemoteAddress
// return error if this client must not connect
func (m *Memberlist) ensureCanConnect(from net.Addr) error {
if !m.config.IPMustBeChecked() {
return nil
}
source := from.String()
if source == "pipe" {
return nil
}
host, _, err := net.SplitHostPort(source)
if err != nil {
return err
}
ip := net.ParseIP(host)
if ip == nil {
return fmt.Errorf("Cannot parse IP from %s", host)
}
return m.config.IPAllowed(ip)
}
func (m *Memberlist) handleAlive(buf []byte, from net.Addr) {
if err := m.ensureCanConnect(from); err != nil {
m.logger.Printf("[DEBUG] memberlist: Blocked alive message: %s %s", err, LogAddress(from))
return
}
var live alive
if err := decode(buf, &live); err != nil {
m.logger.Printf("[ERR] memberlist: Failed to decode alive message: %s %s", err, LogAddress(from))
return
}
if m.config.IPMustBeChecked() {
innerIP := net.IP(live.Addr)
if innerIP != nil {
if err := m.config.IPAllowed(innerIP); err != nil {
m.logger.Printf("[DEBUG] memberlist: Blocked alive.Addr=%s message from: %s %s", innerIP.String(), err, LogAddress(from))
return
}
}
}
// For proto versions < 2, there is no port provided. Mask old
// behavior by using the configured port
@ -565,12 +769,12 @@ func (m *Memberlist) handleCompressed(buf []byte, from net.Addr, timestamp time.
}
// encodeAndSendMsg is used to combine the encoding and sending steps
func (m *Memberlist) encodeAndSendMsg(addr string, msgType messageType, msg interface{}) error {
func (m *Memberlist) encodeAndSendMsg(a Address, msgType messageType, msg interface{}) error {
out, err := encode(msgType, msg)
if err != nil {
return err
}
if err := m.sendMsg(addr, out.Bytes()); err != nil {
if err := m.sendMsg(a, out.Bytes()); err != nil {
return err
}
return nil
@ -578,9 +782,9 @@ func (m *Memberlist) encodeAndSendMsg(addr string, msgType messageType, msg inte
// sendMsg is used to send a message via packet to another host. It will
// opportunistically create a compoundMsg and piggy back other broadcasts.
func (m *Memberlist) sendMsg(addr string, msg []byte) error {
func (m *Memberlist) sendMsg(a Address, msg []byte) error {
// Check if we can piggy back any messages
bytesAvail := m.config.UDPBufferSize - len(msg) - compoundHeaderOverhead
bytesAvail := m.config.UDPBufferSize - len(msg) - compoundHeaderOverhead - labelOverhead(m.config.Label)
if m.config.EncryptionEnabled() && m.config.GossipVerifyOutgoing {
bytesAvail -= encryptOverhead(m.encryptionVersion())
}
@ -588,7 +792,7 @@ func (m *Memberlist) sendMsg(addr string, msg []byte) error {
// Fast path if nothing to piggypack
if len(extra) == 0 {
return m.rawSendMsgPacket(addr, nil, msg)
return m.rawSendMsgPacket(a, nil, msg)
}
// Join all the messages
@ -600,12 +804,16 @@ func (m *Memberlist) sendMsg(addr string, msg []byte) error {
compound := makeCompoundMessage(msgs)
// Send the message
return m.rawSendMsgPacket(addr, nil, compound.Bytes())
return m.rawSendMsgPacket(a, nil, compound.Bytes())
}
// rawSendMsgPacket is used to send message via packet to another host without
// modification, other than compression or encryption if enabled.
func (m *Memberlist) rawSendMsgPacket(addr string, node *Node, msg []byte) error {
func (m *Memberlist) rawSendMsgPacket(a Address, node *Node, msg []byte) error {
if a.Name == "" && m.config.RequireNodeNames {
return errNodeNamesAreRequired
}
// Check if we have compression enabled
if m.config.EnableCompression {
buf, err := compressPayload(msg)
@ -619,11 +827,12 @@ func (m *Memberlist) rawSendMsgPacket(addr string, node *Node, msg []byte) error
}
}
// Try to look up the destination node
// Try to look up the destination node. Note this will only work if the
// bare ip address is used as the node name, which is not guaranteed.
if node == nil {
toAddr, _, err := net.SplitHostPort(addr)
toAddr, _, err := net.SplitHostPort(a.Addr)
if err != nil {
m.logger.Printf("[ERR] memberlist: Failed to parse address %q: %v", addr, err)
m.logger.Printf("[ERR] memberlist: Failed to parse address %q: %v", a.Addr, err)
return err
}
m.nodeLock.RLock()
@ -647,9 +856,12 @@ func (m *Memberlist) rawSendMsgPacket(addr string, node *Node, msg []byte) error
// Check if we have encryption enabled
if m.config.EncryptionEnabled() && m.config.GossipVerifyOutgoing {
// Encrypt the payload
var buf bytes.Buffer
primaryKey := m.config.Keyring.GetPrimaryKey()
err := encryptPayload(m.encryptionVersion(), primaryKey, msg, nil, &buf)
var (
primaryKey = m.config.Keyring.GetPrimaryKey()
packetLabel = []byte(m.config.Label)
buf bytes.Buffer
)
err := encryptPayload(m.encryptionVersion(), primaryKey, msg, packetLabel, &buf)
if err != nil {
m.logger.Printf("[ERR] memberlist: Encryption of message failed: %v", err)
return err
@ -657,15 +869,15 @@ func (m *Memberlist) rawSendMsgPacket(addr string, node *Node, msg []byte) error
msg = buf.Bytes()
}
metrics.IncrCounter([]string{"memberlist", "udp", "sent"}, float32(len(msg)))
_, err := m.transport.WriteTo(msg, addr)
metrics.IncrCounterWithLabels([]string{"memberlist", "udp", "sent"}, float32(len(msg)), m.metricLabels)
_, err := m.transport.WriteToAddress(msg, a)
return err
}
// rawSendMsgStream is used to stream a message to another host without
// modification, other than applying compression and encryption if enabled.
func (m *Memberlist) rawSendMsgStream(conn net.Conn, sendBuf []byte) error {
// Check if compresion is enabled
func (m *Memberlist) rawSendMsgStream(conn net.Conn, sendBuf []byte, streamLabel string) error {
// Check if compression is enabled
if m.config.EnableCompression {
compBuf, err := compressPayload(sendBuf)
if err != nil {
@ -677,7 +889,7 @@ func (m *Memberlist) rawSendMsgStream(conn net.Conn, sendBuf []byte) error {
// Check if encryption is enabled
if m.config.EncryptionEnabled() && m.config.GossipVerifyOutgoing {
crypt, err := m.encryptLocalState(sendBuf)
crypt, err := m.encryptLocalState(sendBuf, streamLabel)
if err != nil {
m.logger.Printf("[ERROR] memberlist: Failed to encrypt local state: %v", err)
return err
@ -686,7 +898,7 @@ func (m *Memberlist) rawSendMsgStream(conn net.Conn, sendBuf []byte) error {
}
// Write out the entire send buffer
metrics.IncrCounter([]string{"memberlist", "tcp", "sent"}, float32(len(sendBuf)))
metrics.IncrCounterWithLabels([]string{"memberlist", "tcp", "sent"}, float32(len(sendBuf)), m.metricLabels)
if n, err := conn.Write(sendBuf); err != nil {
return err
@ -698,8 +910,12 @@ func (m *Memberlist) rawSendMsgStream(conn net.Conn, sendBuf []byte) error {
}
// sendUserMsg is used to stream a user message to another host.
func (m *Memberlist) sendUserMsg(addr string, sendBuf []byte) error {
conn, err := m.transport.DialTimeout(addr, m.config.TCPTimeout)
func (m *Memberlist) sendUserMsg(a Address, sendBuf []byte) error {
if a.Name == "" && m.config.RequireNodeNames {
return errNodeNamesAreRequired
}
conn, err := m.transport.DialAddressTimeout(a, m.config.TCPTimeout)
if err != nil {
return err
}
@ -719,28 +935,33 @@ func (m *Memberlist) sendUserMsg(addr string, sendBuf []byte) error {
if _, err := bufConn.Write(sendBuf); err != nil {
return err
}
return m.rawSendMsgStream(conn, bufConn.Bytes())
return m.rawSendMsgStream(conn, bufConn.Bytes(), m.config.Label)
}
// sendAndReceiveState is used to initiate a push/pull over a stream with a
// remote host.
func (m *Memberlist) sendAndReceiveState(addr string, join bool) ([]pushNodeState, []byte, error) {
func (m *Memberlist) sendAndReceiveState(a Address, join bool) ([]pushNodeState, []byte, error) {
if a.Name == "" && m.config.RequireNodeNames {
return nil, nil, errNodeNamesAreRequired
}
// Attempt to connect
conn, err := m.transport.DialTimeout(addr, m.config.TCPTimeout)
conn, err := m.transport.DialAddressTimeout(a, m.config.TCPTimeout)
if err != nil {
return nil, nil, err
}
defer conn.Close()
m.logger.Printf("[DEBUG] memberlist: Initiating push/pull sync with: %s", conn.RemoteAddr())
metrics.IncrCounter([]string{"memberlist", "tcp", "connect"}, 1)
m.logger.Printf("[DEBUG] memberlist: Initiating push/pull sync with: %s %s", a.Name, conn.RemoteAddr())
metrics.IncrCounterWithLabels([]string{"memberlist", "tcp", "connect"}, 1, m.metricLabels)
// Send our state
if err := m.sendLocalState(conn, join); err != nil {
if err := m.sendLocalState(conn, join, m.config.Label); err != nil {
return nil, nil, err
}
conn.SetDeadline(time.Now().Add(m.config.TCPTimeout))
msgType, bufConn, dec, err := m.readStream(conn)
msgType, bufConn, dec, err := m.readStream(conn, m.config.Label)
if err != nil {
return nil, nil, err
}
@ -765,7 +986,7 @@ func (m *Memberlist) sendAndReceiveState(addr string, join bool) ([]pushNodeStat
}
// sendLocalState is invoked to send our local state over a stream connection.
func (m *Memberlist) sendLocalState(conn net.Conn, join bool) error {
func (m *Memberlist) sendLocalState(conn net.Conn, join bool, streamLabel string) error {
// Setup a deadline
conn.SetDeadline(time.Now().Add(m.config.TCPTimeout))
@ -822,11 +1043,11 @@ func (m *Memberlist) sendLocalState(conn net.Conn, join bool) error {
}
// Get the send buffer
return m.rawSendMsgStream(conn, bufConn.Bytes())
return m.rawSendMsgStream(conn, bufConn.Bytes(), streamLabel)
}
// encryptLocalState is used to help encrypt local state before sending
func (m *Memberlist) encryptLocalState(sendBuf []byte) ([]byte, error) {
func (m *Memberlist) encryptLocalState(sendBuf []byte, streamLabel string) ([]byte, error) {
var buf bytes.Buffer
// Write the encryptMsg byte
@ -839,9 +1060,15 @@ func (m *Memberlist) encryptLocalState(sendBuf []byte) ([]byte, error) {
binary.BigEndian.PutUint32(sizeBuf, uint32(encLen))
buf.Write(sizeBuf)
// Authenticated Data is:
//
// [messageType; byte] [messageLength; uint32] [stream_label; optional]
//
dataBytes := appendBytes(buf.Bytes()[:5], []byte(streamLabel))
// Write the encrypted cipher text to the buffer
key := m.config.Keyring.GetPrimaryKey()
err := encryptPayload(encVsn, key, sendBuf, buf.Bytes()[:5], &buf)
err := encryptPayload(encVsn, key, sendBuf, dataBytes, &buf)
if err != nil {
return nil, err
}
@ -849,7 +1076,7 @@ func (m *Memberlist) encryptLocalState(sendBuf []byte) ([]byte, error) {
}
// decryptRemoteState is used to help decrypt the remote state
func (m *Memberlist) decryptRemoteState(bufConn io.Reader) ([]byte, error) {
func (m *Memberlist) decryptRemoteState(bufConn io.Reader, streamLabel string) ([]byte, error) {
// Read in enough to determine message length
cipherText := bytes.NewBuffer(nil)
cipherText.WriteByte(byte(encryptMsg))
@ -863,6 +1090,12 @@ func (m *Memberlist) decryptRemoteState(bufConn io.Reader) ([]byte, error) {
moreBytes := binary.BigEndian.Uint32(cipherText.Bytes()[1:5])
if moreBytes > maxPushStateBytes {
return nil, fmt.Errorf("Remote node state is larger than limit (%d)", moreBytes)
}
//Start reporting the size before you cross the limit
if moreBytes > uint32(math.Floor(.6*maxPushStateBytes)) {
m.logger.Printf("[WARN] memberlist: Remote node state size is (%d) limit is (%d)", moreBytes, maxPushStateBytes)
}
// Read in the rest of the payload
@ -871,8 +1104,13 @@ func (m *Memberlist) decryptRemoteState(bufConn io.Reader) ([]byte, error) {
return nil, err
}
// Decrypt the cipherText
dataBytes := cipherText.Bytes()[:5]
// Decrypt the cipherText with some authenticated data
//
// Authenticated Data is:
//
// [messageType; byte] [messageLength; uint32] [label_data; optional]
//
dataBytes := appendBytes(cipherText.Bytes()[:5], []byte(streamLabel))
cipherBytes := cipherText.Bytes()[5:]
// Decrypt the payload
@ -880,15 +1118,18 @@ func (m *Memberlist) decryptRemoteState(bufConn io.Reader) ([]byte, error) {
return decryptPayload(keys, cipherBytes, dataBytes)
}
// readStream is used to read from a stream connection, decrypting and
// readStream is used to read messages from a stream connection, decrypting and
// decompressing the stream if necessary.
func (m *Memberlist) readStream(conn net.Conn) (messageType, io.Reader, *codec.Decoder, error) {
//
// The provided streamLabel if present will be authenticated during decryption
// of each message.
func (m *Memberlist) readStream(conn net.Conn, streamLabel string) (messageType, io.Reader, *codec.Decoder, error) {
// Created a buffered reader
var bufConn io.Reader = bufio.NewReader(conn)
// Read the message type
buf := [1]byte{0}
if _, err := bufConn.Read(buf[:]); err != nil {
if _, err := io.ReadFull(bufConn, buf[:]); err != nil {
return 0, nil, nil, err
}
msgType := messageType(buf[0])
@ -900,7 +1141,7 @@ func (m *Memberlist) readStream(conn net.Conn) (messageType, io.Reader, *codec.D
fmt.Errorf("Remote state is encrypted and encryption is not configured")
}
plain, err := m.decryptRemoteState(bufConn)
plain, err := m.decryptRemoteState(bufConn, streamLabel)
if err != nil {
return 0, nil, nil, err
}
@ -996,16 +1237,17 @@ func (m *Memberlist) mergeRemoteState(join bool, remoteNodes []pushNodeState, us
nodes := make([]*Node, len(remoteNodes))
for idx, n := range remoteNodes {
nodes[idx] = &Node{
Name: n.Name,
Addr: n.Addr,
Port: n.Port,
Meta: n.Meta,
PMin: n.Vsn[0],
PMax: n.Vsn[1],
PCur: n.Vsn[2],
DMin: n.Vsn[3],
DMax: n.Vsn[4],
DCur: n.Vsn[5],
Name: n.Name,
Addr: n.Addr,
Port: n.Port,
Meta: n.Meta,
State: n.State,
PMin: n.Vsn[0],
PMax: n.Vsn[1],
PCur: n.Vsn[2],
DMin: n.Vsn[3],
DMax: n.Vsn[4],
DCur: n.Vsn[5],
}
}
if err := m.config.Merge.NotifyMerge(nodes); err != nil {
@ -1058,8 +1300,12 @@ func (m *Memberlist) readUserMsg(bufConn io.Reader, dec *codec.Decoder) error {
// a ping, and waits for an ack. All of this is done as a series of blocking
// operations, given the deadline. The bool return parameter is true if we
// we able to round trip a ping to the other node.
func (m *Memberlist) sendPingAndWaitForAck(addr string, ping ping, deadline time.Time) (bool, error) {
conn, err := m.transport.DialTimeout(addr, deadline.Sub(time.Now()))
func (m *Memberlist) sendPingAndWaitForAck(a Address, ping ping, deadline time.Time) (bool, error) {
if a.Name == "" && m.config.RequireNodeNames {
return false, errNodeNamesAreRequired
}
conn, err := m.transport.DialAddressTimeout(a, deadline.Sub(time.Now()))
if err != nil {
// If the node is actually dead we expect this to fail, so we
// shouldn't spam the logs with it. After this point, errors
@ -1075,11 +1321,11 @@ func (m *Memberlist) sendPingAndWaitForAck(addr string, ping ping, deadline time
return false, err
}
if err = m.rawSendMsgStream(conn, out.Bytes()); err != nil {
if err = m.rawSendMsgStream(conn, out.Bytes(), m.config.Label); err != nil {
return false, err
}
msgType, _, dec, err := m.readStream(conn)
msgType, _, dec, err := m.readStream(conn, m.config.Label)
if err != nil {
return false, err
}
@ -1094,7 +1340,7 @@ func (m *Memberlist) sendPingAndWaitForAck(addr string, ping ping, deadline time
}
if ack.SeqNo != ping.SeqNo {
return false, fmt.Errorf("Sequence number from ack (%d) doesn't match ping (%d)", ack.SeqNo, ping.SeqNo, LogConn(conn))
return false, fmt.Errorf("Sequence number from ack (%d) doesn't match ping (%d)", ack.SeqNo, ping.SeqNo)
}
return true, nil

View file

@ -1,7 +1,9 @@
package memberlist
import (
"bytes"
"fmt"
"io"
"log"
"net"
"sync"
@ -33,6 +35,10 @@ type NetTransportConfig struct {
// Logger is a logger for operator messages.
Logger *log.Logger
// MetricLabels is a map of optional labels to apply to all metrics
// emitted by this transport.
MetricLabels []metrics.Label
}
// NetTransport is a Transport implementation that uses connectionless UDP for
@ -46,8 +52,12 @@ type NetTransport struct {
tcpListeners []*net.TCPListener
udpListeners []*net.UDPConn
shutdown int32
metricLabels []metrics.Label
}
var _ NodeAwareTransport = (*NetTransport)(nil)
// NewNetTransport returns a net transport with the given configuration. On
// success all the network listeners will be created and listening.
func NewNetTransport(config *NetTransportConfig) (*NetTransport, error) {
@ -60,10 +70,11 @@ func NewNetTransport(config *NetTransportConfig) (*NetTransport, error) {
// Build out the new transport.
var ok bool
t := NetTransport{
config: config,
packetCh: make(chan *Packet),
streamCh: make(chan net.Conn),
logger: config.Logger,
config: config,
packetCh: make(chan *Packet),
streamCh: make(chan net.Conn),
logger: config.Logger,
metricLabels: config.MetricLabels,
}
// Clean up listeners if there's an error.
@ -170,6 +181,14 @@ func (t *NetTransport) FinalAdvertiseAddr(ip string, port int) (net.IP, int, err
// See Transport.
func (t *NetTransport) WriteTo(b []byte, addr string) (time.Time, error) {
a := Address{Addr: addr, Name: ""}
return t.WriteToAddress(b, a)
}
// See NodeAwareTransport.
func (t *NetTransport) WriteToAddress(b []byte, a Address) (time.Time, error) {
addr := a.Addr
udpAddr, err := net.ResolveUDPAddr("udp", addr)
if err != nil {
return time.Time{}, err
@ -188,8 +207,44 @@ func (t *NetTransport) PacketCh() <-chan *Packet {
return t.packetCh
}
// See IngestionAwareTransport.
func (t *NetTransport) IngestPacket(conn net.Conn, addr net.Addr, now time.Time, shouldClose bool) error {
if shouldClose {
defer conn.Close()
}
// Copy everything from the stream into packet buffer.
var buf bytes.Buffer
if _, err := io.Copy(&buf, conn); err != nil {
return fmt.Errorf("failed to read packet: %v", err)
}
// Check the length - it needs to have at least one byte to be a proper
// message. This is checked elsewhere for writes coming in directly from
// the UDP socket.
if n := buf.Len(); n < 1 {
return fmt.Errorf("packet too short (%d bytes) %s", n, LogAddress(addr))
}
// Inject the packet.
t.packetCh <- &Packet{
Buf: buf.Bytes(),
From: addr,
Timestamp: now,
}
return nil
}
// See Transport.
func (t *NetTransport) DialTimeout(addr string, timeout time.Duration) (net.Conn, error) {
a := Address{Addr: addr, Name: ""}
return t.DialAddressTimeout(a, timeout)
}
// See NodeAwareTransport.
func (t *NetTransport) DialAddressTimeout(a Address, timeout time.Duration) (net.Conn, error) {
addr := a.Addr
dialer := net.Dialer{Timeout: timeout}
return dialer.Dial("tcp", addr)
}
@ -199,6 +254,12 @@ func (t *NetTransport) StreamCh() <-chan net.Conn {
return t.streamCh
}
// See IngestionAwareTransport.
func (t *NetTransport) IngestStream(conn net.Conn) error {
t.streamCh <- conn
return nil
}
// See Transport.
func (t *NetTransport) Shutdown() error {
// This will avoid log spam about errors when we shut down.
@ -221,6 +282,16 @@ func (t *NetTransport) Shutdown() error {
// and hands them off to the stream channel.
func (t *NetTransport) tcpListen(tcpLn *net.TCPListener) {
defer t.wg.Done()
// baseDelay is the initial delay after an AcceptTCP() error before attempting again
const baseDelay = 5 * time.Millisecond
// maxDelay is the maximum delay after an AcceptTCP() error before attempting again.
// In the case that tcpListen() is error-looping, it will delay the shutdown check.
// Therefore, changes to maxDelay may have an effect on the latency of shutdown.
const maxDelay = 1 * time.Second
var loopDelay time.Duration
for {
conn, err := tcpLn.AcceptTCP()
if err != nil {
@ -228,9 +299,22 @@ func (t *NetTransport) tcpListen(tcpLn *net.TCPListener) {
break
}
if loopDelay == 0 {
loopDelay = baseDelay
} else {
loopDelay *= 2
}
if loopDelay > maxDelay {
loopDelay = maxDelay
}
t.logger.Printf("[ERR] memberlist: Error accepting TCP connection: %v", err)
time.Sleep(loopDelay)
continue
}
// No error, reset loop delay
loopDelay = 0
t.streamCh <- conn
}
@ -264,7 +348,7 @@ func (t *NetTransport) udpListen(udpLn *net.UDPConn) {
}
// Ingest the packet.
metrics.IncrCounter([]string{"memberlist", "udp", "received"}, float32(n))
metrics.IncrCounterWithLabels([]string{"memberlist", "udp", "received"}, float32(n), t.metricLabels)
t.packetCh <- &Packet{
Buf: buf[:n],
From: addr,

48
vendor/github.com/hashicorp/memberlist/peeked_conn.go generated vendored Normal file
View file

@ -0,0 +1,48 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Originally from: https://github.com/google/tcpproxy/blob/master/tcpproxy.go
// at f5c09fbedceb69e4b238dec52cdf9f2fe9a815e2
package memberlist
import "net"
// peekedConn is an incoming connection that has had some bytes read from it
// to determine how to route the connection. The Read method stitches
// the peeked bytes and unread bytes back together.
type peekedConn struct {
// Peeked are the bytes that have been read from Conn for the
// purposes of route matching, but have not yet been consumed
// by Read calls. It set to nil by Read when fully consumed.
Peeked []byte
// Conn is the underlying connection.
// It can be type asserted against *net.TCPConn or other types
// as needed. It should not be read from directly unless
// Peeked is nil.
net.Conn
}
func (c *peekedConn) Read(p []byte) (n int, err error) {
if len(c.Peeked) > 0 {
n = copy(p, c.Peeked)
c.Peeked = c.Peeked[n:]
if len(c.Peeked) == 0 {
c.Peeked = nil
}
return n, nil
}
return c.Conn.Read(p)
}

View file

@ -1,8 +1,10 @@
package memberlist
import (
"sort"
"math"
"sync"
"github.com/google/btree"
)
// TransmitLimitedQueue is used to queue messages to broadcast to
@ -19,15 +21,93 @@ type TransmitLimitedQueue struct {
// number of retransmissions attempted.
RetransmitMult int
sync.Mutex
bcQueue limitedBroadcasts
mu sync.Mutex
tq *btree.BTree // stores *limitedBroadcast as btree.Item
tm map[string]*limitedBroadcast
idGen int64
}
type limitedBroadcast struct {
transmits int // Number of transmissions attempted.
transmits int // btree-key[0]: Number of transmissions attempted.
msgLen int64 // btree-key[1]: copied from len(b.Message())
id int64 // btree-key[2]: unique incrementing id stamped at submission time
b Broadcast
name string // set if Broadcast is a NamedBroadcast
}
// Less tests whether the current item is less than the given argument.
//
// This must provide a strict weak ordering.
// If !a.Less(b) && !b.Less(a), we treat this to mean a == b (i.e. we can only
// hold one of either a or b in the tree).
//
// default ordering is
// - [transmits=0, ..., transmits=inf]
// - [transmits=0:len=999, ..., transmits=0:len=2, ...]
// - [transmits=0:len=999,id=999, ..., transmits=0:len=999:id=1, ...]
func (b *limitedBroadcast) Less(than btree.Item) bool {
o := than.(*limitedBroadcast)
if b.transmits < o.transmits {
return true
} else if b.transmits > o.transmits {
return false
}
if b.msgLen > o.msgLen {
return true
} else if b.msgLen < o.msgLen {
return false
}
return b.id > o.id
}
// for testing; emits in transmit order if reverse=false
func (q *TransmitLimitedQueue) orderedView(reverse bool) []*limitedBroadcast {
q.mu.Lock()
defer q.mu.Unlock()
out := make([]*limitedBroadcast, 0, q.lenLocked())
q.walkReadOnlyLocked(reverse, func(cur *limitedBroadcast) bool {
out = append(out, cur)
return true
})
return out
}
// walkReadOnlyLocked calls f for each item in the queue traversing it in
// natural order (by Less) when reverse=false and the opposite when true. You
// must hold the mutex.
//
// This method panics if you attempt to mutate the item during traversal. The
// underlying btree should also not be mutated during traversal.
func (q *TransmitLimitedQueue) walkReadOnlyLocked(reverse bool, f func(*limitedBroadcast) bool) {
if q.lenLocked() == 0 {
return
}
iter := func(item btree.Item) bool {
cur := item.(*limitedBroadcast)
prevTransmits := cur.transmits
prevMsgLen := cur.msgLen
prevID := cur.id
keepGoing := f(cur)
if prevTransmits != cur.transmits || prevMsgLen != cur.msgLen || prevID != cur.id {
panic("edited queue while walking read only")
}
return keepGoing
}
if reverse {
q.tq.Descend(iter) // end with transmit 0
} else {
q.tq.Ascend(iter) // start with transmit 0
}
}
type limitedBroadcasts []*limitedBroadcast
// Broadcast is something that can be broadcasted via gossip to
// the memberlist cluster.
@ -45,123 +125,298 @@ type Broadcast interface {
Finished()
}
// NamedBroadcast is an optional extension of the Broadcast interface that
// gives each message a unique string name, and that is used to optimize
//
// You shoud ensure that Invalidates() checks the same uniqueness as the
// example below:
//
// func (b *foo) Invalidates(other Broadcast) bool {
// nb, ok := other.(NamedBroadcast)
// if !ok {
// return false
// }
// return b.Name() == nb.Name()
// }
//
// Invalidates() isn't currently used for NamedBroadcasts, but that may change
// in the future.
type NamedBroadcast interface {
Broadcast
// The unique identity of this broadcast message.
Name() string
}
// UniqueBroadcast is an optional interface that indicates that each message is
// intrinsically unique and there is no need to scan the broadcast queue for
// duplicates.
//
// You should ensure that Invalidates() always returns false if implementing
// this interface. Invalidates() isn't currently used for UniqueBroadcasts, but
// that may change in the future.
type UniqueBroadcast interface {
Broadcast
// UniqueBroadcast is just a marker method for this interface.
UniqueBroadcast()
}
// QueueBroadcast is used to enqueue a broadcast
func (q *TransmitLimitedQueue) QueueBroadcast(b Broadcast) {
q.Lock()
defer q.Unlock()
q.queueBroadcast(b, 0)
}
// Check if this message invalidates another
n := len(q.bcQueue)
for i := 0; i < n; i++ {
if b.Invalidates(q.bcQueue[i].b) {
q.bcQueue[i].b.Finished()
copy(q.bcQueue[i:], q.bcQueue[i+1:])
q.bcQueue[n-1] = nil
q.bcQueue = q.bcQueue[:n-1]
n--
// lazyInit initializes internal data structures the first time they are
// needed. You must already hold the mutex.
func (q *TransmitLimitedQueue) lazyInit() {
if q.tq == nil {
q.tq = btree.New(32)
}
if q.tm == nil {
q.tm = make(map[string]*limitedBroadcast)
}
}
// queueBroadcast is like QueueBroadcast but you can use a nonzero value for
// the initial transmit tier assigned to the message. This is meant to be used
// for unit testing.
func (q *TransmitLimitedQueue) queueBroadcast(b Broadcast, initialTransmits int) {
q.mu.Lock()
defer q.mu.Unlock()
q.lazyInit()
if q.idGen == math.MaxInt64 {
// it's super duper unlikely to wrap around within the retransmit limit
q.idGen = 1
} else {
q.idGen++
}
id := q.idGen
lb := &limitedBroadcast{
transmits: initialTransmits,
msgLen: int64(len(b.Message())),
id: id,
b: b,
}
unique := false
if nb, ok := b.(NamedBroadcast); ok {
lb.name = nb.Name()
} else if _, ok := b.(UniqueBroadcast); ok {
unique = true
}
// Check if this message invalidates another.
if lb.name != "" {
if old, ok := q.tm[lb.name]; ok {
old.b.Finished()
q.deleteItem(old)
}
} else if !unique {
// Slow path, hopefully nothing hot hits this.
var remove []*limitedBroadcast
q.tq.Ascend(func(item btree.Item) bool {
cur := item.(*limitedBroadcast)
// Special Broadcasts can only invalidate each other.
switch cur.b.(type) {
case NamedBroadcast:
// noop
case UniqueBroadcast:
// noop
default:
if b.Invalidates(cur.b) {
cur.b.Finished()
remove = append(remove, cur)
}
}
return true
})
for _, cur := range remove {
q.deleteItem(cur)
}
}
// Append to the queue
q.bcQueue = append(q.bcQueue, &limitedBroadcast{0, b})
// Append to the relevant queue.
q.addItem(lb)
}
// deleteItem removes the given item from the overall datastructure. You
// must already hold the mutex.
func (q *TransmitLimitedQueue) deleteItem(cur *limitedBroadcast) {
_ = q.tq.Delete(cur)
if cur.name != "" {
delete(q.tm, cur.name)
}
if q.tq.Len() == 0 {
// At idle there's no reason to let the id generator keep going
// indefinitely.
q.idGen = 0
}
}
// addItem adds the given item into the overall datastructure. You must already
// hold the mutex.
func (q *TransmitLimitedQueue) addItem(cur *limitedBroadcast) {
_ = q.tq.ReplaceOrInsert(cur)
if cur.name != "" {
q.tm[cur.name] = cur
}
}
// getTransmitRange returns a pair of min/max values for transmit values
// represented by the current queue contents. Both values represent actual
// transmit values on the interval [0, len). You must already hold the mutex.
func (q *TransmitLimitedQueue) getTransmitRange() (minTransmit, maxTransmit int) {
if q.lenLocked() == 0 {
return 0, 0
}
minItem, maxItem := q.tq.Min(), q.tq.Max()
if minItem == nil || maxItem == nil {
return 0, 0
}
min := minItem.(*limitedBroadcast).transmits
max := maxItem.(*limitedBroadcast).transmits
return min, max
}
// GetBroadcasts is used to get a number of broadcasts, up to a byte limit
// and applying a per-message overhead as provided.
func (q *TransmitLimitedQueue) GetBroadcasts(overhead, limit int) [][]byte {
q.Lock()
defer q.Unlock()
q.mu.Lock()
defer q.mu.Unlock()
// Fast path the default case
if len(q.bcQueue) == 0 {
if q.lenLocked() == 0 {
return nil
}
transmitLimit := retransmitLimit(q.RetransmitMult, q.NumNodes())
bytesUsed := 0
var toSend [][]byte
for i := len(q.bcQueue) - 1; i >= 0; i-- {
// Check if this is within our limits
b := q.bcQueue[i]
msg := b.b.Message()
if bytesUsed+overhead+len(msg) > limit {
var (
bytesUsed int
toSend [][]byte
reinsert []*limitedBroadcast
)
// Visit fresher items first, but only look at stuff that will fit.
// We'll go tier by tier, grabbing the largest items first.
minTr, maxTr := q.getTransmitRange()
for transmits := minTr; transmits <= maxTr; /*do not advance automatically*/ {
free := int64(limit - bytesUsed - overhead)
if free <= 0 {
break // bail out early
}
// Search for the least element on a given tier (by transmit count) as
// defined in the limitedBroadcast.Less function that will fit into our
// remaining space.
greaterOrEqual := &limitedBroadcast{
transmits: transmits,
msgLen: free,
id: math.MaxInt64,
}
lessThan := &limitedBroadcast{
transmits: transmits + 1,
msgLen: math.MaxInt64,
id: math.MaxInt64,
}
var keep *limitedBroadcast
q.tq.AscendRange(greaterOrEqual, lessThan, func(item btree.Item) bool {
cur := item.(*limitedBroadcast)
// Check if this is within our limits
if int64(len(cur.b.Message())) > free {
// If this happens it's a bug in the datastructure or
// surrounding use doing something like having len(Message())
// change over time. There's enough going on here that it's
// probably sane to just skip it and move on for now.
return true
}
keep = cur
return false
})
if keep == nil {
// No more items of an appropriate size in the tier.
transmits++
continue
}
msg := keep.b.Message()
// Add to slice to send
bytesUsed += overhead + len(msg)
toSend = append(toSend, msg)
// Check if we should stop transmission
b.transmits++
if b.transmits >= transmitLimit {
b.b.Finished()
n := len(q.bcQueue)
q.bcQueue[i], q.bcQueue[n-1] = q.bcQueue[n-1], nil
q.bcQueue = q.bcQueue[:n-1]
q.deleteItem(keep)
if keep.transmits+1 >= transmitLimit {
keep.b.Finished()
} else {
// We need to bump this item down to another transmit tier, but
// because it would be in the same direction that we're walking the
// tiers, we will have to delay the reinsertion until we are
// finished our search. Otherwise we'll possibly re-add the message
// when we ascend to the next tier.
keep.transmits++
reinsert = append(reinsert, keep)
}
}
// If we are sending anything, we need to re-sort to deal
// with adjusted transmit counts
if len(toSend) > 0 {
q.bcQueue.Sort()
for _, cur := range reinsert {
q.addItem(cur)
}
return toSend
}
// NumQueued returns the number of queued messages
func (q *TransmitLimitedQueue) NumQueued() int {
q.Lock()
defer q.Unlock()
return len(q.bcQueue)
q.mu.Lock()
defer q.mu.Unlock()
return q.lenLocked()
}
// Reset clears all the queued messages
func (q *TransmitLimitedQueue) Reset() {
q.Lock()
defer q.Unlock()
for _, b := range q.bcQueue {
b.b.Finished()
// lenLocked returns the length of the overall queue datastructure. You must
// hold the mutex.
func (q *TransmitLimitedQueue) lenLocked() int {
if q.tq == nil {
return 0
}
q.bcQueue = nil
return q.tq.Len()
}
// Reset clears all the queued messages. Should only be used for tests.
func (q *TransmitLimitedQueue) Reset() {
q.mu.Lock()
defer q.mu.Unlock()
q.walkReadOnlyLocked(false, func(cur *limitedBroadcast) bool {
cur.b.Finished()
return true
})
q.tq = nil
q.tm = nil
q.idGen = 0
}
// Prune will retain the maxRetain latest messages, and the rest
// will be discarded. This can be used to prevent unbounded queue sizes
func (q *TransmitLimitedQueue) Prune(maxRetain int) {
q.Lock()
defer q.Unlock()
q.mu.Lock()
defer q.mu.Unlock()
// Do nothing if queue size is less than the limit
n := len(q.bcQueue)
if n < maxRetain {
return
for q.tq.Len() > maxRetain {
item := q.tq.Max()
if item == nil {
break
}
cur := item.(*limitedBroadcast)
cur.b.Finished()
q.deleteItem(cur)
}
// Invalidate the messages we will be removing
for i := 0; i < n-maxRetain; i++ {
q.bcQueue[i].b.Finished()
}
// Move the messages, and retain only the last maxRetain
copy(q.bcQueue[0:], q.bcQueue[n-maxRetain:])
q.bcQueue = q.bcQueue[:maxRetain]
}
func (b limitedBroadcasts) Len() int {
return len(b)
}
func (b limitedBroadcasts) Less(i, j int) bool {
return b[i].transmits < b[j].transmits
}
func (b limitedBroadcasts) Swap(i, j int) {
b[i], b[j] = b[j], b[i]
}
func (b limitedBroadcasts) Sort() {
sort.Sort(sort.Reverse(b))
}

View file

@ -106,7 +106,10 @@ func encryptPayload(vsn encryptionVersion, key []byte, msg []byte, data []byte,
dst.WriteByte(byte(vsn))
// Add a random nonce
io.CopyN(dst, rand.Reader, nonceSize)
_, err = io.CopyN(dst, rand.Reader, nonceSize)
if err != nil {
return err
}
afterNonce := dst.Len()
// Ensure we are correctly padded (only version 0)
@ -196,3 +199,22 @@ func decryptPayload(keys [][]byte, msg []byte, data []byte) ([]byte, error) {
return nil, fmt.Errorf("No installed keys could decrypt the message")
}
func appendBytes(first []byte, second []byte) []byte {
hasFirst := len(first) > 0
hasSecond := len(second) > 0
switch {
case hasFirst && hasSecond:
out := make([]byte, 0, len(first)+len(second))
out = append(out, first...)
out = append(out, second...)
return out
case hasFirst:
return first
case hasSecond:
return second
default:
return nil
}
}

View file

@ -6,32 +6,35 @@ import (
"math"
"math/rand"
"net"
"strings"
"sync/atomic"
"time"
"github.com/armon/go-metrics"
metrics "github.com/armon/go-metrics"
)
type nodeStateType int
type NodeStateType int
const (
stateAlive nodeStateType = iota
stateSuspect
stateDead
StateAlive NodeStateType = iota
StateSuspect
StateDead
StateLeft
)
// Node represents a node in the cluster.
type Node struct {
Name string
Addr net.IP
Port uint16
Meta []byte // Metadata from the delegate for this node.
PMin uint8 // Minimum protocol version this understands
PMax uint8 // Maximum protocol version this understands
PCur uint8 // Current version node is speaking
DMin uint8 // Min protocol version for the delegate to understand
DMax uint8 // Max protocol version for the delegate to understand
DCur uint8 // Current version delegate is speaking
Name string
Addr net.IP
Port uint16
Meta []byte // Metadata from the delegate for this node.
State NodeStateType // State of the node.
PMin uint8 // Minimum protocol version this understands
PMax uint8 // Maximum protocol version this understands
PCur uint8 // Current version node is speaking
DMin uint8 // Min protocol version for the delegate to understand
DMax uint8 // Max protocol version for the delegate to understand
DCur uint8 // Current version delegate is speaking
}
// Address returns the host:port form of a node's address, suitable for use
@ -40,6 +43,15 @@ func (n *Node) Address() string {
return joinHostPort(n.Addr.String(), n.Port)
}
// FullAddress returns the node name and host:port form of a node's address,
// suitable for use with a transport.
func (n *Node) FullAddress() Address {
return Address{
Addr: joinHostPort(n.Addr.String(), n.Port),
Name: n.Name,
}
}
// String returns the node name
func (n *Node) String() string {
return n.Name
@ -49,7 +61,7 @@ func (n *Node) String() string {
type nodeState struct {
Node
Incarnation uint32 // Last known incarnation number
State nodeStateType // Current state
State NodeStateType // Current state
StateChange time.Time // Time last state change happened
}
@ -59,6 +71,16 @@ func (n *nodeState) Address() string {
return n.Node.Address()
}
// FullAddress returns the node name and host:port form of a node's address,
// suitable for use with a transport.
func (n *nodeState) FullAddress() Address {
return n.Node.FullAddress()
}
func (n *nodeState) DeadOrLeft() bool {
return n.State == StateDead || n.State == StateLeft
}
// ackHandler is used to register handlers for incoming acks and nacks.
type ackHandler struct {
ackFn func([]byte, time.Time)
@ -217,7 +239,7 @@ START:
node = *m.nodes[m.probeIndex]
if node.Name == m.config.Name {
skip = true
} else if node.State == stateDead {
} else if node.DeadOrLeft() {
skip = true
}
@ -233,20 +255,56 @@ START:
m.probeNode(&node)
}
// probeNodeByAddr just safely calls probeNode given only the address of the node (for tests)
func (m *Memberlist) probeNodeByAddr(addr string) {
m.nodeLock.RLock()
n := m.nodeMap[addr]
m.nodeLock.RUnlock()
m.probeNode(n)
}
// failedRemote checks the error and decides if it indicates a failure on the
// other end.
func failedRemote(err error) bool {
switch t := err.(type) {
case *net.OpError:
if strings.HasPrefix(t.Net, "tcp") {
switch t.Op {
case "dial", "read", "write":
return true
}
} else if strings.HasPrefix(t.Net, "udp") {
switch t.Op {
case "write":
return true
}
}
}
return false
}
// probeNode handles a single round of failure checking on a node.
func (m *Memberlist) probeNode(node *nodeState) {
defer metrics.MeasureSince([]string{"memberlist", "probeNode"}, time.Now())
defer metrics.MeasureSinceWithLabels([]string{"memberlist", "probeNode"}, time.Now(), m.metricLabels)
// We use our health awareness to scale the overall probe interval, so we
// slow down if we detect problems. The ticker that calls us can handle
// us running over the base interval, and will skip missed ticks.
probeInterval := m.awareness.ScaleTimeout(m.config.ProbeInterval)
if probeInterval > m.config.ProbeInterval {
metrics.IncrCounter([]string{"memberlist", "degraded", "probe"}, 1)
metrics.IncrCounterWithLabels([]string{"memberlist", "degraded", "probe"}, 1, m.metricLabels)
}
// Prepare a ping message and setup an ack handler.
ping := ping{SeqNo: m.nextSeqNo(), Node: node.Name}
selfAddr, selfPort := m.getAdvertise()
ping := ping{
SeqNo: m.nextSeqNo(),
Node: node.Name,
SourceAddr: selfAddr,
SourcePort: selfPort,
SourceNode: m.config.Name,
}
ackCh := make(chan ackMessage, m.config.IndirectChecks+1)
nackCh := make(chan struct{}, m.config.IndirectChecks+1)
m.setProbeChannels(ping.SeqNo, ackCh, nackCh, probeInterval)
@ -263,15 +321,25 @@ func (m *Memberlist) probeNode(node *nodeState) {
// soon as possible.
deadline := sent.Add(probeInterval)
addr := node.Address()
if node.State == stateAlive {
if err := m.encodeAndSendMsg(addr, pingMsg, &ping); err != nil {
m.logger.Printf("[ERR] memberlist: Failed to send ping: %s", err)
return
// Arrange for our self-awareness to get updated.
var awarenessDelta int
defer func() {
m.awareness.ApplyDelta(awarenessDelta)
}()
if node.State == StateAlive {
if err := m.encodeAndSendMsg(node.FullAddress(), pingMsg, &ping); err != nil {
m.logger.Printf("[ERR] memberlist: Failed to send UDP ping: %s", err)
if failedRemote(err) {
goto HANDLE_REMOTE_FAILURE
} else {
return
}
}
} else {
var msgs [][]byte
if buf, err := encode(pingMsg, &ping); err != nil {
m.logger.Printf("[ERR] memberlist: Failed to encode ping message: %s", err)
m.logger.Printf("[ERR] memberlist: Failed to encode UDP ping message: %s", err)
return
} else {
msgs = append(msgs, buf.Bytes())
@ -285,9 +353,13 @@ func (m *Memberlist) probeNode(node *nodeState) {
}
compound := makeCompoundMessage(msgs)
if err := m.rawSendMsgPacket(addr, &node.Node, compound.Bytes()); err != nil {
m.logger.Printf("[ERR] memberlist: Failed to send compound ping and suspect message to %s: %s", addr, err)
return
if err := m.rawSendMsgPacket(node.FullAddress(), &node.Node, compound.Bytes()); err != nil {
m.logger.Printf("[ERR] memberlist: Failed to send UDP compound ping and suspect message to %s: %s", addr, err)
if failedRemote(err) {
goto HANDLE_REMOTE_FAILURE
} else {
return
}
}
}
@ -296,10 +368,7 @@ func (m *Memberlist) probeNode(node *nodeState) {
// which will improve our health until we get to the failure scenarios
// at the end of this function, which will alter this delta variable
// accordingly.
awarenessDelta := -1
defer func() {
m.awareness.ApplyDelta(awarenessDelta)
}()
awarenessDelta = -1
// Wait for response or round-trip-time.
select {
@ -324,21 +393,31 @@ func (m *Memberlist) probeNode(node *nodeState) {
// probe interval it will give the TCP fallback more time, which
// is more active in dealing with lost packets, and it gives more
// time to wait for indirect acks/nacks.
m.logger.Printf("[DEBUG] memberlist: Failed ping: %v (timeout reached)", node.Name)
m.logger.Printf("[DEBUG] memberlist: Failed UDP ping: %s (timeout reached)", node.Name)
}
HANDLE_REMOTE_FAILURE:
// Get some random live nodes.
m.nodeLock.RLock()
kNodes := kRandomNodes(m.config.IndirectChecks, m.nodes, func(n *nodeState) bool {
return n.Name == m.config.Name ||
n.Name == node.Name ||
n.State != stateAlive
n.State != StateAlive
})
m.nodeLock.RUnlock()
// Attempt an indirect ping.
expectedNacks := 0
ind := indirectPingReq{SeqNo: ping.SeqNo, Target: node.Addr, Port: node.Port, Node: node.Name}
selfAddr, selfPort = m.getAdvertise()
ind := indirectPingReq{
SeqNo: ping.SeqNo,
Target: node.Addr,
Port: node.Port,
Node: node.Name,
SourceAddr: selfAddr,
SourcePort: selfPort,
SourceNode: m.config.Name,
}
for _, peer := range kNodes {
// We only expect nack to be sent from peers who understand
// version 4 of the protocol.
@ -346,8 +425,8 @@ func (m *Memberlist) probeNode(node *nodeState) {
expectedNacks++
}
if err := m.encodeAndSendMsg(peer.Address(), indirectPingMsg, &ind); err != nil {
m.logger.Printf("[ERR] memberlist: Failed to send indirect ping: %s", err)
if err := m.encodeAndSendMsg(peer.FullAddress(), indirectPingMsg, &ind); err != nil {
m.logger.Printf("[ERR] memberlist: Failed to send indirect UDP ping: %s", err)
}
}
@ -362,12 +441,19 @@ func (m *Memberlist) probeNode(node *nodeState) {
// which protocol version we are speaking. That's why we've included a
// config option to turn this off if desired.
fallbackCh := make(chan bool, 1)
if (!m.config.DisableTcpPings) && (node.PMax >= 3) {
disableTcpPings := m.config.DisableTcpPings ||
(m.config.DisableTcpPingsForNode != nil && m.config.DisableTcpPingsForNode(node.Name))
if (!disableTcpPings) && (node.PMax >= 3) {
go func() {
defer close(fallbackCh)
didContact, err := m.sendPingAndWaitForAck(node.Address(), ping, deadline)
didContact, err := m.sendPingAndWaitForAck(node.FullAddress(), ping, deadline)
if err != nil {
m.logger.Printf("[ERR] memberlist: Failed fallback ping: %s", err)
var to string
if ne, ok := err.(net.Error); ok && ne.Timeout() {
to = fmt.Sprintf("timeout %s: ", probeInterval)
}
m.logger.Printf("[ERR] memberlist: Failed fallback TCP ping: %s%s", to, err)
} else {
fallbackCh <- didContact
}
@ -392,7 +478,7 @@ func (m *Memberlist) probeNode(node *nodeState) {
// any additional time here.
for didContact := range fallbackCh {
if didContact {
m.logger.Printf("[WARN] memberlist: Was able to connect to %s but other probes failed, network may be misconfigured", node.Name)
m.logger.Printf("[WARN] memberlist: Was able to connect to %s over TCP but UDP probes failed, network may be misconfigured", node.Name)
return
}
}
@ -422,12 +508,21 @@ func (m *Memberlist) probeNode(node *nodeState) {
// Ping initiates a ping to the node with the specified name.
func (m *Memberlist) Ping(node string, addr net.Addr) (time.Duration, error) {
// Prepare a ping message and setup an ack handler.
ping := ping{SeqNo: m.nextSeqNo(), Node: node}
selfAddr, selfPort := m.getAdvertise()
ping := ping{
SeqNo: m.nextSeqNo(),
Node: node,
SourceAddr: selfAddr,
SourcePort: selfPort,
SourceNode: m.config.Name,
}
ackCh := make(chan ackMessage, m.config.IndirectChecks+1)
m.setProbeChannels(ping.SeqNo, ackCh, nil, m.config.ProbeInterval)
a := Address{Addr: addr.String(), Name: node}
// Send a ping to the node.
if err := m.encodeAndSendMsg(addr.String(), pingMsg, &ping); err != nil {
if err := m.encodeAndSendMsg(a, pingMsg, &ping); err != nil {
return 0, err
}
@ -478,7 +573,7 @@ func (m *Memberlist) resetNodes() {
// gossip is invoked every GossipInterval period to broadcast our gossip
// messages to a few random nodes.
func (m *Memberlist) gossip() {
defer metrics.MeasureSince([]string{"memberlist", "gossip"}, time.Now())
defer metrics.MeasureSinceWithLabels([]string{"memberlist", "gossip"}, time.Now(), m.metricLabels)
// Get some random live, suspect, or recently dead nodes
m.nodeLock.RLock()
@ -488,10 +583,10 @@ func (m *Memberlist) gossip() {
}
switch n.State {
case stateAlive, stateSuspect:
case StateAlive, StateSuspect:
return false
case stateDead:
case StateDead:
return time.Since(n.StateChange) > m.config.GossipToTheDeadTime
default:
@ -501,7 +596,7 @@ func (m *Memberlist) gossip() {
m.nodeLock.RUnlock()
// Compute the bytes available
bytesAvail := m.config.UDPBufferSize - compoundHeaderOverhead
bytesAvail := m.config.UDPBufferSize - compoundHeaderOverhead - labelOverhead(m.config.Label)
if m.config.EncryptionEnabled() {
bytesAvail -= encryptOverhead(m.encryptionVersion())
}
@ -516,14 +611,16 @@ func (m *Memberlist) gossip() {
addr := node.Address()
if len(msgs) == 1 {
// Send single message as is
if err := m.rawSendMsgPacket(addr, &node.Node, msgs[0]); err != nil {
if err := m.rawSendMsgPacket(node.FullAddress(), &node, msgs[0]); err != nil {
m.logger.Printf("[ERR] memberlist: Failed to send gossip to %s: %s", addr, err)
}
} else {
// Otherwise create and send a compound message
compound := makeCompoundMessage(msgs)
if err := m.rawSendMsgPacket(addr, &node.Node, compound.Bytes()); err != nil {
m.logger.Printf("[ERR] memberlist: Failed to send gossip to %s: %s", addr, err)
// Otherwise create and send one or more compound messages
compounds := makeCompoundMessages(msgs)
for _, compound := range compounds {
if err := m.rawSendMsgPacket(node.FullAddress(), &node, compound.Bytes()); err != nil {
m.logger.Printf("[ERR] memberlist: Failed to send gossip to %s: %s", addr, err)
}
}
}
}
@ -538,7 +635,7 @@ func (m *Memberlist) pushPull() {
m.nodeLock.RLock()
nodes := kRandomNodes(1, m.nodes, func(n *nodeState) bool {
return n.Name == m.config.Name ||
n.State != stateAlive
n.State != StateAlive
})
m.nodeLock.RUnlock()
@ -549,17 +646,17 @@ func (m *Memberlist) pushPull() {
node := nodes[0]
// Attempt a push pull
if err := m.pushPullNode(node.Address(), false); err != nil {
if err := m.pushPullNode(node.FullAddress(), false); err != nil {
m.logger.Printf("[ERR] memberlist: Push/Pull with %s failed: %s", node.Name, err)
}
}
// pushPullNode does a complete state exchange with a specific node.
func (m *Memberlist) pushPullNode(addr string, join bool) error {
defer metrics.MeasureSince([]string{"memberlist", "pushPullNode"}, time.Now())
func (m *Memberlist) pushPullNode(a Address, join bool) error {
defer metrics.MeasureSinceWithLabels([]string{"memberlist", "pushPullNode"}, time.Now(), m.metricLabels)
// Attempt to send and receive with the node
remote, userState, err := m.sendAndReceiveState(addr, join)
remote, userState, err := m.sendAndReceiveState(a, join)
if err != nil {
return err
}
@ -596,7 +693,7 @@ func (m *Memberlist) verifyProtocol(remote []pushNodeState) error {
for _, rn := range remote {
// If the node isn't alive, then skip it
if rn.State != stateAlive {
if rn.State != StateAlive {
continue
}
@ -625,7 +722,7 @@ func (m *Memberlist) verifyProtocol(remote []pushNodeState) error {
for _, n := range m.nodes {
// Ignore non-alive nodes
if n.State != stateAlive {
if n.State != StateAlive {
continue
}
@ -841,11 +938,26 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) {
return
}
if len(a.Vsn) >= 3 {
pMin := a.Vsn[0]
pMax := a.Vsn[1]
pCur := a.Vsn[2]
if pMin == 0 || pMax == 0 || pMin > pMax {
m.logger.Printf("[WARN] memberlist: Ignoring an alive message for '%s' (%v:%d) because protocol version(s) are wrong: %d <= %d <= %d should be >0", a.Node, net.IP(a.Addr), a.Port, pMin, pCur, pMax)
return
}
}
// Invoke the Alive delegate if any. This can be used to filter out
// alive messages based on custom logic. For example, using a cluster name.
// Using a merge delegate is not enough, as it is possible for passive
// cluster merging to still occur.
if m.config.Alive != nil {
if len(a.Vsn) < 6 {
m.logger.Printf("[WARN] memberlist: ignoring alive message for '%s' (%v:%d) because Vsn is not present",
a.Node, net.IP(a.Addr), a.Port)
return
}
node := &Node{
Name: a.Node,
Addr: a.Addr,
@ -867,7 +979,13 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) {
// Check if we've never seen this node before, and if not, then
// store this node in our node map.
var updatesNode bool
if !ok {
errCon := m.config.IPAllowed(a.Addr)
if errCon != nil {
m.logger.Printf("[WARN] memberlist: Rejected node %s (%v): %s", a.Node, net.IP(a.Addr), errCon)
return
}
state = &nodeState{
Node: Node{
Name: a.Node,
@ -875,7 +993,15 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) {
Port: a.Port,
Meta: a.Meta,
},
State: stateDead,
State: StateDead,
}
if len(a.Vsn) > 5 {
state.PMin = a.Vsn[0]
state.PMax = a.Vsn[1]
state.PCur = a.Vsn[2]
state.DMin = a.Vsn[3]
state.DMax = a.Vsn[4]
state.DCur = a.Vsn[5]
}
// Add to map
@ -894,29 +1020,45 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) {
// Update numNodes after we've added a new node
atomic.AddUint32(&m.numNodes, 1)
}
// Check if this address is different than the existing node
if !bytes.Equal([]byte(state.Addr), a.Addr) || state.Port != a.Port {
m.logger.Printf("[ERR] memberlist: Conflicting address for %s. Mine: %v:%d Theirs: %v:%d",
state.Name, state.Addr, state.Port, net.IP(a.Addr), a.Port)
// Inform the conflict delegate if provided
if m.config.Conflict != nil {
other := Node{
Name: a.Node,
Addr: a.Addr,
Port: a.Port,
Meta: a.Meta,
} else {
// Check if this address is different than the existing node unless the old node is dead.
if !bytes.Equal([]byte(state.Addr), a.Addr) || state.Port != a.Port {
errCon := m.config.IPAllowed(a.Addr)
if errCon != nil {
m.logger.Printf("[WARN] memberlist: Rejected IP update from %v to %v for node %s: %s", a.Node, state.Addr, net.IP(a.Addr), errCon)
return
}
// If DeadNodeReclaimTime is configured, check if enough time has elapsed since the node died.
canReclaim := (m.config.DeadNodeReclaimTime > 0 &&
time.Since(state.StateChange) > m.config.DeadNodeReclaimTime)
// Allow the address to be updated if a dead node is being replaced.
if state.State == StateLeft || (state.State == StateDead && canReclaim) {
m.logger.Printf("[INFO] memberlist: Updating address for left or failed node %s from %v:%d to %v:%d",
state.Name, state.Addr, state.Port, net.IP(a.Addr), a.Port)
updatesNode = true
} else {
m.logger.Printf("[ERR] memberlist: Conflicting address for %s. Mine: %v:%d Theirs: %v:%d Old state: %v",
state.Name, state.Addr, state.Port, net.IP(a.Addr), a.Port, state.State)
// Inform the conflict delegate if provided
if m.config.Conflict != nil {
other := Node{
Name: a.Node,
Addr: a.Addr,
Port: a.Port,
Meta: a.Meta,
}
m.config.Conflict.NotifyConflict(&state.Node, &other)
}
return
}
m.config.Conflict.NotifyConflict(&state.Node, &other)
}
return
}
// Bail if the incarnation number is older, and this is not about us
isLocalNode := state.Name == m.config.Name
if a.Incarnation <= state.Incarnation && !isLocalNode {
if a.Incarnation <= state.Incarnation && !isLocalNode && !updatesNode {
return
}
@ -956,9 +1098,8 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) {
bytes.Equal(a.Vsn, versions) {
return
}
m.refute(state, a.Incarnation)
m.logger.Printf("[WARN] memberlist: Refuting an alive message")
m.logger.Printf("[WARN] memberlist: Refuting an alive message for '%s' (%v:%d) meta:(%v VS %v), vsn:(%v VS %v)", a.Node, net.IP(a.Addr), a.Port, a.Meta, state.Meta, a.Vsn, versions)
} else {
m.encodeBroadcastNotify(a.Node, aliveMsg, a, notify)
@ -975,19 +1116,21 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) {
// Update the state and incarnation number
state.Incarnation = a.Incarnation
state.Meta = a.Meta
if state.State != stateAlive {
state.State = stateAlive
state.Addr = a.Addr
state.Port = a.Port
if state.State != StateAlive {
state.State = StateAlive
state.StateChange = time.Now()
}
}
// Update metrics
metrics.IncrCounter([]string{"memberlist", "msg", "alive"}, 1)
metrics.IncrCounterWithLabels([]string{"memberlist", "msg", "alive"}, 1, m.metricLabels)
// Notify the delegate of any relevant updates
if m.config.Events != nil {
if oldState == stateDead {
// if Dead -> Alive, notify of join
if oldState == StateDead || oldState == StateLeft {
// if Dead/Left -> Alive, notify of join
m.config.Events.NotifyJoin(&state.Node)
} else if !bytes.Equal(oldMeta, state.Meta) {
@ -1026,7 +1169,7 @@ func (m *Memberlist) suspectNode(s *suspect) {
}
// Ignore non-alive nodes
if state.State != stateAlive {
if state.State != StateAlive {
return
}
@ -1040,11 +1183,11 @@ func (m *Memberlist) suspectNode(s *suspect) {
}
// Update metrics
metrics.IncrCounter([]string{"memberlist", "msg", "suspect"}, 1)
metrics.IncrCounterWithLabels([]string{"memberlist", "msg", "suspect"}, 1, m.metricLabels)
// Update the state
state.Incarnation = s.Incarnation
state.State = stateSuspect
state.State = StateSuspect
changeTime := time.Now()
state.StateChange = changeTime
@ -1066,20 +1209,25 @@ func (m *Memberlist) suspectNode(s *suspect) {
min := suspicionTimeout(m.config.SuspicionMult, n, m.config.ProbeInterval)
max := time.Duration(m.config.SuspicionMaxTimeoutMult) * min
fn := func(numConfirmations int) {
var d *dead
m.nodeLock.Lock()
state, ok := m.nodeMap[s.Node]
timeout := ok && state.State == stateSuspect && state.StateChange == changeTime
timeout := ok && state.State == StateSuspect && state.StateChange == changeTime
if timeout {
d = &dead{Incarnation: state.Incarnation, Node: state.Name, From: m.config.Name}
}
m.nodeLock.Unlock()
if timeout {
if k > 0 && numConfirmations < k {
metrics.IncrCounter([]string{"memberlist", "degraded", "timeout"}, 1)
metrics.IncrCounterWithLabels([]string{"memberlist", "degraded", "timeout"}, 1, m.metricLabels)
}
m.logger.Printf("[INFO] memberlist: Marking %s as failed, suspect timeout reached (%d peer confirmations)",
state.Name, numConfirmations)
d := dead{Incarnation: state.Incarnation, Node: state.Name, From: m.config.Name}
m.deadNode(&d)
m.deadNode(d)
}
}
m.nodeTimers[s.Node] = newSuspicion(s.From, k, min, max, fn)
@ -1106,7 +1254,7 @@ func (m *Memberlist) deadNode(d *dead) {
delete(m.nodeTimers, d.Node)
// Ignore if node is already dead
if state.State == stateDead {
if state.DeadOrLeft() {
return
}
@ -1126,11 +1274,18 @@ func (m *Memberlist) deadNode(d *dead) {
}
// Update metrics
metrics.IncrCounter([]string{"memberlist", "msg", "dead"}, 1)
metrics.IncrCounterWithLabels([]string{"memberlist", "msg", "dead"}, 1, m.metricLabels)
// Update the state
state.Incarnation = d.Incarnation
state.State = stateDead
// If the dead message was send by the node itself, mark it is left
// instead of dead.
if d.Node == d.From {
state.State = StateLeft
} else {
state.State = StateDead
}
state.StateChange = time.Now()
// Notify of death
@ -1144,7 +1299,7 @@ func (m *Memberlist) deadNode(d *dead) {
func (m *Memberlist) mergeState(remote []pushNodeState) {
for _, r := range remote {
switch r.State {
case stateAlive:
case StateAlive:
a := alive{
Incarnation: r.Incarnation,
Node: r.Name,
@ -1155,11 +1310,14 @@ func (m *Memberlist) mergeState(remote []pushNodeState) {
}
m.aliveNode(&a, nil, false)
case stateDead:
case StateLeft:
d := dead{Incarnation: r.Incarnation, Node: r.Name, From: r.Name}
m.deadNode(&d)
case StateDead:
// If the remote node believes a node is dead, we prefer to
// suspect that node instead of declaring it dead instantly
fallthrough
case stateSuspect:
case StateSuspect:
s := suspect{Incarnation: r.Incarnation, Node: r.Name, From: m.config.Name}
m.suspectNode(&s)
}

View file

@ -1,6 +1,7 @@
package memberlist
import (
"fmt"
"net"
"time"
)
@ -63,3 +64,97 @@ type Transport interface {
// transport a chance to clean up any listeners.
Shutdown() error
}
type Address struct {
// Addr is a network address as a string, similar to Dial. This usually is
// in the form of "host:port". This is required.
Addr string
// Name is the name of the node being addressed. This is optional but
// transports may require it.
Name string
}
func (a *Address) String() string {
if a.Name != "" {
return fmt.Sprintf("%s (%s)", a.Name, a.Addr)
}
return a.Addr
}
// IngestionAwareTransport is not used.
//
// Deprecated: IngestionAwareTransport is not used and may be removed in a future
// version. Define the interface locally instead of referencing this exported
// interface.
type IngestionAwareTransport interface {
IngestPacket(conn net.Conn, addr net.Addr, now time.Time, shouldClose bool) error
IngestStream(conn net.Conn) error
}
type NodeAwareTransport interface {
Transport
WriteToAddress(b []byte, addr Address) (time.Time, error)
DialAddressTimeout(addr Address, timeout time.Duration) (net.Conn, error)
}
type shimNodeAwareTransport struct {
Transport
}
var _ NodeAwareTransport = (*shimNodeAwareTransport)(nil)
func (t *shimNodeAwareTransport) WriteToAddress(b []byte, addr Address) (time.Time, error) {
return t.WriteTo(b, addr.Addr)
}
func (t *shimNodeAwareTransport) DialAddressTimeout(addr Address, timeout time.Duration) (net.Conn, error) {
return t.DialTimeout(addr.Addr, timeout)
}
type labelWrappedTransport struct {
label string
NodeAwareTransport
}
var _ NodeAwareTransport = (*labelWrappedTransport)(nil)
func (t *labelWrappedTransport) WriteToAddress(buf []byte, addr Address) (time.Time, error) {
var err error
buf, err = AddLabelHeaderToPacket(buf, t.label)
if err != nil {
return time.Time{}, fmt.Errorf("failed to add label header to packet: %w", err)
}
return t.NodeAwareTransport.WriteToAddress(buf, addr)
}
func (t *labelWrappedTransport) WriteTo(buf []byte, addr string) (time.Time, error) {
var err error
buf, err = AddLabelHeaderToPacket(buf, t.label)
if err != nil {
return time.Time{}, err
}
return t.NodeAwareTransport.WriteTo(buf, addr)
}
func (t *labelWrappedTransport) DialAddressTimeout(addr Address, timeout time.Duration) (net.Conn, error) {
conn, err := t.NodeAwareTransport.DialAddressTimeout(addr, timeout)
if err != nil {
return nil, err
}
if err := AddLabelHeaderToStream(conn, t.label); err != nil {
return nil, fmt.Errorf("failed to add label header to stream: %w", err)
}
return conn, nil
}
func (t *labelWrappedTransport) DialTimeout(addr string, timeout time.Duration) (net.Conn, error) {
conn, err := t.NodeAwareTransport.DialTimeout(addr, timeout)
if err != nil {
return nil, err
}
if err := AddLabelHeaderToStream(conn, t.label); err != nil {
return nil, fmt.Errorf("failed to add label header to stream: %w", err)
}
return conn, nil
}

View file

@ -78,10 +78,9 @@ func retransmitLimit(retransmitMult, n int) int {
// shuffleNodes randomly shuffles the input nodes using the Fisher-Yates shuffle
func shuffleNodes(nodes []*nodeState) {
n := len(nodes)
for i := n - 1; i > 0; i-- {
j := rand.Intn(i + 1)
rand.Shuffle(n, func(i, j int) {
nodes[i], nodes[j] = nodes[j], nodes[i]
}
})
}
// pushPushScale is used to scale the time interval at which push/pull
@ -97,13 +96,13 @@ func pushPullScale(interval time.Duration, n int) time.Duration {
return time.Duration(multiplier) * interval
}
// moveDeadNodes moves nodes that are dead and beyond the gossip to the dead interval
// moveDeadNodes moves dead and left nodes that that have not changed during the gossipToTheDeadTime interval
// to the end of the slice and returns the index of the first moved node.
func moveDeadNodes(nodes []*nodeState, gossipToTheDeadTime time.Duration) int {
numDead := 0
n := len(nodes)
for i := 0; i < n-numDead; i++ {
if nodes[i].State != stateDead {
if !nodes[i].DeadOrLeft() {
continue
}
@ -120,39 +119,56 @@ func moveDeadNodes(nodes []*nodeState, gossipToTheDeadTime time.Duration) int {
return n - numDead
}
// kRandomNodes is used to select up to k random nodes, excluding any nodes where
// the filter function returns true. It is possible that less than k nodes are
// kRandomNodes is used to select up to k random Nodes, excluding any nodes where
// the exclude function returns true. It is possible that less than k nodes are
// returned.
func kRandomNodes(k int, nodes []*nodeState, filterFn func(*nodeState) bool) []*nodeState {
func kRandomNodes(k int, nodes []*nodeState, exclude func(*nodeState) bool) []Node {
n := len(nodes)
kNodes := make([]*nodeState, 0, k)
kNodes := make([]Node, 0, k)
OUTER:
// Probe up to 3*n times, with large n this is not necessary
// since k << n, but with small n we want search to be
// exhaustive
for i := 0; i < 3*n && len(kNodes) < k; i++ {
// Get random node
// Get random nodeState
idx := randomOffset(n)
node := nodes[idx]
state := nodes[idx]
// Give the filter a shot at it.
if filterFn != nil && filterFn(node) {
if exclude != nil && exclude(state) {
continue OUTER
}
// Check if we have this node already
for j := 0; j < len(kNodes); j++ {
if node == kNodes[j] {
if state.Node.Name == kNodes[j].Name {
continue OUTER
}
}
// Append the node
kNodes = append(kNodes, node)
kNodes = append(kNodes, state.Node)
}
return kNodes
}
// makeCompoundMessages takes a list of messages and packs
// them into one or multiple messages based on the limitations
// of compound messages (255 messages each).
func makeCompoundMessages(msgs [][]byte) []*bytes.Buffer {
const maxMsgs = 255
bufs := make([]*bytes.Buffer, 0, (len(msgs)+(maxMsgs-1))/maxMsgs)
for ; len(msgs) > maxMsgs; msgs = msgs[maxMsgs:] {
bufs = append(bufs, makeCompoundMessage(msgs[:maxMsgs]))
}
if len(msgs) > 0 {
bufs = append(bufs, makeCompoundMessage(msgs))
}
return bufs
}
// makeCompoundMessage takes a list of messages and generates
// a single compound message containing all of them
func makeCompoundMessage(msgs [][]byte) *bytes.Buffer {
@ -186,18 +202,18 @@ func decodeCompoundMessage(buf []byte) (trunc int, parts [][]byte, err error) {
err = fmt.Errorf("missing compound length byte")
return
}
numParts := uint8(buf[0])
numParts := int(buf[0])
buf = buf[1:]
// Check we have enough bytes
if len(buf) < int(numParts*2) {
if len(buf) < numParts*2 {
err = fmt.Errorf("truncated len slice")
return
}
// Decode the lengths
lengths := make([]uint16, numParts)
for i := 0; i < int(numParts); i++ {
for i := 0; i < numParts; i++ {
lengths[i] = binary.BigEndian.Uint16(buf[i*2 : i*2+2])
}
buf = buf[numParts*2:]
@ -205,7 +221,7 @@ func decodeCompoundMessage(buf []byte) (trunc int, parts [][]byte, err error) {
// Split each message
for idx, msgLen := range lengths {
if len(buf) < int(msgLen) {
trunc = int(numParts) - idx
trunc = numParts - idx
return
}