Merge pull request #18204 from mavenugo/dhb

Configurable discovery ttl and heartbeat timer
This commit is contained in:
Alexandre Beslic 2015-11-30 11:00:53 -08:00
commit 0f0cf267e8
3 changed files with 153 additions and 7 deletions

View file

@ -1,6 +1,8 @@
package daemon
import (
"fmt"
"strconv"
"time"
log "github.com/Sirupsen/logrus"
@ -13,22 +15,63 @@ import (
const (
	// defaultDiscoveryHeartbeat is the default value for discovery heartbeat interval.
	defaultDiscoveryHeartbeat = 20 * time.Second
	// defaultDiscoveryTTL is the default TTL interval for discovery.
	defaultDiscoveryTTL = 60 * time.Second
	// defaultDiscoveryTTLFactor is the default TTL factor for discovery: when only one of
	// heartbeat and TTL is configured, the TTL is this many heartbeats long.
	defaultDiscoveryTTLFactor = 3
)

// discoverySeconds looks up key in clusterOpts and parses it as a whole number of seconds
// in the range [1, limit]. The boolean result reports whether the key was present at all;
// a present but unparsable or out-of-range value yields an error.
func discoverySeconds(clusterOpts map[string]string, key string, limit int64) (int, bool, error) {
	raw, ok := clusterOpts[key]
	if !ok {
		return 0, false, nil
	}
	secs, err := strconv.Atoi(raw)
	if err != nil {
		return 0, true, err
	}
	if secs <= 0 {
		// A zero or negative interval would make time.Tick return a nil channel, so the
		// registration loop would silently stop sending heartbeats.
		return 0, true, fmt.Errorf("%s must be positive", key)
	}
	if int64(secs) > limit {
		return 0, true, fmt.Errorf("%s must not exceed %d seconds", key, limit)
	}
	return secs, true, nil
}

// discoveryOpts resolves the discovery heartbeat interval and TTL from the cluster options.
//
// The "discovery.heartbeat" and "discovery.ttl" options are both expressed in whole seconds:
//   - neither set: the default heartbeat and defaultDiscoveryTTLFactor times that as TTL;
//   - only heartbeat set: TTL is defaultDiscoveryTTLFactor times the heartbeat;
//   - only TTL set: heartbeat is the TTL divided by defaultDiscoveryTTLFactor;
//   - both set: the values are used as given.
//
// It returns (heartbeat, ttl, nil) on success. It returns zero durations and an error when a
// value is not an integer, is not positive, is too large to be represented as a time.Duration,
// or when an explicit TTL is not strictly greater than the resulting heartbeat.
func discoveryOpts(clusterOpts map[string]string) (time.Duration, time.Duration, error) {
	// Largest whole number of seconds that fits in a time.Duration (int64 nanoseconds).
	const maxSeconds = (1<<63 - 1) / int64(time.Second)

	heartbeat := defaultDiscoveryHeartbeat
	ttl := defaultDiscoveryTTLFactor * defaultDiscoveryHeartbeat

	// The heartbeat limit leaves room for multiplying by the TTL factor without overflow.
	h, hbSet, err := discoverySeconds(clusterOpts, "discovery.heartbeat", maxSeconds/defaultDiscoveryTTLFactor)
	if err != nil {
		return 0, 0, err
	}
	if hbSet {
		heartbeat = time.Duration(h) * time.Second
		ttl = defaultDiscoveryTTLFactor * heartbeat
	}

	t, ttlSet, err := discoverySeconds(clusterOpts, "discovery.ttl", maxSeconds)
	if err != nil {
		return 0, 0, err
	}
	if !ttlSet {
		return heartbeat, ttl, nil
	}
	ttl = time.Duration(t) * time.Second

	if !hbSet {
		heartbeat = time.Duration(t/defaultDiscoveryTTLFactor) * time.Second
		if heartbeat <= 0 {
			// The TTL is shorter than defaultDiscoveryTTLFactor seconds, so the whole-second
			// division truncated to zero; fall back to a sub-second heartbeat.
			heartbeat = ttl / defaultDiscoveryTTLFactor
		}
	}

	if ttl <= heartbeat {
		return 0, 0, fmt.Errorf("discovery.ttl timer must be greater than discovery.heartbeat")
	}

	return heartbeat, ttl, nil
}
// initDiscovery initializes the node discovery subsystem by connecting to the specified backend
// and starts a registration loop that advertises the current node under the specified address.
// The heartbeat and TTL are resolved from clusterOpts via discoveryOpts; an error from
// discoveryOpts or from discovery.New is returned together with a nil backend.
// NOTE(review): this listing looks like a rendered diff with its +/- markers stripped, so the
// pre-change and post-change forms of the discovery.New and registrationLoop lines both appear
// below — confirm against the real file which of each pair is live.
func initDiscovery(backend, address string, clusterOpts map[string]string) (discovery.Backend, error) {
discoveryBackend, err := discovery.New(backend, defaultDiscoveryHeartbeat, defaultDiscoveryTTL, clusterOpts)
// Resolve the configured (or default) heartbeat and TTL before creating the backend.
heartbeat, ttl, err := discoveryOpts(clusterOpts)
if err != nil {
return nil, err
}
discoveryBackend, err := discovery.New(backend, heartbeat, ttl, clusterOpts)
if err != nil {
return nil, err
}
// We call Register() on the discovery backend in a loop for the whole lifetime of the daemon,
// but we never actually Watch() for nodes appearing and disappearing for the moment.
go registrationLoop(discoveryBackend, address)
go registrationLoop(discoveryBackend, address, heartbeat)
return discoveryBackend, nil
}
@ -41,9 +84,9 @@ func registerAddr(backend discovery.Backend, addr string) {
// registrationLoop registers the current node against the discovery backend using the specified
// address. The function never returns, as registration against the backend comes with a TTL and
// requires regular heartbeats. In the three-argument form, heartbeat is the interval between
// two consecutive registrations.
// NOTE(review): two signatures and two loop headers appear below; they look like the removed and
// added sides of a diff hunk — confirm which one the real file contains.
func registrationLoop(discoveryBackend discovery.Backend, address string) {
func registrationLoop(discoveryBackend discovery.Backend, address string, heartbeat time.Duration) {
// Register once up front: the ticker below only fires after a full interval has elapsed.
registerAddr(discoveryBackend, address)
for range time.Tick(defaultDiscoveryHeartbeat) {
// NOTE(review): time.Tick returns a nil channel for a non-positive duration, in which case this
// loop would block forever without re-registering — confirm heartbeat is always positive here.
for range time.Tick(heartbeat) {
registerAddr(discoveryBackend, address)
}
}

91
daemon/discovery_test.go Normal file
View file

@ -0,0 +1,91 @@
package daemon
import (
"testing"
"time"
)
// TestDiscoveryOpts exercises discoveryOpts with option sets it must reject and with option
// sets it must accept, checking the heartbeat and TTL it resolves for the latter.
func TestDiscoveryOpts(t *testing.T) {
	// Option sets that have to be rejected, each with the message reported when it is not.
	rejected := []struct {
		opts    map[string]string
		failure string
	}{
		{
			map[string]string{"discovery.heartbeat": "10", "discovery.ttl": "5"},
			"discovery.ttl < discovery.heartbeat must fail",
		},
		{
			map[string]string{"discovery.heartbeat": "10", "discovery.ttl": "10"},
			"discovery.ttl == discovery.heartbeat must fail",
		},
		{
			map[string]string{"discovery.heartbeat": "invalid"},
			"invalid discovery.heartbeat must fail",
		},
		{
			map[string]string{"discovery.ttl": "invalid"},
			"invalid discovery.ttl must fail",
		},
	}
	for _, tc := range rejected {
		if _, _, err := discoveryOpts(tc.opts); err == nil {
			t.Fatal(tc.failure)
		}
	}

	// Small assertion helpers shared by the accepted cases below.
	wantHeartbeat := func(want, got time.Duration) {
		if got != want {
			t.Fatalf("Heatbeat - Expected : %v, Actual : %v", want, got)
		}
	}
	wantTTL := func(want, got time.Duration) {
		if got != want {
			t.Fatalf("TTL - Expected : %v, Actual : %v", want, got)
		}
	}

	// Both options given: they are used verbatim.
	heartbeat, ttl, err := discoveryOpts(map[string]string{"discovery.heartbeat": "10", "discovery.ttl": "20"})
	if err != nil {
		t.Fatal(err)
	}
	wantHeartbeat(10*time.Second, heartbeat)
	wantTTL(20*time.Second, ttl)

	// Heartbeat only: the TTL is derived from it through the default factor.
	heartbeat, ttl, err = discoveryOpts(map[string]string{"discovery.heartbeat": "10"})
	if err != nil {
		t.Fatal(err)
	}
	wantHeartbeat(10*time.Second, heartbeat)
	wantTTL(10*defaultDiscoveryTTLFactor*time.Second, ttl)

	// TTL only: the heartbeat is derived from it through the default factor.
	heartbeat, ttl, err = discoveryOpts(map[string]string{"discovery.ttl": "30"})
	if err != nil {
		t.Fatal(err)
	}
	wantTTL(30*time.Second, ttl)
	wantHeartbeat(30*time.Second/defaultDiscoveryTTLFactor, heartbeat)

	// Nothing configured: defaults apply.
	heartbeat, ttl, err = discoveryOpts(map[string]string{})
	if err != nil {
		t.Fatal(err)
	}
	wantHeartbeat(defaultDiscoveryHeartbeat, heartbeat)
	wantTTL(defaultDiscoveryHeartbeat*defaultDiscoveryTTLFactor, ttl)
}

View file

@ -565,6 +565,18 @@ docker daemon \
The currently supported cluster store options are:
* `discovery.heartbeat`
Specifies the heartbeat timer in seconds which is used by the daemon as a
keepalive mechanism to make sure the discovery module treats the node as alive
in the cluster. If not configured, the default value is 20 seconds, or one
third of `discovery.ttl` when only that option is set.
* `discovery.ttl`
Specifies the TTL (time-to-live) in seconds which is used by the discovery
module to time out a node if a valid heartbeat is not received within the
configured TTL value. It must be greater than `discovery.heartbeat`. If not
configured, the default value is three times the heartbeat (60 seconds with
the default heartbeat).
* `kv.cacertfile`
Specifies the path to a local file with PEM encoded CA certificates to trust