Libnetwork revendoring

Diff:
5ab4ab8300...20dd462e0a

- Memberlist revendor (fix for deadlock on exit)
- Network diagnostic client
- Fix for ndots configuration

Signed-off-by: Flavio Crisciani <flavio.crisciani@docker.com>
This commit is contained in:
Flavio Crisciani 2018-01-29 11:19:37 -08:00
parent 3a6f8cfd51
commit ec86547244
No known key found for this signature in database
GPG key ID: 28CAFCE754CF3A48
21 changed files with 371 additions and 338 deletions

View file

@ -312,17 +312,19 @@ func (daemon *Daemon) reloadLiveRestore(conf *config.Config, attributes map[stri
return nil
}
// reloadNetworkDiagnosticPort updates the network controller starting the diagnose mode if the config is valid
// reloadNetworkDiagnosticPort updates the network controller starting the diagnostic if the config is valid
func (daemon *Daemon) reloadNetworkDiagnosticPort(conf *config.Config, attributes map[string]string) error {
if conf == nil || daemon.netController == nil {
if conf == nil || daemon.netController == nil || !conf.IsValueSet("network-diagnostic-port") ||
conf.NetworkDiagnosticPort < 1 || conf.NetworkDiagnosticPort > 65535 {
// If there is no config make sure that the diagnostic is off
if daemon.netController != nil {
daemon.netController.StopDiagnostic()
}
return nil
}
// Enable the network diagnose if the flag is set with a valid port withing the range
if conf.IsValueSet("network-diagnostic-port") && conf.NetworkDiagnosticPort > 0 && conf.NetworkDiagnosticPort < 65536 {
logrus.Warnf("Calling the diagnostic start with %d", conf.NetworkDiagnosticPort)
daemon.netController.StartDiagnose(conf.NetworkDiagnosticPort)
} else {
daemon.netController.StopDiagnose()
}
// Enable the network diagnostic if the flag is set with a valid port within the range
logrus.WithFields(logrus.Fields{"port": conf.NetworkDiagnosticPort, "ip": "127.0.0.1"}).Warn("Starting network diagnostic server")
daemon.netController.StartDiagnostic(conf.NetworkDiagnosticPort)
return nil
}

View file

@ -513,18 +513,18 @@ func TestDaemonReloadNetworkDiagnosticPort(t *testing.T) {
if err := daemon.Reload(enableConfig); err != nil {
t.Fatal(err)
}
// Check that the diagnose is enabled
if !daemon.netController.IsDiagnoseEnabled() {
t.Fatalf("diagnosed should be enable")
// Check that the diagnostic is enabled
if !daemon.netController.IsDiagnosticEnabled() {
t.Fatalf("diagnostic should be enable")
}
// Reload
if err := daemon.Reload(disableConfig); err != nil {
t.Fatal(err)
}
// Check that the diagnose is disabled
if daemon.netController.IsDiagnoseEnabled() {
t.Fatalf("diagnosed should be disable")
// Check that the diagnostic is disabled
if daemon.netController.IsDiagnosticEnabled() {
t.Fatalf("diagnostic should be disable")
}
}
@ -533,18 +533,18 @@ func TestDaemonReloadNetworkDiagnosticPort(t *testing.T) {
if err := daemon.Reload(enableConfig); err != nil {
t.Fatal(err)
}
// Check that the diagnose is enabled
if !daemon.netController.IsDiagnoseEnabled() {
t.Fatalf("diagnosed should be enable")
// Check that the diagnostic is enabled
if !daemon.netController.IsDiagnosticEnabled() {
t.Fatalf("diagnostic should be enable")
}
// Check that another reload does not cause issues
if err := daemon.Reload(enableConfig); err != nil {
t.Fatal(err)
}
// Check that the diagnose is enable
if !daemon.netController.IsDiagnoseEnabled() {
t.Fatalf("diagnosed should be enable")
// Check that the diagnostic is still enabled
if !daemon.netController.IsDiagnosticEnabled() {
t.Fatalf("diagnostic should be enable")
}
}

View file

@ -31,12 +31,12 @@ github.com/moby/buildkit aaff9d591ef128560018433fe61beb802e149de8
github.com/tonistiigi/fsutil dea3a0da73aee887fc02142d995be764106ac5e2
#get libnetwork packages
github.com/docker/libnetwork 5ab4ab830062fe8a30a44b75b0bda6b1f4f166a4
github.com/docker/libnetwork 20dd462e0a0e883437a274bd61df4bc4de980830
github.com/docker/go-events 9461782956ad83b30282bf90e31fa6a70c255ba9
github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80
github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
github.com/hashicorp/go-msgpack 71c2886f5a673a35f909803f38ece5810165097b
github.com/hashicorp/memberlist v0.1.0
github.com/hashicorp/memberlist 3d8438da9589e7b608a83ffac1ef8211486bcb7c
github.com/sean-/seed e2103e2c35297fb7e17febb81e49b312087a2372
github.com/hashicorp/go-sockaddr acd314c5781ea706c710d9ea70069fd2e110d61d
github.com/hashicorp/go-multierror fcdddc395df1ddf4247c69bd436e84cfa0733f7e

View file

@ -297,8 +297,8 @@ func (c *controller) agentInit(listenAddr, bindAddrOrInterface, advertiseAddr, d
return err
}
// Register the diagnose handlers
c.DiagnoseServer.RegisterHandler(nDB, networkdb.NetDbPaths2Func)
// Register the diagnostic handlers
c.DiagnosticServer.RegisterHandler(nDB, networkdb.NetDbPaths2Func)
var cancelList []func()
ch, cancel := nDB.Watch(libnetworkEPTable, "", "")

View file

@ -61,7 +61,7 @@ import (
"github.com/docker/libnetwork/cluster"
"github.com/docker/libnetwork/config"
"github.com/docker/libnetwork/datastore"
"github.com/docker/libnetwork/diagnose"
"github.com/docker/libnetwork/diagnostic"
"github.com/docker/libnetwork/discoverapi"
"github.com/docker/libnetwork/driverapi"
"github.com/docker/libnetwork/drvregistry"
@ -136,12 +136,12 @@ type NetworkController interface {
// SetKeys configures the encryption key for gossip and overlay data path
SetKeys(keys []*types.EncryptionKey) error
// StartDiagnose start the network diagnose mode
StartDiagnose(port int)
// StopDiagnose start the network diagnose mode
StopDiagnose()
// IsDiagnoseEnabled returns true if the diagnose is enabled
IsDiagnoseEnabled() bool
// StartDiagnostic starts the network diagnostic mode
StartDiagnostic(port int)
// StopDiagnostic stops the network diagnostic mode
StopDiagnostic()
// IsDiagnosticEnabled returns true if the diagnostic is enabled
IsDiagnosticEnabled() bool
}
// NetworkWalker is a client provided function which will be used to walk the Networks.
@ -176,7 +176,7 @@ type controller struct {
agentStopDone chan struct{}
keys []*types.EncryptionKey
clusterConfigAvailable bool
DiagnoseServer *diagnose.Server
DiagnosticServer *diagnostic.Server
sync.Mutex
}
@ -188,16 +188,16 @@ type initializer struct {
// New creates a new instance of network controller.
func New(cfgOptions ...config.Option) (NetworkController, error) {
c := &controller{
id: stringid.GenerateRandomID(),
cfg: config.ParseConfigOptions(cfgOptions...),
sandboxes: sandboxTable{},
svcRecords: make(map[string]svcInfo),
serviceBindings: make(map[serviceKey]*service),
agentInitDone: make(chan struct{}),
networkLocker: locker.New(),
DiagnoseServer: diagnose.New(),
id: stringid.GenerateRandomID(),
cfg: config.ParseConfigOptions(cfgOptions...),
sandboxes: sandboxTable{},
svcRecords: make(map[string]svcInfo),
serviceBindings: make(map[serviceKey]*service),
agentInitDone: make(chan struct{}),
networkLocker: locker.New(),
DiagnosticServer: diagnostic.New(),
}
c.DiagnoseServer.Init()
c.DiagnosticServer.Init()
if err := c.initStores(); err != nil {
return nil, err
@ -1307,27 +1307,27 @@ func (c *controller) Stop() {
osl.GC()
}
// StartDiagnose start the network diagnose mode
func (c *controller) StartDiagnose(port int) {
// StartDiagnostic starts the network diagnostic mode
func (c *controller) StartDiagnostic(port int) {
c.Lock()
if !c.DiagnoseServer.IsDebugEnable() {
c.DiagnoseServer.EnableDebug("127.0.0.1", port)
if !c.DiagnosticServer.IsDiagnosticEnabled() {
c.DiagnosticServer.EnableDiagnostic("127.0.0.1", port)
}
c.Unlock()
}
// StopDiagnose start the network diagnose mode
func (c *controller) StopDiagnose() {
// StopDiagnostic stops the network diagnostic mode
func (c *controller) StopDiagnostic() {
c.Lock()
if c.DiagnoseServer.IsDebugEnable() {
c.DiagnoseServer.DisableDebug()
if c.DiagnosticServer.IsDiagnosticEnabled() {
c.DiagnosticServer.DisableDiagnostic()
}
c.Unlock()
}
// IsDiagnoseEnabled returns true if the diagnose is enabled
func (c *controller) IsDiagnoseEnabled() bool {
// IsDiagnosticEnabled returns true if the diagnostic is enabled
func (c *controller) IsDiagnosticEnabled() bool {
c.Lock()
defer c.Unlock()
return c.DiagnoseServer.IsDebugEnable()
return c.DiagnosticServer.IsDiagnosticEnabled()
}

View file

@ -1,4 +1,4 @@
package diagnose
package diagnostic
import (
"context"
@ -44,7 +44,7 @@ type Server struct {
sync.Mutex
}
// New creates a new diagnose server
// New creates a new diagnostic server
func New() *Server {
return &Server{
registeredHanders: make(map[string]bool),
@ -78,8 +78,8 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
s.mux.ServeHTTP(w, r)
}
// EnableDebug opens a TCP socket to debug the passed network DB
func (s *Server) EnableDebug(ip string, port int) {
// EnableDiagnostic opens a TCP socket to debug the passed network DB
func (s *Server) EnableDiagnostic(ip string, port int) {
s.Lock()
defer s.Unlock()
@ -90,7 +90,7 @@ func (s *Server) EnableDebug(ip string, port int) {
return
}
logrus.Infof("Starting the diagnose server listening on %d for commands", port)
logrus.Infof("Starting the diagnostic server listening on %d for commands", port)
srv := &http.Server{Addr: fmt.Sprintf("%s:%d", ip, port), Handler: s}
s.srv = srv
s.enable = 1
@ -103,19 +103,19 @@ func (s *Server) EnableDebug(ip string, port int) {
}(s)
}
// DisableDebug stop the dubug and closes the tcp socket
func (s *Server) DisableDebug() {
// DisableDiagnostic stops the diagnostic server and closes the TCP socket
func (s *Server) DisableDiagnostic() {
s.Lock()
defer s.Unlock()
s.srv.Shutdown(context.Background())
s.srv = nil
s.enable = 0
logrus.Info("Disabling the diagnose server")
logrus.Info("Disabling the diagnostic server")
}
// IsDebugEnable returns true when the debug is enabled
func (s *Server) IsDebugEnable() bool {
// IsDiagnosticEnabled returns true when the diagnostic server is enabled
func (s *Server) IsDiagnosticEnabled() bool {
s.Lock()
defer s.Unlock()
return s.enable == 1
@ -127,7 +127,7 @@ func notImplemented(ctx interface{}, w http.ResponseWriter, r *http.Request) {
rsp := WrongCommand("not implemented", fmt.Sprintf("URL path: %s no method implemented check /help\n", r.URL.Path))
// audit logs
log := logrus.WithFields(logrus.Fields{"component": "diagnose", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log := logrus.WithFields(logrus.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log.Info("command not implemented done")
HTTPReply(w, rsp, json)
@ -138,7 +138,7 @@ func help(ctx interface{}, w http.ResponseWriter, r *http.Request) {
_, json := ParseHTTPFormOptions(r)
// audit logs
log := logrus.WithFields(logrus.Fields{"component": "diagnose", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log := logrus.WithFields(logrus.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log.Info("help done")
n, ok := ctx.(*Server)
@ -156,7 +156,7 @@ func ready(ctx interface{}, w http.ResponseWriter, r *http.Request) {
_, json := ParseHTTPFormOptions(r)
// audit logs
log := logrus.WithFields(logrus.Fields{"component": "diagnose", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log := logrus.WithFields(logrus.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log.Info("ready done")
HTTPReply(w, CommandSucceed(&StringCmd{Info: "OK"}), json)
}
@ -166,7 +166,7 @@ func stackTrace(ctx interface{}, w http.ResponseWriter, r *http.Request) {
_, json := ParseHTTPFormOptions(r)
// audit logs
log := logrus.WithFields(logrus.Fields{"component": "diagnose", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log := logrus.WithFields(logrus.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log.Info("stack trace")
path, err := stackdump.DumpStacks("/tmp/")

View file

@ -1,4 +1,4 @@
package diagnose
package diagnostic
import "fmt"
@ -31,7 +31,7 @@ func WrongCommand(message, usage string) *HTTPResult {
}
}
// HTTPResult Diagnose Server HTTP result operation
// HTTPResult Diagnostic Server HTTP result operation
type HTTPResult struct {
Message string `json:"message"`
Details StringInterface `json:"details"`

View file

@ -165,16 +165,19 @@ func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent) bool {
}
}
nDB.RUnlock()
if !ok || network.leaving || !nodePresent {
// I'm out of the network OR the event owner is not anymore part of the network so do not propagate
return false
}
nDB.Lock()
e, err := nDB.getEntry(tEvent.TableName, tEvent.NetworkID, tEvent.Key)
if err == nil {
// We have the latest state. Ignore the event
// since it is stale.
if e.ltime >= tEvent.LTime {
nDB.Unlock()
return false
}
}
@ -195,8 +198,6 @@ func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent) bool {
nDB.config.Hostname, nDB.config.NodeID, tEvent)
e.reapTime = nDB.config.reapEntryInterval
}
nDB.Lock()
nDB.createOrUpdateEntry(tEvent.NetworkID, tEvent.TableName, tEvent.Key, e)
nDB.Unlock()

View file

@ -26,13 +26,10 @@ func (e *eventDelegate) NotifyJoin(mn *memberlist.Node) {
e.broadcastNodeEvent(mn.Addr, opCreate)
e.nDB.Lock()
defer e.nDB.Unlock()
// In case the node is rejoining after a failure or leave,
// wait until an explicit join message arrives before adding
// it to the nodes just to make sure this is not a stale
// join. If you don't know about this node add it immediately.
_, fOk := e.nDB.failedNodes[mn.Name]
_, lOk := e.nDB.leftNodes[mn.Name]
if fOk || lOk {
// just add the node back to active
if moved, _ := e.nDB.changeNodeState(mn.Name, nodeActiveState); moved {
return
}

View file

@ -322,18 +322,20 @@ func (nDB *NetworkDB) Peers(nid string) []PeerInfo {
// GetEntry retrieves the value of a table entry in a given (network,
// table, key) tuple
func (nDB *NetworkDB) GetEntry(tname, nid, key string) ([]byte, error) {
nDB.RLock()
defer nDB.RUnlock()
entry, err := nDB.getEntry(tname, nid, key)
if err != nil {
return nil, err
}
if entry != nil && entry.deleting {
return nil, types.NotFoundErrorf("entry in table %s network id %s and key %s deleted and pending garbage collection", tname, nid, key)
}
return entry.value, nil
}
func (nDB *NetworkDB) getEntry(tname, nid, key string) (*entry, error) {
nDB.RLock()
defer nDB.RUnlock()
e, ok := nDB.indexes[byTable].Get(fmt.Sprintf("/%s/%s/%s", tname, nid, key))
if !ok {
return nil, types.NotFoundErrorf("could not get entry in table %s with network id %s and key %s", tname, nid, key)
@ -348,13 +350,10 @@ func (nDB *NetworkDB) getEntry(tname, nid, key string) (*entry, error) {
// entry for the same tuple for which there is already an existing
// entry unless the current entry is deleting state.
func (nDB *NetworkDB) CreateEntry(tname, nid, key string, value []byte) error {
nDB.Lock()
oldEntry, err := nDB.getEntry(tname, nid, key)
if err != nil {
if _, ok := err.(types.NotFoundError); !ok {
return fmt.Errorf("cannot create entry in table %s with network id %s and key %s: %v", tname, nid, key, err)
}
}
if oldEntry != nil && !oldEntry.deleting {
if err == nil || (oldEntry != nil && !oldEntry.deleting) {
nDB.Unlock()
return fmt.Errorf("cannot create entry in table %s with network id %s and key %s, already exists", tname, nid, key)
}
@ -364,14 +363,13 @@ func (nDB *NetworkDB) CreateEntry(tname, nid, key string, value []byte) error {
value: value,
}
nDB.createOrUpdateEntry(nid, tname, key, entry)
nDB.Unlock()
if err := nDB.sendTableEvent(TableEventTypeCreate, nid, tname, key, entry); err != nil {
return fmt.Errorf("cannot send create event for table %s, %v", tname, err)
}
nDB.Lock()
nDB.createOrUpdateEntry(nid, tname, key, entry)
nDB.Unlock()
return nil
}
@ -380,7 +378,9 @@ func (nDB *NetworkDB) CreateEntry(tname, nid, key string, value []byte) error {
// propagates this event to the cluster. It is an error to update a
// non-existent entry.
func (nDB *NetworkDB) UpdateEntry(tname, nid, key string, value []byte) error {
if _, err := nDB.GetEntry(tname, nid, key); err != nil {
nDB.Lock()
if _, err := nDB.getEntry(tname, nid, key); err != nil {
nDB.Unlock()
return fmt.Errorf("cannot update entry as the entry in table %s with network id %s and key %s does not exist", tname, nid, key)
}
@ -390,14 +390,13 @@ func (nDB *NetworkDB) UpdateEntry(tname, nid, key string, value []byte) error {
value: value,
}
nDB.createOrUpdateEntry(nid, tname, key, entry)
nDB.Unlock()
if err := nDB.sendTableEvent(TableEventTypeUpdate, nid, tname, key, entry); err != nil {
return fmt.Errorf("cannot send table update event: %v", err)
}
nDB.Lock()
nDB.createOrUpdateEntry(nid, tname, key, entry)
nDB.Unlock()
return nil
}
@ -427,27 +426,29 @@ func (nDB *NetworkDB) GetTableByNetwork(tname, nid string) map[string]*TableElem
// table, key) tuple and if the NetworkDB is part of the cluster
// propagates this event to the cluster.
func (nDB *NetworkDB) DeleteEntry(tname, nid, key string) error {
value, err := nDB.GetEntry(tname, nid, key)
if err != nil {
return fmt.Errorf("cannot delete entry as the entry in table %s with network id %s and key %s does not exist", tname, nid, key)
nDB.Lock()
oldEntry, err := nDB.getEntry(tname, nid, key)
if err != nil || oldEntry == nil || oldEntry.deleting {
nDB.Unlock()
return fmt.Errorf("cannot delete entry %s with network id %s and key %s "+
"does not exist or is already being deleted", tname, nid, key)
}
entry := &entry{
ltime: nDB.tableClock.Increment(),
node: nDB.config.NodeID,
value: value,
value: oldEntry.value,
deleting: true,
reapTime: nDB.config.reapEntryInterval,
}
nDB.createOrUpdateEntry(nid, tname, key, entry)
nDB.Unlock()
if err := nDB.sendTableEvent(TableEventTypeDelete, nid, tname, key, entry); err != nil {
return fmt.Errorf("cannot send table delete event: %v", err)
}
nDB.Lock()
nDB.createOrUpdateEntry(nid, tname, key, entry)
nDB.Unlock()
return nil
}

View file

@ -7,7 +7,7 @@ import (
"strings"
"github.com/docker/libnetwork/common"
"github.com/docker/libnetwork/diagnose"
"github.com/docker/libnetwork/diagnostic"
"github.com/sirupsen/logrus"
)
@ -17,7 +17,7 @@ const (
)
// NetDbPaths2Func maps the diagnostic HTTP URL paths to the NetworkDB handler functions that serve them
var NetDbPaths2Func = map[string]diagnose.HTTPHandlerFunc{
var NetDbPaths2Func = map[string]diagnostic.HTTPHandlerFunc{
"/join": dbJoin,
"/networkpeers": dbPeers,
"/clusterpeers": dbClusterPeers,
@ -32,17 +32,17 @@ var NetDbPaths2Func = map[string]diagnose.HTTPHandlerFunc{
func dbJoin(ctx interface{}, w http.ResponseWriter, r *http.Request) {
r.ParseForm()
diagnose.DebugHTTPForm(r)
_, json := diagnose.ParseHTTPFormOptions(r)
diagnostic.DebugHTTPForm(r)
_, json := diagnostic.ParseHTTPFormOptions(r)
// audit logs
log := logrus.WithFields(logrus.Fields{"component": "diagnose", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log := logrus.WithFields(logrus.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log.Info("join cluster")
if len(r.Form["members"]) < 1 {
rsp := diagnose.WrongCommand(missingParameter, fmt.Sprintf("%s?members=ip1,ip2,...", r.URL.Path))
rsp := diagnostic.WrongCommand(missingParameter, fmt.Sprintf("%s?members=ip1,ip2,...", r.URL.Path))
log.Error("join cluster failed, wrong input")
diagnose.HTTPReply(w, rsp, json)
diagnostic.HTTPReply(w, rsp, json)
return
}
@ -50,88 +50,88 @@ func dbJoin(ctx interface{}, w http.ResponseWriter, r *http.Request) {
if ok {
err := nDB.Join(strings.Split(r.Form["members"][0], ","))
if err != nil {
rsp := diagnose.FailCommand(fmt.Errorf("%s error in the DB join %s", r.URL.Path, err))
rsp := diagnostic.FailCommand(fmt.Errorf("%s error in the DB join %s", r.URL.Path, err))
log.WithError(err).Error("join cluster failed")
diagnose.HTTPReply(w, rsp, json)
diagnostic.HTTPReply(w, rsp, json)
return
}
log.Info("join cluster done")
diagnose.HTTPReply(w, diagnose.CommandSucceed(nil), json)
diagnostic.HTTPReply(w, diagnostic.CommandSucceed(nil), json)
return
}
diagnose.HTTPReply(w, diagnose.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
diagnostic.HTTPReply(w, diagnostic.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
}
func dbPeers(ctx interface{}, w http.ResponseWriter, r *http.Request) {
r.ParseForm()
diagnose.DebugHTTPForm(r)
_, json := diagnose.ParseHTTPFormOptions(r)
diagnostic.DebugHTTPForm(r)
_, json := diagnostic.ParseHTTPFormOptions(r)
// audit logs
log := logrus.WithFields(logrus.Fields{"component": "diagnose", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log := logrus.WithFields(logrus.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log.Info("network peers")
if len(r.Form["nid"]) < 1 {
rsp := diagnose.WrongCommand(missingParameter, fmt.Sprintf("%s?nid=test", r.URL.Path))
rsp := diagnostic.WrongCommand(missingParameter, fmt.Sprintf("%s?nid=test", r.URL.Path))
log.Error("network peers failed, wrong input")
diagnose.HTTPReply(w, rsp, json)
diagnostic.HTTPReply(w, rsp, json)
return
}
nDB, ok := ctx.(*NetworkDB)
if ok {
peers := nDB.Peers(r.Form["nid"][0])
rsp := &diagnose.TableObj{Length: len(peers)}
rsp := &diagnostic.TableObj{Length: len(peers)}
for i, peerInfo := range peers {
rsp.Elements = append(rsp.Elements, &diagnose.PeerEntryObj{Index: i, Name: peerInfo.Name, IP: peerInfo.IP})
rsp.Elements = append(rsp.Elements, &diagnostic.PeerEntryObj{Index: i, Name: peerInfo.Name, IP: peerInfo.IP})
}
log.WithField("response", fmt.Sprintf("%+v", rsp)).Info("network peers done")
diagnose.HTTPReply(w, diagnose.CommandSucceed(rsp), json)
diagnostic.HTTPReply(w, diagnostic.CommandSucceed(rsp), json)
return
}
diagnose.HTTPReply(w, diagnose.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
diagnostic.HTTPReply(w, diagnostic.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
}
func dbClusterPeers(ctx interface{}, w http.ResponseWriter, r *http.Request) {
r.ParseForm()
diagnose.DebugHTTPForm(r)
_, json := diagnose.ParseHTTPFormOptions(r)
diagnostic.DebugHTTPForm(r)
_, json := diagnostic.ParseHTTPFormOptions(r)
// audit logs
log := logrus.WithFields(logrus.Fields{"component": "diagnose", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log := logrus.WithFields(logrus.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log.Info("cluster peers")
nDB, ok := ctx.(*NetworkDB)
if ok {
peers := nDB.ClusterPeers()
rsp := &diagnose.TableObj{Length: len(peers)}
rsp := &diagnostic.TableObj{Length: len(peers)}
for i, peerInfo := range peers {
rsp.Elements = append(rsp.Elements, &diagnose.PeerEntryObj{Index: i, Name: peerInfo.Name, IP: peerInfo.IP})
rsp.Elements = append(rsp.Elements, &diagnostic.PeerEntryObj{Index: i, Name: peerInfo.Name, IP: peerInfo.IP})
}
log.WithField("response", fmt.Sprintf("%+v", rsp)).Info("cluster peers done")
diagnose.HTTPReply(w, diagnose.CommandSucceed(rsp), json)
diagnostic.HTTPReply(w, diagnostic.CommandSucceed(rsp), json)
return
}
diagnose.HTTPReply(w, diagnose.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
diagnostic.HTTPReply(w, diagnostic.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
}
func dbCreateEntry(ctx interface{}, w http.ResponseWriter, r *http.Request) {
r.ParseForm()
diagnose.DebugHTTPForm(r)
unsafe, json := diagnose.ParseHTTPFormOptions(r)
diagnostic.DebugHTTPForm(r)
unsafe, json := diagnostic.ParseHTTPFormOptions(r)
// audit logs
log := logrus.WithFields(logrus.Fields{"component": "diagnose", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log := logrus.WithFields(logrus.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log.Info("create entry")
if len(r.Form["tname"]) < 1 ||
len(r.Form["nid"]) < 1 ||
len(r.Form["key"]) < 1 ||
len(r.Form["value"]) < 1 {
rsp := diagnose.WrongCommand(missingParameter, fmt.Sprintf("%s?tname=table_name&nid=network_id&key=k&value=v", r.URL.Path))
rsp := diagnostic.WrongCommand(missingParameter, fmt.Sprintf("%s?tname=table_name&nid=network_id&key=k&value=v", r.URL.Path))
log.Error("create entry failed, wrong input")
diagnose.HTTPReply(w, rsp, json)
diagnostic.HTTPReply(w, rsp, json)
return
}
@ -145,7 +145,7 @@ func dbCreateEntry(ctx interface{}, w http.ResponseWriter, r *http.Request) {
decodedValue, err = base64.StdEncoding.DecodeString(value)
if err != nil {
log.WithError(err).Error("create entry failed")
diagnose.HTTPReply(w, diagnose.FailCommand(err), json)
diagnostic.HTTPReply(w, diagnostic.FailCommand(err), json)
return
}
}
@ -153,34 +153,34 @@ func dbCreateEntry(ctx interface{}, w http.ResponseWriter, r *http.Request) {
nDB, ok := ctx.(*NetworkDB)
if ok {
if err := nDB.CreateEntry(tname, nid, key, decodedValue); err != nil {
rsp := diagnose.FailCommand(err)
diagnose.HTTPReply(w, rsp, json)
rsp := diagnostic.FailCommand(err)
diagnostic.HTTPReply(w, rsp, json)
log.WithError(err).Error("create entry failed")
return
}
log.Info("create entry done")
diagnose.HTTPReply(w, diagnose.CommandSucceed(nil), json)
diagnostic.HTTPReply(w, diagnostic.CommandSucceed(nil), json)
return
}
diagnose.HTTPReply(w, diagnose.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
diagnostic.HTTPReply(w, diagnostic.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
}
func dbUpdateEntry(ctx interface{}, w http.ResponseWriter, r *http.Request) {
r.ParseForm()
diagnose.DebugHTTPForm(r)
unsafe, json := diagnose.ParseHTTPFormOptions(r)
diagnostic.DebugHTTPForm(r)
unsafe, json := diagnostic.ParseHTTPFormOptions(r)
// audit logs
log := logrus.WithFields(logrus.Fields{"component": "diagnose", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log := logrus.WithFields(logrus.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log.Info("update entry")
if len(r.Form["tname"]) < 1 ||
len(r.Form["nid"]) < 1 ||
len(r.Form["key"]) < 1 ||
len(r.Form["value"]) < 1 {
rsp := diagnose.WrongCommand(missingParameter, fmt.Sprintf("%s?tname=table_name&nid=network_id&key=k&value=v", r.URL.Path))
rsp := diagnostic.WrongCommand(missingParameter, fmt.Sprintf("%s?tname=table_name&nid=network_id&key=k&value=v", r.URL.Path))
log.Error("update entry failed, wrong input")
diagnose.HTTPReply(w, rsp, json)
diagnostic.HTTPReply(w, rsp, json)
return
}
@ -194,7 +194,7 @@ func dbUpdateEntry(ctx interface{}, w http.ResponseWriter, r *http.Request) {
decodedValue, err = base64.StdEncoding.DecodeString(value)
if err != nil {
log.WithError(err).Error("update entry failed")
diagnose.HTTPReply(w, diagnose.FailCommand(err), json)
diagnostic.HTTPReply(w, diagnostic.FailCommand(err), json)
return
}
}
@ -203,31 +203,31 @@ func dbUpdateEntry(ctx interface{}, w http.ResponseWriter, r *http.Request) {
if ok {
if err := nDB.UpdateEntry(tname, nid, key, decodedValue); err != nil {
log.WithError(err).Error("update entry failed")
diagnose.HTTPReply(w, diagnose.FailCommand(err), json)
diagnostic.HTTPReply(w, diagnostic.FailCommand(err), json)
return
}
log.Info("update entry done")
diagnose.HTTPReply(w, diagnose.CommandSucceed(nil), json)
diagnostic.HTTPReply(w, diagnostic.CommandSucceed(nil), json)
return
}
diagnose.HTTPReply(w, diagnose.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
diagnostic.HTTPReply(w, diagnostic.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
}
func dbDeleteEntry(ctx interface{}, w http.ResponseWriter, r *http.Request) {
r.ParseForm()
diagnose.DebugHTTPForm(r)
_, json := diagnose.ParseHTTPFormOptions(r)
diagnostic.DebugHTTPForm(r)
_, json := diagnostic.ParseHTTPFormOptions(r)
// audit logs
log := logrus.WithFields(logrus.Fields{"component": "diagnose", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log := logrus.WithFields(logrus.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log.Info("delete entry")
if len(r.Form["tname"]) < 1 ||
len(r.Form["nid"]) < 1 ||
len(r.Form["key"]) < 1 {
rsp := diagnose.WrongCommand(missingParameter, fmt.Sprintf("%s?tname=table_name&nid=network_id&key=k", r.URL.Path))
rsp := diagnostic.WrongCommand(missingParameter, fmt.Sprintf("%s?tname=table_name&nid=network_id&key=k", r.URL.Path))
log.Error("delete entry failed, wrong input")
diagnose.HTTPReply(w, rsp, json)
diagnostic.HTTPReply(w, rsp, json)
return
}
@ -240,31 +240,31 @@ func dbDeleteEntry(ctx interface{}, w http.ResponseWriter, r *http.Request) {
err := nDB.DeleteEntry(tname, nid, key)
if err != nil {
log.WithError(err).Error("delete entry failed")
diagnose.HTTPReply(w, diagnose.FailCommand(err), json)
diagnostic.HTTPReply(w, diagnostic.FailCommand(err), json)
return
}
log.Info("delete entry done")
diagnose.HTTPReply(w, diagnose.CommandSucceed(nil), json)
diagnostic.HTTPReply(w, diagnostic.CommandSucceed(nil), json)
return
}
diagnose.HTTPReply(w, diagnose.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
diagnostic.HTTPReply(w, diagnostic.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
}
func dbGetEntry(ctx interface{}, w http.ResponseWriter, r *http.Request) {
r.ParseForm()
diagnose.DebugHTTPForm(r)
unsafe, json := diagnose.ParseHTTPFormOptions(r)
diagnostic.DebugHTTPForm(r)
unsafe, json := diagnostic.ParseHTTPFormOptions(r)
// audit logs
log := logrus.WithFields(logrus.Fields{"component": "diagnose", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log := logrus.WithFields(logrus.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log.Info("get entry")
if len(r.Form["tname"]) < 1 ||
len(r.Form["nid"]) < 1 ||
len(r.Form["key"]) < 1 {
rsp := diagnose.WrongCommand(missingParameter, fmt.Sprintf("%s?tname=table_name&nid=network_id&key=k", r.URL.Path))
rsp := diagnostic.WrongCommand(missingParameter, fmt.Sprintf("%s?tname=table_name&nid=network_id&key=k", r.URL.Path))
log.Error("get entry failed, wrong input")
diagnose.HTTPReply(w, rsp, json)
diagnostic.HTTPReply(w, rsp, json)
return
}
@ -277,7 +277,7 @@ func dbGetEntry(ctx interface{}, w http.ResponseWriter, r *http.Request) {
value, err := nDB.GetEntry(tname, nid, key)
if err != nil {
log.WithError(err).Error("get entry failed")
diagnose.HTTPReply(w, diagnose.FailCommand(err), json)
diagnostic.HTTPReply(w, diagnostic.FailCommand(err), json)
return
}
@ -288,27 +288,27 @@ func dbGetEntry(ctx interface{}, w http.ResponseWriter, r *http.Request) {
encodedValue = base64.StdEncoding.EncodeToString(value)
}
rsp := &diagnose.TableEntryObj{Key: key, Value: encodedValue}
log.WithField("response", fmt.Sprintf("%+v", rsp)).Info("update entry done")
diagnose.HTTPReply(w, diagnose.CommandSucceed(rsp), json)
rsp := &diagnostic.TableEntryObj{Key: key, Value: encodedValue}
log.WithField("response", fmt.Sprintf("%+v", rsp)).Info("get entry done")
diagnostic.HTTPReply(w, diagnostic.CommandSucceed(rsp), json)
return
}
diagnose.HTTPReply(w, diagnose.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
diagnostic.HTTPReply(w, diagnostic.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
}
func dbJoinNetwork(ctx interface{}, w http.ResponseWriter, r *http.Request) {
r.ParseForm()
diagnose.DebugHTTPForm(r)
_, json := diagnose.ParseHTTPFormOptions(r)
diagnostic.DebugHTTPForm(r)
_, json := diagnostic.ParseHTTPFormOptions(r)
// audit logs
log := logrus.WithFields(logrus.Fields{"component": "diagnose", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log := logrus.WithFields(logrus.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log.Info("join network")
if len(r.Form["nid"]) < 1 {
rsp := diagnose.WrongCommand(missingParameter, fmt.Sprintf("%s?nid=network_id", r.URL.Path))
rsp := diagnostic.WrongCommand(missingParameter, fmt.Sprintf("%s?nid=network_id", r.URL.Path))
log.Error("join network failed, wrong input")
diagnose.HTTPReply(w, rsp, json)
diagnostic.HTTPReply(w, rsp, json)
return
}
@ -318,29 +318,29 @@ func dbJoinNetwork(ctx interface{}, w http.ResponseWriter, r *http.Request) {
if ok {
if err := nDB.JoinNetwork(nid); err != nil {
log.WithError(err).Error("join network failed")
diagnose.HTTPReply(w, diagnose.FailCommand(err), json)
diagnostic.HTTPReply(w, diagnostic.FailCommand(err), json)
return
}
log.Info("join network done")
diagnose.HTTPReply(w, diagnose.CommandSucceed(nil), json)
diagnostic.HTTPReply(w, diagnostic.CommandSucceed(nil), json)
return
}
diagnose.HTTPReply(w, diagnose.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
diagnostic.HTTPReply(w, diagnostic.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
}
func dbLeaveNetwork(ctx interface{}, w http.ResponseWriter, r *http.Request) {
r.ParseForm()
diagnose.DebugHTTPForm(r)
_, json := diagnose.ParseHTTPFormOptions(r)
diagnostic.DebugHTTPForm(r)
_, json := diagnostic.ParseHTTPFormOptions(r)
// audit logs
log := logrus.WithFields(logrus.Fields{"component": "diagnose", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log := logrus.WithFields(logrus.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log.Info("leave network")
if len(r.Form["nid"]) < 1 {
rsp := diagnose.WrongCommand(missingParameter, fmt.Sprintf("%s?nid=network_id", r.URL.Path))
rsp := diagnostic.WrongCommand(missingParameter, fmt.Sprintf("%s?nid=network_id", r.URL.Path))
log.Error("leave network failed, wrong input")
diagnose.HTTPReply(w, rsp, json)
diagnostic.HTTPReply(w, rsp, json)
return
}
@ -350,30 +350,30 @@ func dbLeaveNetwork(ctx interface{}, w http.ResponseWriter, r *http.Request) {
if ok {
if err := nDB.LeaveNetwork(nid); err != nil {
log.WithError(err).Error("leave network failed")
diagnose.HTTPReply(w, diagnose.FailCommand(err), json)
diagnostic.HTTPReply(w, diagnostic.FailCommand(err), json)
return
}
log.Info("leave network done")
diagnose.HTTPReply(w, diagnose.CommandSucceed(nil), json)
diagnostic.HTTPReply(w, diagnostic.CommandSucceed(nil), json)
return
}
diagnose.HTTPReply(w, diagnose.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
diagnostic.HTTPReply(w, diagnostic.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
}
func dbGetTable(ctx interface{}, w http.ResponseWriter, r *http.Request) {
r.ParseForm()
diagnose.DebugHTTPForm(r)
unsafe, json := diagnose.ParseHTTPFormOptions(r)
diagnostic.DebugHTTPForm(r)
unsafe, json := diagnostic.ParseHTTPFormOptions(r)
// audit logs
log := logrus.WithFields(logrus.Fields{"component": "diagnose", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log := logrus.WithFields(logrus.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
log.Info("get table")
if len(r.Form["tname"]) < 1 ||
len(r.Form["nid"]) < 1 {
rsp := diagnose.WrongCommand(missingParameter, fmt.Sprintf("%s?tname=table_name&nid=network_id", r.URL.Path))
rsp := diagnostic.WrongCommand(missingParameter, fmt.Sprintf("%s?tname=table_name&nid=network_id", r.URL.Path))
log.Error("get table failed, wrong input")
diagnose.HTTPReply(w, rsp, json)
diagnostic.HTTPReply(w, rsp, json)
return
}
@ -383,7 +383,7 @@ func dbGetTable(ctx interface{}, w http.ResponseWriter, r *http.Request) {
nDB, ok := ctx.(*NetworkDB)
if ok {
table := nDB.GetTableByNetwork(tname, nid)
rsp := &diagnose.TableObj{Length: len(table)}
rsp := &diagnostic.TableObj{Length: len(table)}
var i = 0
for k, v := range table {
var encodedValue string
@ -393,7 +393,7 @@ func dbGetTable(ctx interface{}, w http.ResponseWriter, r *http.Request) {
encodedValue = base64.StdEncoding.EncodeToString(v.Value)
}
rsp.Elements = append(rsp.Elements,
&diagnose.TableEntryObj{
&diagnostic.TableEntryObj{
Index: i,
Key: k,
Value: encodedValue,
@ -402,8 +402,8 @@ func dbGetTable(ctx interface{}, w http.ResponseWriter, r *http.Request) {
i++
}
log.WithField("response", fmt.Sprintf("%+v", rsp)).Info("get table done")
diagnose.HTTPReply(w, diagnose.CommandSucceed(rsp), json)
diagnostic.HTTPReply(w, diagnostic.CommandSucceed(rsp), json)
return
}
diagnose.HTTPReply(w, diagnose.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
diagnostic.HTTPReply(w, diagnostic.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
}

View file

@ -362,7 +362,7 @@ func (sb *sandbox) rebuildDNS() error {
dnsOpt:
for _, resOpt := range resOptions {
if strings.Contains(resOpt, "ndots") {
for i, option := range dnsOptionsList {
for _, option := range dnsOptionsList {
if strings.Contains(option, "ndots") {
parts := strings.Split(option, ":")
if len(parts) != 2 {
@ -371,10 +371,8 @@ dnsOpt:
if num, err := strconv.Atoi(parts[1]); err != nil {
return fmt.Errorf("invalid number for ndots option %v", option)
} else if num > 0 {
// if the user sets ndots, we mark it as set but we remove the option to guarantee
// that into the container land only ndots:0
// if the user sets ndots, use the user setting
sb.ndotsSet = true
dnsOptionsList = append(dnsOptionsList[:i], dnsOptionsList[i+1:]...)
break dnsOpt
}
}
@ -382,7 +380,11 @@ dnsOpt:
}
}
dnsOptionsList = append(dnsOptionsList, resOptions...)
if !sb.ndotsSet {
// if the user did not set the ndots, set it to 0 to prioritize the service name resolution
// Ref: https://linux.die.net/man/5/resolv.conf
dnsOptionsList = append(dnsOptionsList, resOptions...)
}
_, err = resolvconf.Build(sb.config.resolvConfPath, dnsList, dnsSearchList, dnsOptionsList)
return err

View file

@ -27,7 +27,7 @@ github.com/gorilla/mux v1.1
github.com/hashicorp/consul v0.5.2
github.com/hashicorp/go-msgpack 71c2886f5a673a35f909803f38ece5810165097b
github.com/hashicorp/go-multierror fcdddc395df1ddf4247c69bd436e84cfa0733f7e
github.com/hashicorp/memberlist v0.1.0
github.com/hashicorp/memberlist 3d8438da9589e7b608a83ffac1ef8211486bcb7c
github.com/sean-/seed e2103e2c35297fb7e17febb81e49b312087a2372
github.com/hashicorp/go-sockaddr acd314c5781ea706c710d9ea70069fd2e110d61d
github.com/hashicorp/serf 598c54895cc5a7b1a24a398d635e8c0ea0959870

View file

@ -23,6 +23,8 @@ Please check your installation with:
go version
```
Run `make deps` to fetch dependencies before building
## Usage
Memberlist is surprisingly simple to use. An example is shown below:
@ -63,82 +65,11 @@ For complete documentation, see the associated [Godoc](http://godoc.org/github.c
## Protocol
memberlist is based on ["SWIM: Scalable Weakly-consistent Infection-style Process Group Membership Protocol"](http://www.cs.cornell.edu/~asdas/research/dsn02-swim.pdf),
with a few minor adaptations, mostly to increase propagation speed and
memberlist is based on ["SWIM: Scalable Weakly-consistent Infection-style Process Group Membership Protocol"](http://www.cs.cornell.edu/~asdas/research/dsn02-swim.pdf). However, we extend the protocol in a number of ways:
* Several extensions are made to increase propagation speed and
convergence rate.
* Another set of extensions, which we call Lifeguard, makes memberlist more robust in the presence of slow message processing (due to factors such as CPU starvation and network delay or loss).
A high level overview of the memberlist protocol (based on SWIM) is
described below, but for details please read the full
[SWIM paper](http://www.cs.cornell.edu/~asdas/research/dsn02-swim.pdf)
followed by the memberlist source. We welcome any questions related
For details on all of these extensions, please read our paper "[Lifeguard : SWIM-ing with Situational Awareness](https://arxiv.org/abs/1707.00788)", along with the memberlist source. We welcome any questions related
to the protocol on our issue tracker.
### Protocol Description
memberlist begins by joining an existing cluster or starting a new
cluster. If starting a new cluster, additional nodes are expected to join
it. New nodes in an existing cluster must be given the address of at
least one existing member in order to join the cluster. The new member
does a full state sync with the existing member over TCP and begins gossiping its
existence to the cluster.
Gossip is done over UDP with a configurable but fixed fanout and interval.
This ensures that network usage is constant with regards to number of nodes, as opposed to
exponential growth that can occur with traditional heartbeat mechanisms.
Complete state exchanges with a random node are done periodically over
TCP, but much less often than gossip messages. This increases the likelihood
that the membership list converges properly since the full state is exchanged
and merged. The interval between full state exchanges is configurable or can
be disabled entirely.
Failure detection is done by periodic random probing using a configurable interval.
If the node fails to ack within a reasonable time (typically some multiple
of RTT), then an indirect probe as well as a direct TCP probe are attempted. An
indirect probe asks a configurable number of random nodes to probe the same node,
in case there are network issues causing our own node to fail the probe. The direct
TCP probe is used to help identify the common situation where networking is
misconfigured to allow TCP but not UDP. Without the TCP probe, a UDP-isolated node
would think all other nodes were suspect and could cause churn in the cluster when
it attempts a TCP-based state exchange with another node. It is not desirable to
operate with only TCP connectivity because convergence will be much slower, but it
is enabled so that memberlist can detect this situation and alert operators.
If our probe, the indirect probes, and the direct TCP probe all fail within a
configurable time, then the node is marked "suspicious" and this knowledge is
gossiped to the cluster. A suspicious node is still considered a member of
the cluster. If the suspect member of the cluster does not dispute the suspicion
within a configurable period of time, the node is finally considered dead,
and this state is then gossiped to the cluster.
This is a brief and incomplete description of the protocol. For a better idea,
please read the
[SWIM paper](http://www.cs.cornell.edu/~asdas/research/dsn02-swim.pdf)
in its entirety, along with the memberlist source code.
### Changes from SWIM
As mentioned earlier, the memberlist protocol is based on SWIM but includes
minor changes, mostly to increase propagation speed and convergence rates.
The changes from SWIM are noted here:
* memberlist does a full state sync over TCP periodically. SWIM only propagates
changes over gossip. While both eventually reach convergence, the full state
sync increases the likelihood that nodes are fully converged more quickly,
at the expense of more bandwidth usage. This feature can be totally disabled
if you wish.
* memberlist has a dedicated gossip layer separate from the failure detection
protocol. SWIM only piggybacks gossip messages on top of probe/ack messages.
memberlist also piggybacks gossip messages on top of probe/ack messages, but
also will periodically send out dedicated gossip messages on their own. This
feature lets you have a higher gossip rate (for example once per 200ms)
and a slower failure detection rate (such as once per second), resulting
in overall faster convergence rates and data propagation speeds. This feature
can be totally disabled as well, if you wish.
* memberlist keeps the state of dead nodes around for a set amount of time,
so that when full syncs are requested, the requester also receives information
about dead nodes. Because SWIM doesn't do full syncs, SWIM deletes dead node
state immediately upon learning that the node is dead. This change again helps
the cluster converge more quickly.

View file

@ -141,6 +141,16 @@ type Config struct {
GossipNodes int
GossipToTheDeadTime time.Duration
// GossipVerifyIncoming controls whether to enforce encryption for incoming
// gossip. It is used for upshifting from unencrypted to encrypted gossip on
// a running cluster.
GossipVerifyIncoming bool
// GossipVerifyOutgoing controls whether to enforce encryption for outgoing
// gossip. It is used for upshifting from unencrypted to encrypted gossip on
// a running cluster.
GossipVerifyOutgoing bool
// EnableCompression is used to control message compression. This can
// be used to reduce bandwidth usage at the cost of slightly more CPU
// utilization. This is only available starting at protocol version 1.
@ -225,7 +235,7 @@ func DefaultLANConfig() *Config {
TCPTimeout: 10 * time.Second, // Timeout after 10 seconds
IndirectChecks: 3, // Use 3 nodes for the indirect ping
RetransmitMult: 4, // Retransmit a message 4 * log(N+1) nodes
SuspicionMult: 5, // Suspect a node for 5 * log(N+1) * Interval
SuspicionMult: 4, // Suspect a node for 4 * log(N+1) * Interval
SuspicionMaxTimeoutMult: 6, // For 10k nodes this will give a max timeout of 120 seconds
PushPullInterval: 30 * time.Second, // Low frequency
ProbeTimeout: 500 * time.Millisecond, // Reasonable RTT time for LAN
@ -233,9 +243,11 @@ func DefaultLANConfig() *Config {
DisableTcpPings: false, // TCP pings are safe, even with mixed versions
AwarenessMaxMultiplier: 8, // Probe interval backs off to 8 seconds
GossipNodes: 3, // Gossip to 3 nodes
GossipInterval: 200 * time.Millisecond, // Gossip more rapidly
GossipToTheDeadTime: 30 * time.Second, // Same as push/pull
GossipNodes: 3, // Gossip to 3 nodes
GossipInterval: 200 * time.Millisecond, // Gossip more rapidly
GossipToTheDeadTime: 30 * time.Second, // Same as push/pull
GossipVerifyIncoming: true,
GossipVerifyOutgoing: true,
EnableCompression: true, // Enable compression by default

View file

@ -22,9 +22,10 @@ import (
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/hashicorp/go-multierror"
multierror "github.com/hashicorp/go-multierror"
sockaddr "github.com/hashicorp/go-sockaddr"
"github.com/miekg/dns"
)
@ -35,11 +36,14 @@ type Memberlist struct {
numNodes uint32 // Number of known nodes (estimate)
config *Config
shutdown bool
shutdown int32 // Used as an atomic boolean value
shutdownCh chan struct{}
leave bool
leave int32 // Used as an atomic boolean value
leaveBroadcast chan struct{}
shutdownLock sync.Mutex // Serializes calls to Shutdown
leaveLock sync.Mutex // Serializes calls to Leave
transport Transport
handoff chan msgHandoff
@ -113,15 +117,44 @@ func newMemberlist(conf *Config) (*Memberlist, error) {
BindPort: conf.BindPort,
Logger: logger,
}
nt, err := NewNetTransport(nc)
// See comment below for details about the retry in here.
makeNetRetry := func(limit int) (*NetTransport, error) {
var err error
for try := 0; try < limit; try++ {
var nt *NetTransport
if nt, err = NewNetTransport(nc); err == nil {
return nt, nil
}
if strings.Contains(err.Error(), "address already in use") {
logger.Printf("[DEBUG] memberlist: Got bind error: %v", err)
continue
}
}
return nil, fmt.Errorf("failed to obtain an address: %v", err)
}
// The dynamic bind port operation is inherently racy because
// even though we are using the kernel to find a port for us, we
// are attempting to bind multiple protocols (and potentially
// multiple addresses) with the same port number. We build in a
// few retries here since this often gets transient errors in
// busy unit tests.
limit := 1
if conf.BindPort == 0 {
limit = 10
}
nt, err := makeNetRetry(limit)
if err != nil {
return nil, fmt.Errorf("Could not set up network transport: %v", err)
}
if conf.BindPort == 0 {
port := nt.GetAutoBindPort()
conf.BindPort = port
logger.Printf("[DEBUG] Using dynamic bind port %d", port)
conf.AdvertisePort = port
logger.Printf("[DEBUG] memberlist: Using dynamic bind port %d", port)
}
transport = nt
}
@ -275,23 +308,17 @@ func (m *Memberlist) tcpLookupIP(host string, defaultPort uint16) ([]ipPort, err
// resolveAddr is used to resolve the address into an address,
// port, and error. If no port is given, use the default
func (m *Memberlist) resolveAddr(hostStr string) ([]ipPort, error) {
// Normalize the incoming string to host:port so we can apply Go's
// parser to it.
port := uint16(0)
if !hasPort(hostStr) {
hostStr += ":" + strconv.Itoa(m.config.BindPort)
}
// This captures the supplied port, or the default one.
hostStr = ensurePort(hostStr, m.config.BindPort)
host, sport, err := net.SplitHostPort(hostStr)
if err != nil {
return nil, err
}
// This will capture the supplied port, or the default one added above.
lport, err := strconv.ParseUint(sport, 10, 16)
if err != nil {
return nil, err
}
port = uint16(lport)
port := uint16(lport)
// If it looks like an IP address we are done. The SplitHostPort() above
// will make sure the host part is in good shape for parsing, even for
@ -525,18 +552,17 @@ func (m *Memberlist) NumMembers() (alive int) {
// This method is safe to call multiple times, but must not be called
// after the cluster is already shut down.
func (m *Memberlist) Leave(timeout time.Duration) error {
m.nodeLock.Lock()
// We can't defer m.nodeLock.Unlock() because m.deadNode will also try to
// acquire a lock so we need to Unlock before that.
m.leaveLock.Lock()
defer m.leaveLock.Unlock()
if m.shutdown {
m.nodeLock.Unlock()
if m.hasShutdown() {
panic("leave after shutdown")
}
if !m.leave {
m.leave = true
if !m.hasLeft() {
atomic.StoreInt32(&m.leave, 1)
m.nodeLock.Lock()
state, ok := m.nodeMap[m.config.Name]
m.nodeLock.Unlock()
if !ok {
@ -562,8 +588,6 @@ func (m *Memberlist) Leave(timeout time.Duration) error {
return fmt.Errorf("timeout waiting for leave broadcast")
}
}
} else {
m.nodeLock.Unlock()
}
return nil
@ -605,21 +629,31 @@ func (m *Memberlist) ProtocolVersion() uint8 {
//
// This method is safe to call multiple times.
func (m *Memberlist) Shutdown() error {
m.nodeLock.Lock()
defer m.nodeLock.Unlock()
m.shutdownLock.Lock()
defer m.shutdownLock.Unlock()
if m.shutdown {
if m.hasShutdown() {
return nil
}
// Shut down the transport first, which should block until it's
// completely torn down. If we kill the memberlist-side handlers
// those I/O handlers might get stuck.
m.transport.Shutdown()
if err := m.transport.Shutdown(); err != nil {
m.logger.Printf("[ERR] Failed to shutdown transport: %v", err)
}
// Now tear down everything else.
m.shutdown = true
atomic.StoreInt32(&m.shutdown, 1)
close(m.shutdownCh)
m.deschedule()
return nil
}
// hasShutdown reports whether the shutdown flag is set. The flag is stored as
// an int32 and read atomically, so no lock is needed to call this.
func (m *Memberlist) hasShutdown() bool {
	flag := atomic.LoadInt32(&m.shutdown)
	return flag == 1
}
// hasLeft reports whether the leave flag is set. The flag is stored as an
// int32 and read atomically, so no lock is needed to call this.
func (m *Memberlist) hasLeft() bool {
	flag := atomic.LoadInt32(&m.leave)
	return flag == 1
}

View file

@ -55,6 +55,7 @@ const (
encryptMsg
nackRespMsg
hasCrcMsg
errMsg
)
// compressionType is used to specify the compression algorithm
@ -105,6 +106,11 @@ type nackResp struct {
SeqNo uint32
}
// errResp is sent to relay the error from the remote end.
type errResp struct {
// Error is the error message text being relayed.
Error string
}
// suspect is broadcast when we suspect a node is dead
type suspect struct {
Incarnation uint32
@ -209,6 +215,19 @@ func (m *Memberlist) handleConn(conn net.Conn) {
if err != nil {
if err != io.EOF {
m.logger.Printf("[ERR] memberlist: failed to receive: %s %s", err, LogConn(conn))
resp := errResp{err.Error()}
out, err := encode(errMsg, &resp)
if err != nil {
m.logger.Printf("[ERR] memberlist: Failed to encode error response: %s", err)
return
}
err = m.rawSendMsgStream(conn, out.Bytes())
if err != nil {
m.logger.Printf("[ERR] memberlist: Failed to send error: %s %s", err, LogConn(conn))
return
}
}
return
}
@ -283,8 +302,13 @@ func (m *Memberlist) ingestPacket(buf []byte, from net.Addr, timestamp time.Time
// Decrypt the payload
plain, err := decryptPayload(m.config.Keyring.GetKeys(), buf, nil)
if err != nil {
m.logger.Printf("[ERR] memberlist: Decrypt packet failed: %v %s", err, LogAddress(from))
return
if !m.config.GossipVerifyIncoming {
// Treat the message as plaintext
plain = buf
} else {
m.logger.Printf("[ERR] memberlist: Decrypt packet failed: %v %s", err, LogAddress(from))
return
}
}
// Continue processing the plaintext buffer
@ -557,7 +581,7 @@ func (m *Memberlist) encodeAndSendMsg(addr string, msgType messageType, msg inte
func (m *Memberlist) sendMsg(addr string, msg []byte) error {
// Check if we can piggy back any messages
bytesAvail := m.config.UDPBufferSize - len(msg) - compoundHeaderOverhead
if m.config.EncryptionEnabled() {
if m.config.EncryptionEnabled() && m.config.GossipVerifyOutgoing {
bytesAvail -= encryptOverhead(m.encryptionVersion())
}
extra := m.getBroadcasts(compoundOverhead, bytesAvail)
@ -621,7 +645,7 @@ func (m *Memberlist) rawSendMsgPacket(addr string, node *Node, msg []byte) error
}
// Check if we have encryption enabled
if m.config.EncryptionEnabled() {
if m.config.EncryptionEnabled() && m.config.GossipVerifyOutgoing {
// Encrypt the payload
var buf bytes.Buffer
primaryKey := m.config.Keyring.GetPrimaryKey()
@ -652,7 +676,7 @@ func (m *Memberlist) rawSendMsgStream(conn net.Conn, sendBuf []byte) error {
}
// Check if encryption is enabled
if m.config.EncryptionEnabled() {
if m.config.EncryptionEnabled() && m.config.GossipVerifyOutgoing {
crypt, err := m.encryptLocalState(sendBuf)
if err != nil {
m.logger.Printf("[ERROR] memberlist: Failed to encrypt local state: %v", err)
@ -721,6 +745,14 @@ func (m *Memberlist) sendAndReceiveState(addr string, join bool) ([]pushNodeStat
return nil, nil, err
}
if msgType == errMsg {
var resp errResp
if err := dec.Decode(&resp); err != nil {
return nil, nil, err
}
return nil, nil, fmt.Errorf("remote error: %v", resp.Error)
}
// Quit if not push/pull
if msgType != pushPullMsg {
err := fmt.Errorf("received invalid msgType (%d), expected pushPullMsg (%d) %s", msgType, pushPullMsg, LogConn(conn))
@ -876,7 +908,7 @@ func (m *Memberlist) readStream(conn net.Conn) (messageType, io.Reader, *codec.D
// Reset message type and bufConn
msgType = messageType(plain[0])
bufConn = bytes.NewReader(plain[1:])
} else if m.config.EncryptionEnabled() {
} else if m.config.EncryptionEnabled() && m.config.GossipVerifyIncoming {
return 0, nil, nil,
fmt.Errorf("Encryption is configured but remote state is not encrypted")
}
@ -1027,7 +1059,7 @@ func (m *Memberlist) readUserMsg(bufConn io.Reader, dec *codec.Decoder) error {
// operations, given the deadline. The bool return parameter is true if we
// were able to round trip a ping to the other node.
func (m *Memberlist) sendPingAndWaitForAck(addr string, ping ping, deadline time.Time) (bool, error) {
conn, err := m.transport.DialTimeout(addr, m.config.TCPTimeout)
conn, err := m.transport.DialTimeout(addr, deadline.Sub(time.Now()))
if err != nil {
// If the node is actually dead we expect this to fail, so we
// shouldn't spam the logs with it. After this point, errors

View file

@ -40,6 +40,11 @@ func (n *Node) Address() string {
return joinHostPort(n.Addr.String(), n.Port)
}
// String returns the node name. It gives *Node the String() string method
// required by fmt.Stringer.
func (n *Node) String() string {
return n.Name
}
// NodeState is used to manage our state view of another node
type nodeState struct {
Node
@ -246,10 +251,17 @@ func (m *Memberlist) probeNode(node *nodeState) {
nackCh := make(chan struct{}, m.config.IndirectChecks+1)
m.setProbeChannels(ping.SeqNo, ackCh, nackCh, probeInterval)
// Mark the sent time here, which should be after any pre-processing but
// before system calls to do the actual send. This probably over-reports
// a bit, but it's the best we can do. We had originally put this right
// after the I/O, but that would sometimes give negative RTT measurements
// which was not desirable.
sent := time.Now()
// Send a ping to the node. If this node looks like it's suspect or dead,
// also tack on a suspect message so that it has a chance to refute as
// soon as possible.
deadline := time.Now().Add(probeInterval)
deadline := sent.Add(probeInterval)
addr := node.Address()
if node.State == stateAlive {
if err := m.encodeAndSendMsg(addr, pingMsg, &ping); err != nil {
@ -279,11 +291,6 @@ func (m *Memberlist) probeNode(node *nodeState) {
}
}
// Mark the sent time here, which should be after any pre-processing and
// system calls to do the actual send. This probably under-reports a bit,
// but it's the best we can do.
sent := time.Now()
// Arrange for our self-awareness to get updated. At this point we've
// sent the ping, so any return statement means the probe succeeded
// which will improve our health until we get to the failure scenarios
@ -830,7 +837,7 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) {
// in-queue to be processed but blocked by the locks above. If we let
// that aliveMsg process, it'll cause us to re-join the cluster. This
// ensures that we don't.
if m.leave && a.Node == m.config.Name {
if m.hasLeft() && a.Node == m.config.Name {
return
}
@ -1106,7 +1113,7 @@ func (m *Memberlist) deadNode(d *dead) {
// Check if this is us
if state.Name == m.config.Name {
// If we are not leaving we need to refute
if !m.leave {
if !m.hasLeft() {
m.refute(state, d.Incarnation)
m.logger.Printf("[WARN] memberlist: Refuting a dead message (from: %s)", d.From)
return // Do not mark ourself dead

View file

@ -117,7 +117,7 @@ func (s *suspicion) Confirm(from string) bool {
// stop the timer then we will call the timeout function directly from
// here.
n := atomic.AddInt32(&s.n, 1)
elapsed := time.Now().Sub(s.start)
elapsed := time.Since(s.start)
remaining := remainingSuspicionTime(n, s.k, elapsed, s.min, s.max)
if s.timer.Stop() {
if remaining > 0 {

View file

@ -17,7 +17,7 @@ type Packet struct {
// Timestamp is the time when the packet was received. This should be
// taken as close as possible to the actual receipt time to help make an
// accurate RTT measurements during probes.
// accurate RTT measurement during probes.
Timestamp time.Time
}

View file

@ -217,20 +217,6 @@ func decodeCompoundMessage(buf []byte) (trunc int, parts [][]byte, err error) {
return
}
// hasPort reports whether s includes a port. s is expected to be of the form
// "host", "host:port", "ipv6::addr" or "[ipv6::address]:port".
func hasPort(s string) bool {
last := strings.LastIndex(s, ":")
if last == -1 {
// No colon at all, so there cannot be a port.
return false
}
if s[0] == '[' {
// Bracketed form: a port is present only when the last colon
// immediately follows the closing bracket.
return s[last-1] == ']'
}
// Unbracketed form: a port is present only when there is exactly one
// colon (first and last colon are the same).
return strings.Index(s, ":") == last
}
// compressPayload takes an opaque input buffer, compresses it
// and wraps it in a compress{} message that is encoded.
func compressPayload(inp []byte) (*bytes.Buffer, error) {
@ -294,3 +280,31 @@ func decompressBuffer(c *compress) ([]byte, error) {
func joinHostPort(host string, port uint16) string {
return net.JoinHostPort(host, strconv.Itoa(int(port)))
}
// hasPort reports whether s carries a port. s is expected to look like
// "host", "host:port", "ipv6::address", or "[ipv6::address]:port".
func hasPort(s string) bool {
	// A string counts as bracketed when its only "[" is the first character.
	bracketed := strings.LastIndex(s, "[") == 0
	if !bracketed {
		// Outside of brackets an IPv6 address (more than one colon) cannot
		// carry a port, so exactly one colon means "host:port".
		return strings.Count(s, ":") == 1
	}

	// Bracketed IPv6: a port is present when a colon follows the last "]".
	lastColon := strings.LastIndex(s, ":")
	closing := strings.LastIndex(s, "]")
	return closing < lastColon
}
// ensurePort returns s unchanged when it already carries a port; otherwise it
// returns s with the given port appended as the default.
func ensurePort(s string, port int) string {
	if !hasPort(s) {
		// For an IPv6 address the join call adds its own set of brackets, so
		// any brackets already on s are trimmed before the port is appended.
		host := strings.Trim(s, "[]")
		return net.JoinHostPort(host, strconv.Itoa(port))
	}
	return s
}