Browse Source

Add endpoint load-balancing mode

This is the heart of the scalability change for services in libnetwork.
The present routing mesh adds load-balancing rules for a network to
every container connected to the network.  This newer approach creates a
load-balancing endpoint per network per node.  For every service on a
network, libnetwork assigns the VIP of the service to the endpoint's
interface as an alias.  This endpoint must have a unique IP address in
order to route return traffic to it.  Traffic destined for a service's
VIP arrives at the load-balancing endpoint on the VIP and from there,
Linux load balances it among backend destinations while SNATing said
traffic to the endpoint's unique IP address.

The net result of this scheme is that each node in a swarm need only
have one set of load balancing state per service instead of one per
container on the node.  This scheme is very similar to how services
currently operate on Windows nodes in libnetwork.  As with Windows
nodes, it costs extra IP addresses in a network (one per node) and an
extra network hop, although that hop always stays within the network
stack local to the container.

In order to prevent existing deployments from suddenly failing if they
failed to allocate sufficient address space to include per-node
load-balancing endpoint IP addresses, this patch preserves the existing
functionality and activates the new functionality on a per-network
basis depending on whether the network has a load-balancing endpoint.
Eventually, moby should always set this option when creating new
networks and should only omit it for networks created as part of a swarm
that are not marked to use endpoint load balancing.

This patch also normalizes the code to treat "load" and "balancer"
as two separate words for the purposes of variable/function naming.
This means that the 'b' in "balancer" must be capitalized (for example,
populateLoadbalancers becomes populateLoadBalancers).

Signed-off-by: Chris Telfer <ctelfer@docker.com>
Chris Telfer 7 years ago
parent
commit
ea2fa20859

+ 1 - 1
libnetwork/controller.go

@@ -871,7 +871,7 @@ addToStore:
 		}
 		}
 	}()
 	}()
 
 
-	if len(network.loadBalancerIP) != 0 {
+	if network.hasLoadBalancerEndpoint() {
 		if err = network.createLoadBalancerSandbox(); err != nil {
 		if err = network.createLoadBalancerSandbox(); err != nil {
 			return nil, err
 			return nil, err
 		}
 		}

+ 6 - 0
libnetwork/endpoint.go

@@ -540,6 +540,12 @@ func (ep *endpoint) sbJoin(sb *sandbox, options ...EndpointOption) (err error) {
 		}
 		}
 	}()
 	}()
 
 
+	// Load balancing endpoints should never have a default gateway nor
+	// should they alter the status of a network's default gateway
+	if ep.loadBalancer && !sb.ingress {
+		return nil
+	}
+
 	if sb.needDefaultGW() && sb.getEndpointInGWNetwork() == nil {
 	if sb.needDefaultGW() && sb.getEndpointInGWNetwork() == nil {
 		return sb.setupDefaultGW()
 		return sb.setupDefaultGW()
 	}
 	}

+ 13 - 8
libnetwork/network.go

@@ -997,8 +997,8 @@ func (n *network) delete(force bool, rmLBEndpoint bool) error {
 	}
 	}
 
 
 	// Check that the network is empty
 	// Check that the network is empty
-	var emptyCount uint64 = 0
-	if len(n.loadBalancerIP) != 0 {
+	var emptyCount uint64
+	if n.hasLoadBalancerEndpoint() {
 		emptyCount = 1
 		emptyCount = 1
 	}
 	}
 	if !force && n.getEpCnt().EndpointCnt() > emptyCount {
 	if !force && n.getEpCnt().EndpointCnt() > emptyCount {
@@ -1008,7 +1008,7 @@ func (n *network) delete(force bool, rmLBEndpoint bool) error {
 		return &ActiveEndpointsError{name: n.name, id: n.id}
 		return &ActiveEndpointsError{name: n.name, id: n.id}
 	}
 	}
 
 
-	if len(n.loadBalancerIP) != 0 {
+	if n.hasLoadBalancerEndpoint() {
 		// If we got to this point, then the following must hold:
 		// If we got to this point, then the following must hold:
 		//  * force is true OR endpoint count == 1
 		//  * force is true OR endpoint count == 1
 		if err := n.deleteLoadBalancerSandbox(); err != nil {
 		if err := n.deleteLoadBalancerSandbox(); err != nil {
@@ -1077,9 +1077,6 @@ func (n *network) delete(force bool, rmLBEndpoint bool) error {
 	// Cleanup the service discovery for this network
 	// Cleanup the service discovery for this network
 	c.cleanupServiceDiscovery(n.ID())
 	c.cleanupServiceDiscovery(n.ID())
 
 
-	// Cleanup the load balancer
-	c.cleanupServiceBindings(n.ID())
-
 removeFromStore:
 removeFromStore:
 	// deleteFromStore performs an atomic delete operation and the
 	// deleteFromStore performs an atomic delete operation and the
 	// network.epCnt will help prevent any possible
 	// network.epCnt will help prevent any possible
@@ -1931,6 +1928,10 @@ func (n *network) hasSpecialDriver() bool {
 	return n.Type() == "host" || n.Type() == "null"
 	return n.Type() == "host" || n.Type() == "null"
 }
 }
 
 
+func (n *network) hasLoadBalancerEndpoint() bool {
+	return len(n.loadBalancerIP) != 0
+}
+
 func (n *network) ResolveName(req string, ipType int) ([]net.IP, bool) {
 func (n *network) ResolveName(req string, ipType int) ([]net.IP, bool) {
 	var ipv6Miss bool
 	var ipv6Miss bool
 
 
@@ -2111,9 +2112,9 @@ func (c *controller) getConfigNetwork(name string) (*network, error) {
 }
 }
 
 
 func (n *network) lbSandboxName() string {
 func (n *network) lbSandboxName() string {
-	name := n.name + "-sbox"
+	name := "lb-" + n.name
 	if n.ingress {
 	if n.ingress {
-		name = "lb-" + n.name
+		name = n.name + "-sbox"
 	}
 	}
 	return name
 	return name
 }
 }
@@ -2145,6 +2146,10 @@ func (n *network) createLoadBalancerSandbox() (retErr error) {
 		CreateOptionIpam(n.loadBalancerIP, nil, nil, nil),
 		CreateOptionIpam(n.loadBalancerIP, nil, nil, nil),
 		CreateOptionLoadBalancer(),
 		CreateOptionLoadBalancer(),
 	}
 	}
+	if n.hasLoadBalancerEndpoint() && !n.ingress {
+		// Mark LB endpoints as anonymous so they don't show up in DNS
+		epOptions = append(epOptions, CreateOptionAnonymous())
+	}
 	ep, err := n.createEndpoint(endpointName, epOptions...)
 	ep, err := n.createEndpoint(endpointName, epOptions...)
 	if err != nil {
 	if err != nil {
 		return err
 		return err

+ 1 - 17
libnetwork/sandbox.go

@@ -740,16 +740,8 @@ func releaseOSSboxResources(osSbox osl.Sandbox, ep *endpoint) {
 
 
 	ep.Lock()
 	ep.Lock()
 	joinInfo := ep.joinInfo
 	joinInfo := ep.joinInfo
-	vip := ep.virtualIP
 	ep.Unlock()
 	ep.Unlock()
 
 
-	if len(vip) != 0 {
-		loopName := osSbox.GetLoopbackIfaceName()
-		if err := osSbox.RemoveAliasIP(loopName, &net.IPNet{IP: vip, Mask: net.CIDRMask(32, 32)}); err != nil {
-			logrus.Warnf("Remove virtual IP %v failed: %v", vip, err)
-		}
-	}
-
 	if joinInfo == nil {
 	if joinInfo == nil {
 		return
 		return
 	}
 	}
@@ -862,14 +854,6 @@ func (sb *sandbox) populateNetworkResources(ep *endpoint) error {
 		}
 		}
 	}
 	}
 
 
-	if len(ep.virtualIP) != 0 {
-		loopName := sb.osSbox.GetLoopbackIfaceName()
-		err := sb.osSbox.AddAliasIP(loopName, &net.IPNet{IP: ep.virtualIP, Mask: net.CIDRMask(32, 32)})
-		if err != nil {
-			return fmt.Errorf("failed to add virtual IP %v: %v", ep.virtualIP, err)
-		}
-	}
-
 	if joinInfo != nil {
 	if joinInfo != nil {
 		// Set up non-interface routes.
 		// Set up non-interface routes.
 		for _, r := range joinInfo.StaticRoutes {
 		for _, r := range joinInfo.StaticRoutes {
@@ -895,7 +879,7 @@ func (sb *sandbox) populateNetworkResources(ep *endpoint) error {
 	// information including gateway and other routes so that
 	// information including gateway and other routes so that
 	// loadbalancers are populated all the network state is in
 	// loadbalancers are populated all the network state is in
 	// place in the sandbox.
 	// place in the sandbox.
-	sb.populateLoadbalancers(ep)
+	sb.populateLoadBalancers(ep)
 
 
 	// Only update the store if we did not come here as part of
 	// Only update the store if we did not come here as part of
 	// sandbox delete. If we came here as part of delete then do
 	// sandbox delete. If we came here as part of delete then do

+ 9 - 8
libnetwork/service_common.go

@@ -289,8 +289,7 @@ func (c *controller) addServiceBinding(svcName, svcID, nID, eID, containerName s
 		logrus.Warnf("addServiceBinding %s possible transient state ok:%t entries:%d set:%t %s", eID, ok, entries, b, setStr)
 		logrus.Warnf("addServiceBinding %s possible transient state ok:%t entries:%d set:%t %s", eID, ok, entries, b, setStr)
 	}
 	}
 
 
-	// Add loadbalancer service and backend in all sandboxes in
-	// the network only if vip is valid.
+	// Add loadbalancer service and backend to the network
 	n.(*network).addLBBackend(ip, lb)
 	n.(*network).addLBBackend(ip, lb)
 
 
 	// Add the appropriate name resolutions
 	// Add the appropriate name resolutions
@@ -305,11 +304,6 @@ func (c *controller) rmServiceBinding(svcName, svcID, nID, eID, containerName st
 
 
 	var rmService bool
 	var rmService bool
 
 
-	n, err := c.NetworkByID(nID)
-	if err != nil {
-		return err
-	}
-
 	skey := serviceKey{
 	skey := serviceKey{
 		id:    svcID,
 		id:    svcID,
 		ports: portConfigs(ingressPorts).String(),
 		ports: portConfigs(ingressPorts).String(),
@@ -367,7 +361,14 @@ func (c *controller) rmServiceBinding(svcName, svcID, nID, eID, containerName st
 	// Remove loadbalancer service(if needed) and backend in all
 	// Remove loadbalancer service(if needed) and backend in all
 	// sandboxes in the network only if the vip is valid.
 	// sandboxes in the network only if the vip is valid.
 	if entries == 0 {
 	if entries == 0 {
-		n.(*network).rmLBBackend(ip, lb, rmService, fullRemove)
+		// The network may well have been deleted before the last
+		// of the service bindings.  That's ok, because removing
+		// the network sandbox implicitly removes the backend
+		// service bindings.
+		n, err := c.NetworkByID(nID)
+		if err == nil {
+			n.(*network).rmLBBackend(ip, lb, rmService, fullRemove)
+		}
 	}
 	}
 
 
 	// Delete the name resolutions
 	// Delete the name resolutions

+ 109 - 137
libnetwork/service_linux.go

@@ -30,40 +30,9 @@ func init() {
 	reexec.Register("redirecter", redirecter)
 	reexec.Register("redirecter", redirecter)
 }
 }
 
 
-// Get all loadbalancers on this network that is currently discovered
-// on this node.
-func (n *network) connectedLoadbalancers() []*loadBalancer {
-	c := n.getController()
-
-	c.Lock()
-	serviceBindings := make([]*service, 0, len(c.serviceBindings))
-	for _, s := range c.serviceBindings {
-		serviceBindings = append(serviceBindings, s)
-	}
-	c.Unlock()
-
-	var lbs []*loadBalancer
-	for _, s := range serviceBindings {
-		s.Lock()
-		// Skip the serviceBindings that got deleted
-		if s.deleted {
-			s.Unlock()
-			continue
-		}
-		if lb, ok := s.loadBalancers[n.ID()]; ok {
-			lbs = append(lbs, lb)
-		}
-		s.Unlock()
-	}
-
-	return lbs
-}
-
 // Populate all loadbalancers on the network that the passed endpoint
 // Populate all loadbalancers on the network that the passed endpoint
 // belongs to, into this sandbox.
 // belongs to, into this sandbox.
-func (sb *sandbox) populateLoadbalancers(ep *endpoint) {
-	var gwIP net.IP
-
+func (sb *sandbox) populateLoadBalancers(ep *endpoint) {
 	// This is an interface less endpoint. Nothing to do.
 	// This is an interface less endpoint. Nothing to do.
 	if ep.Iface() == nil {
 	if ep.Iface() == nil {
 		return
 		return
@@ -77,102 +46,67 @@ func (sb *sandbox) populateLoadbalancers(ep *endpoint) {
 			logrus.Errorf("Failed to add redirect rules for ep %s (%s): %v", ep.Name(), ep.ID()[0:7], err)
 			logrus.Errorf("Failed to add redirect rules for ep %s (%s): %v", ep.Name(), ep.ID()[0:7], err)
 		}
 		}
 	}
 	}
+}
 
 
-	if sb.ingress {
-		// For the ingress sandbox if this is not gateway
-		// endpoint do nothing.
-		if ep != sb.getGatewayEndpoint() {
-			return
-		}
-
-		// This is the gateway endpoint. Now get the ingress
-		// network and plumb the loadbalancers.
-		gwIP = ep.Iface().Address().IP
-		for _, ep := range sb.getConnectedEndpoints() {
-			if !ep.endpointInGWNetwork() {
-				n = ep.getNetwork()
-				eIP = ep.Iface().Address()
-			}
+func (n *network) findLBEndpointSandbox() (*endpoint, *sandbox, error) {
+	// TODO: get endpoint from store?  See EndpointInfo()
+	var ep *endpoint
+	// Find this node's LB sandbox endpoint:  there should be exactly one
+	for _, e := range n.Endpoints() {
+		epi := e.Info()
+		if epi != nil && epi.LoadBalancer() {
+			ep = e.(*endpoint)
+			break
 		}
 		}
 	}
 	}
+	if ep == nil {
+		return nil, nil, fmt.Errorf("Unable to find load balancing endpoint for network %s", n.ID())
+	}
+	// Get the load balancer sandbox itself as well
+	sb, ok := ep.getSandbox()
+	if !ok {
+		return nil, nil, fmt.Errorf("Unable to get sandbox for %s(%s) in for %s", ep.Name(), ep.ID(), n.ID())
+	}
+	ep = sb.getEndpoint(ep.ID())
+	if ep == nil {
+		return nil, nil, fmt.Errorf("Load balancing endpoint %s(%s) removed from %s", ep.Name(), ep.ID(), n.ID())
+	}
+	return ep, sb, nil
+}
 
 
-	for _, lb := range n.connectedLoadbalancers() {
-		// Skip if vip is not valid.
-		if len(lb.vip) == 0 {
-			continue
-		}
-
-		lb.service.Lock()
-		for _, be := range lb.backEnds {
-			if !be.disabled {
-				sb.addLBBackend(be.ip, lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, gwIP, n.ingress)
-			}
+// Searches the OS sandbox for the name of the endpoint interface
+// within the sandbox.   This is required for adding/removing IP
+// aliases to the interface.
+func findIfaceDstName(sb *sandbox, ep *endpoint) string {
+	srcName := ep.Iface().SrcName()
+	for _, i := range sb.osSbox.Info().Interfaces() {
+		if i.SrcName() == srcName {
+			return i.DstName()
 		}
 		}
-		lb.service.Unlock()
 	}
 	}
+	return ""
 }
 }
 
 
-// Add loadbalancer backend to all sandboxes which has a connection to
-// this network. If needed add the service as well.
+// Add loadbalancer backend to the load balancer sandbox for the network.
+// If needed add the service as well.
 func (n *network) addLBBackend(ip net.IP, lb *loadBalancer) {
 func (n *network) addLBBackend(ip net.IP, lb *loadBalancer) {
 	if len(lb.vip) == 0 {
 	if len(lb.vip) == 0 {
 		return
 		return
 	}
 	}
-	n.WalkEndpoints(func(e Endpoint) bool {
-		ep := e.(*endpoint)
-		if sb, ok := ep.getSandbox(); ok {
-			if !sb.isEndpointPopulated(ep) {
-				return false
-			}
-
-			var gwIP net.IP
-			if ep := sb.getGatewayEndpoint(); ep != nil {
-				gwIP = ep.Iface().Address().IP
-			}
-
-			sb.addLBBackend(ip, lb.vip, lb.fwMark, lb.service.ingressPorts, ep.Iface().Address(), gwIP, n.ingress)
-		}
-
-		return false
-	})
-}
-
-// Remove loadbalancer backend from all sandboxes which has a
-// connection to this network. If needed remove the service entry as
-// well, as specified by the rmService bool.
-func (n *network) rmLBBackend(ip net.IP, lb *loadBalancer, rmService bool, fullRemove bool) {
-	if len(lb.vip) == 0 {
+	ep, sb, err := n.findLBEndpointSandbox()
+	if err != nil {
+		logrus.Errorf("error in addLBBackend for %s/%s for %v", n.ID(), n.Name(), err)
 		return
 		return
 	}
 	}
-	n.WalkEndpoints(func(e Endpoint) bool {
-		ep := e.(*endpoint)
-		if sb, ok := ep.getSandbox(); ok {
-			if !sb.isEndpointPopulated(ep) {
-				return false
-			}
-
-			var gwIP net.IP
-			if ep := sb.getGatewayEndpoint(); ep != nil {
-				gwIP = ep.Iface().Address().IP
-			}
-
-			sb.rmLBBackend(ip, lb.vip, lb.fwMark, lb.service.ingressPorts, ep.Iface().Address(), gwIP, rmService, fullRemove, n.ingress)
-		}
-
-		return false
-	})
-}
-
-// Add loadbalancer backend into one connected sandbox.
-func (sb *sandbox) addLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, gwIP net.IP, isIngressNetwork bool) {
 	if sb.osSbox == nil {
 	if sb.osSbox == nil {
 		return
 		return
 	}
 	}
-
-	if isIngressNetwork && !sb.ingress {
+	if n.ingress && !sb.ingress {
 		return
 		return
 	}
 	}
 
 
+	eIP := ep.Iface().Address()
+
 	i, err := ipvs.New(sb.Key())
 	i, err := ipvs.New(sb.Key())
 	if err != nil {
 	if err != nil {
 		logrus.Errorf("Failed to create an ipvs handle for sbox %s (%s,%s) for lb addition: %v", sb.ID()[0:7], sb.ContainerID()[0:7], sb.Key(), err)
 		logrus.Errorf("Failed to create an ipvs handle for sbox %s (%s,%s) for lb addition: %v", sb.ID()[0:7], sb.ContainerID()[0:7], sb.Key(), err)
@@ -182,28 +116,43 @@ func (sb *sandbox) addLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*P
 
 
 	s := &ipvs.Service{
 	s := &ipvs.Service{
 		AddressFamily: nl.FAMILY_V4,
 		AddressFamily: nl.FAMILY_V4,
-		FWMark:        fwMark,
+		FWMark:        lb.fwMark,
 		SchedName:     ipvs.RoundRobin,
 		SchedName:     ipvs.RoundRobin,
 	}
 	}
 
 
 	if !i.IsServicePresent(s) {
 	if !i.IsServicePresent(s) {
-		var filteredPorts []*PortConfig
+		// Add IP alias for the VIP to the endpoint
+		ifName := findIfaceDstName(sb, ep)
+		if ifName == "" {
+			logrus.Errorf("Failed find interface name for endpoint %s(%s) to create LB alias", ep.ID(), ep.Name())
+			return
+		}
+		err := sb.osSbox.AddAliasIP(ifName, &net.IPNet{IP: lb.vip, Mask: net.CIDRMask(32, 32)})
+		if err != nil {
+			logrus.Errorf("Failed add IP alias %s to network %s LB endpoint interface %s: %v", lb.vip, n.ID(), ifName, err)
+			return
+		}
+
 		if sb.ingress {
 		if sb.ingress {
-			filteredPorts = filterPortConfigs(ingressPorts, false)
+			var gwIP net.IP
+			if ep := sb.getGatewayEndpoint(); ep != nil {
+				gwIP = ep.Iface().Address().IP
+			}
+			filteredPorts := filterPortConfigs(lb.service.ingressPorts, false)
 			if err := programIngress(gwIP, filteredPorts, false); err != nil {
 			if err := programIngress(gwIP, filteredPorts, false); err != nil {
 				logrus.Errorf("Failed to add ingress: %v", err)
 				logrus.Errorf("Failed to add ingress: %v", err)
 				return
 				return
 			}
 			}
 		}
 		}
 
 
-		logrus.Debugf("Creating service for vip %s fwMark %d ingressPorts %#v in sbox %s (%s)", vip, fwMark, ingressPorts, sb.ID()[0:7], sb.ContainerID()[0:7])
-		if err := invokeFWMarker(sb.Key(), vip, fwMark, ingressPorts, eIP, false); err != nil {
+		logrus.Debugf("Creating service for vip %s fwMark %d ingressPorts %#v in sbox %s (%s)", lb.vip, lb.fwMark, lb.service.ingressPorts, sb.ID()[0:7], sb.ContainerID()[0:7])
+		if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, false); err != nil {
 			logrus.Errorf("Failed to add firewall mark rule in sbox %s (%s): %v", sb.ID()[0:7], sb.ContainerID()[0:7], err)
 			logrus.Errorf("Failed to add firewall mark rule in sbox %s (%s): %v", sb.ID()[0:7], sb.ContainerID()[0:7], err)
 			return
 			return
 		}
 		}
 
 
 		if err := i.NewService(s); err != nil && err != syscall.EEXIST {
 		if err := i.NewService(s); err != nil && err != syscall.EEXIST {
-			logrus.Errorf("Failed to create a new service for vip %s fwmark %d in sbox %s (%s): %v", vip, fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
+			logrus.Errorf("Failed to create a new service for vip %s fwmark %d in sbox %s (%s): %v", lb.vip, lb.fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
 			return
 			return
 		}
 		}
 	}
 	}
@@ -218,20 +167,32 @@ func (sb *sandbox) addLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*P
 	// destination.
 	// destination.
 	s.SchedName = ""
 	s.SchedName = ""
 	if err := i.NewDestination(s, d); err != nil && err != syscall.EEXIST {
 	if err := i.NewDestination(s, d); err != nil && err != syscall.EEXIST {
-		logrus.Errorf("Failed to create real server %s for vip %s fwmark %d in sbox %s (%s): %v", ip, vip, fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
+		logrus.Errorf("Failed to create real server %s for vip %s fwmark %d in sbox %s (%s): %v", ip, lb.vip, lb.fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
 	}
 	}
 }
 }
 
 
-// Remove loadbalancer backend from one connected sandbox.
-func (sb *sandbox) rmLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, gwIP net.IP, rmService bool, fullRemove bool, isIngressNetwork bool) {
+// Remove loadbalancer backend from the load balancing endpoint for this
+// network. If 'rmService' is true, then remove the service entry as well.
+// If 'fullRemove' is true then completely remove the entry, otherwise
+// just deweight it for now.
+func (n *network) rmLBBackend(ip net.IP, lb *loadBalancer, rmService bool, fullRemove bool) {
+	if len(lb.vip) == 0 {
+		return
+	}
+	ep, sb, err := n.findLBEndpointSandbox()
+	if err != nil {
+		logrus.Errorf("error in rmLBBackend for %s/%s for %v", n.ID(), n.Name(), err)
+		return
+	}
 	if sb.osSbox == nil {
 	if sb.osSbox == nil {
 		return
 		return
 	}
 	}
-
-	if isIngressNetwork && !sb.ingress {
+	if n.ingress && !sb.ingress {
 		return
 		return
 	}
 	}
 
 
+	eIP := ep.Iface().Address()
+
 	i, err := ipvs.New(sb.Key())
 	i, err := ipvs.New(sb.Key())
 	if err != nil {
 	if err != nil {
 		logrus.Errorf("Failed to create an ipvs handle for sbox %s (%s,%s) for lb removal: %v", sb.ID()[0:7], sb.ContainerID()[0:7], sb.Key(), err)
 		logrus.Errorf("Failed to create an ipvs handle for sbox %s (%s,%s) for lb removal: %v", sb.ID()[0:7], sb.ContainerID()[0:7], sb.Key(), err)
@@ -241,7 +202,7 @@ func (sb *sandbox) rmLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*Po
 
 
 	s := &ipvs.Service{
 	s := &ipvs.Service{
 		AddressFamily: nl.FAMILY_V4,
 		AddressFamily: nl.FAMILY_V4,
-		FWMark:        fwMark,
+		FWMark:        lb.fwMark,
 	}
 	}
 
 
 	d := &ipvs.Destination{
 	d := &ipvs.Destination{
@@ -252,32 +213,46 @@ func (sb *sandbox) rmLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*Po
 
 
 	if fullRemove {
 	if fullRemove {
 		if err := i.DelDestination(s, d); err != nil && err != syscall.ENOENT {
 		if err := i.DelDestination(s, d); err != nil && err != syscall.ENOENT {
-			logrus.Errorf("Failed to delete real server %s for vip %s fwmark %d in sbox %s (%s): %v", ip, vip, fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
+			logrus.Errorf("Failed to delete real server %s for vip %s fwmark %d in sbox %s (%s): %v", ip, lb.vip, lb.fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
 		}
 		}
 	} else {
 	} else {
 		d.Weight = 0
 		d.Weight = 0
 		if err := i.UpdateDestination(s, d); err != nil && err != syscall.ENOENT {
 		if err := i.UpdateDestination(s, d); err != nil && err != syscall.ENOENT {
-			logrus.Errorf("Failed to set LB weight of real server %s to 0 for vip %s fwmark %d in sbox %s (%s): %v", ip, vip, fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
+			logrus.Errorf("Failed to set LB weight of real server %s to 0 for vip %s fwmark %d in sbox %s (%s): %v", ip, lb.vip, lb.fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
 		}
 		}
 	}
 	}
 
 
 	if rmService {
 	if rmService {
 		s.SchedName = ipvs.RoundRobin
 		s.SchedName = ipvs.RoundRobin
 		if err := i.DelService(s); err != nil && err != syscall.ENOENT {
 		if err := i.DelService(s); err != nil && err != syscall.ENOENT {
-			logrus.Errorf("Failed to delete service for vip %s fwmark %d in sbox %s (%s): %v", vip, fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
+			logrus.Errorf("Failed to delete service for vip %s fwmark %d in sbox %s (%s): %v", lb.vip, lb.fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
 		}
 		}
 
 
-		var filteredPorts []*PortConfig
 		if sb.ingress {
 		if sb.ingress {
-			filteredPorts = filterPortConfigs(ingressPorts, true)
+			var gwIP net.IP
+			if ep := sb.getGatewayEndpoint(); ep != nil {
+				gwIP = ep.Iface().Address().IP
+			}
+			filteredPorts := filterPortConfigs(lb.service.ingressPorts, true)
 			if err := programIngress(gwIP, filteredPorts, true); err != nil {
 			if err := programIngress(gwIP, filteredPorts, true); err != nil {
 				logrus.Errorf("Failed to delete ingress: %v", err)
 				logrus.Errorf("Failed to delete ingress: %v", err)
 			}
 			}
 		}
 		}
 
 
-		if err := invokeFWMarker(sb.Key(), vip, fwMark, ingressPorts, eIP, true); err != nil {
+		if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, true); err != nil {
 			logrus.Errorf("Failed to delete firewall mark rule in sbox %s (%s): %v", sb.ID()[0:7], sb.ContainerID()[0:7], err)
 			logrus.Errorf("Failed to delete firewall mark rule in sbox %s (%s): %v", sb.ID()[0:7], sb.ContainerID()[0:7], err)
 		}
 		}
+
+		// Remove IP alias from the VIP to the endpoint
+		ifName := findIfaceDstName(sb, ep)
+		if ifName == "" {
+			logrus.Errorf("Failed find interface name for endpoint %s(%s) to create LB alias", ep.ID(), ep.Name())
+			return
+		}
+		err := sb.osSbox.RemoveAliasIP(ifName, &net.IPNet{IP: lb.vip, Mask: net.CIDRMask(32, 32)})
+		if err != nil {
+			logrus.Errorf("Failed add IP alias %s to network %s LB endpoint interface %s: %v", lb.vip, n.ID(), ifName, err)
+		}
 	}
 	}
 }
 }
 
 
@@ -623,7 +598,7 @@ func fwMarker() {
 		ingressPorts, err = readPortsFromFile(os.Args[5])
 		ingressPorts, err = readPortsFromFile(os.Args[5])
 		if err != nil {
 		if err != nil {
 			logrus.Errorf("Failed reading ingress ports file: %v", err)
 			logrus.Errorf("Failed reading ingress ports file: %v", err)
-			os.Exit(6)
+			os.Exit(2)
 		}
 		}
 	}
 	}
 
 
@@ -631,7 +606,7 @@ func fwMarker() {
 	fwMark, err := strconv.ParseUint(os.Args[3], 10, 32)
 	fwMark, err := strconv.ParseUint(os.Args[3], 10, 32)
 	if err != nil {
 	if err != nil {
 		logrus.Errorf("bad fwmark value(%s) passed: %v", os.Args[3], err)
 		logrus.Errorf("bad fwmark value(%s) passed: %v", os.Args[3], err)
-		os.Exit(2)
+		os.Exit(3)
 	}
 	}
 	addDelOpt := os.Args[4]
 	addDelOpt := os.Args[4]
 
 
@@ -645,20 +620,20 @@ func fwMarker() {
 	ns, err := netns.GetFromPath(os.Args[1])
 	ns, err := netns.GetFromPath(os.Args[1])
 	if err != nil {
 	if err != nil {
 		logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err)
 		logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err)
-		os.Exit(3)
+		os.Exit(4)
 	}
 	}
 	defer ns.Close()
 	defer ns.Close()
 
 
 	if err := netns.Set(ns); err != nil {
 	if err := netns.Set(ns); err != nil {
 		logrus.Errorf("setting into container net ns %v failed, %v", os.Args[1], err)
 		logrus.Errorf("setting into container net ns %v failed, %v", os.Args[1], err)
-		os.Exit(4)
+		os.Exit(5)
 	}
 	}
 
 
 	if addDelOpt == "-A" {
 	if addDelOpt == "-A" {
 		eIP, subnet, err := net.ParseCIDR(os.Args[6])
 		eIP, subnet, err := net.ParseCIDR(os.Args[6])
 		if err != nil {
 		if err != nil {
 			logrus.Errorf("Failed to parse endpoint IP %s: %v", os.Args[6], err)
 			logrus.Errorf("Failed to parse endpoint IP %s: %v", os.Args[6], err)
-			os.Exit(9)
+			os.Exit(6)
 		}
 		}
 
 
 		ruleParams := strings.Fields(fmt.Sprintf("-m ipvs --ipvs -d %s -j SNAT --to-source %s", subnet, eIP))
 		ruleParams := strings.Fields(fmt.Sprintf("-m ipvs --ipvs -d %s -j SNAT --to-source %s", subnet, eIP))
@@ -669,21 +644,18 @@ func fwMarker() {
 			err := ioutil.WriteFile("/proc/sys/net/ipv4/vs/conntrack", []byte{'1', '\n'}, 0644)
 			err := ioutil.WriteFile("/proc/sys/net/ipv4/vs/conntrack", []byte{'1', '\n'}, 0644)
 			if err != nil {
 			if err != nil {
 				logrus.Errorf("Failed to write to /proc/sys/net/ipv4/vs/conntrack: %v", err)
 				logrus.Errorf("Failed to write to /proc/sys/net/ipv4/vs/conntrack: %v", err)
-				os.Exit(8)
+				os.Exit(7)
 			}
 			}
 		}
 		}
 	}
 	}
 
 
-	rule := strings.Fields(fmt.Sprintf("-t mangle %s OUTPUT -d %s/32 -j MARK --set-mark %d", addDelOpt, vip, fwMark))
-	rules = append(rules, rule)
-
-	rule = strings.Fields(fmt.Sprintf("-t nat %s OUTPUT -p icmp --icmp echo-request -d %s -j DNAT --to 127.0.0.1", addDelOpt, vip))
+	rule := strings.Fields(fmt.Sprintf("-t mangle %s INPUT -d %s/32 -j MARK --set-mark %d", addDelOpt, vip, fwMark))
 	rules = append(rules, rule)
 	rules = append(rules, rule)
 
 
 	for _, rule := range rules {
 	for _, rule := range rules {
 		if err := iptables.RawCombinedOutputNative(rule...); err != nil {
 		if err := iptables.RawCombinedOutputNative(rule...); err != nil {
 			logrus.Errorf("setting up rule failed, %v: %v", rule, err)
 			logrus.Errorf("setting up rule failed, %v: %v", rule, err)
-			os.Exit(5)
+			os.Exit(8)
 		}
 		}
 	}
 	}
 }
 }

+ 1 - 1
libnetwork/service_unsupported.go

@@ -18,7 +18,7 @@ func (c *controller) rmServiceBinding(name, sid, nid, eid string, vip net.IP, in
 	return fmt.Errorf("not supported")
 	return fmt.Errorf("not supported")
 }
 }
 
 
-func (sb *sandbox) populateLoadbalancers(ep *endpoint) {
+func (sb *sandbox) populateLoadBalancers(ep *endpoint) {
 }
 }
 
 
 func arrangeIngressFilterRule() {
 func arrangeIngressFilterRule() {

+ 1 - 4
libnetwork/service_windows.go

@@ -128,9 +128,6 @@ func (n *network) rmLBBackend(ip net.IP, lb *loadBalancer, rmService bool, fullR
 		return
 		return
 	}
 	}
 
 
-	vip := lb.vip
-	ingressPorts := lb.service.ingressPorts
-
 	if system.GetOSVersion().Build > 16236 {
 	if system.GetOSVersion().Build > 16236 {
 		if numEnabledBackends(lb) > 0 {
 		if numEnabledBackends(lb) > 0 {
 			//Reprogram HNS (actually VFP) with the existing backends.
 			//Reprogram HNS (actually VFP) with the existing backends.
@@ -169,7 +166,7 @@ func numEnabledBackends(lb *loadBalancer) int {
 	return nEnabled
 	return nEnabled
 }
 }
 
 
-func (sb *sandbox) populateLoadbalancers(ep *endpoint) {
+func (sb *sandbox) populateLoadBalancers(ep *endpoint) {
 }
 }
 
 
 func arrangeIngressFilterRule() {
 func arrangeIngressFilterRule() {