Forráskód Böngészése

Merge pull request #821 from mrjana/overlay

Add overlay network support in < 3.16 kernels
aboch 9 éve
szülő
commit
f95f14d77b

+ 119 - 0
libnetwork/drivers/overlay/filter.go

@@ -0,0 +1,119 @@
+package overlay
+
+import (
+	"fmt"
+	"sync"
+
+	"github.com/Sirupsen/logrus"
+	"github.com/docker/libnetwork/iptables"
+)
+
+const globalChain = "DOCKER-OVERLAY" // chain hooked from OUTPUT and FORWARD; holds the per-bridge jump rules (see setFilters)
+
+var filterOnce sync.Once // makes setupGlobalChain run at most once; triggered from setNetworkChain
+
+// rawIPTables hands args to iptables.Raw and turns the outcome into a single
+// error value.
+//
+// A non-nil error from iptables.Raw, or any non-empty output, is reported as
+// a failure; nil is returned only when the call produced no output.
+func rawIPTables(args ...string) error {
+	out, err := iptables.Raw(args...)
+	if err != nil {
+		return fmt.Errorf("unable to add overlay filter: %v", err)
+	}
+
+	// Output without an error is still treated as a failure.
+	if len(out) > 0 {
+		return fmt.Errorf("unable to add overlay filter: %s", string(out))
+	}
+
+	return nil
+}
+
+// setupGlobalChain creates the global overlay chain and appends a default
+// RETURN rule to it. Failures are logged, not returned; if the chain cannot
+// be created the RETURN rule is not attempted.
+func setupGlobalChain() {
+	err := rawIPTables("-N", globalChain)
+	if err != nil {
+		logrus.Errorf("could not create global overlay chain: %v", err)
+		return
+	}
+
+	err = rawIPTables("-A", globalChain, "-j", "RETURN")
+	if err != nil {
+		logrus.Errorf("could not install default return chain in the overlay global chain: %v", err)
+	}
+}
+
+// setNetworkChain creates or deletes the per-network chain cname.
+//
+// With remove == false the chain is created ("-N") and a default DROP rule is
+// appended to it. With remove == true the chain is flushed ("-F") and then
+// deleted ("-X"). The first error encountered is returned with context.
+func setNetworkChain(cname string, remove bool) error {
+	// The global overlay chain is set up once, on first use.
+	filterOnce.Do(setupGlobalChain)
+
+	// chainOp runs a chain-level operation ("-N" or "-X") on cname.
+	chainOp := func(op string) error {
+		if err := rawIPTables(op, cname); err != nil {
+			return fmt.Errorf("failed network chain operation %q for chain %s: %v", op, cname, err)
+		}
+		return nil
+	}
+
+	if remove {
+		// Flush the chain's rules before deleting the chain itself.
+		if err := rawIPTables("-F", cname); err != nil {
+			return fmt.Errorf("failed to flush overlay network chain %s rules: %v", cname, err)
+		}
+		return chainOp("-X")
+	}
+
+	if err := chainOp("-N"); err != nil {
+		return err
+	}
+
+	// A freshly created chain drops everything by default.
+	if err := rawIPTables("-A", cname, "-j", "DROP"); err != nil {
+		return fmt.Errorf("failed adding default drop rule to overlay network chain %s: %v", cname, err)
+	}
+
+	return nil
+}
+
+func addNetworkChain(cname string) error {
+	return setNetworkChain(cname, false)
+}
+
+func removeNetworkChain(cname string) error {
+	return setNetworkChain(cname, true)
+}
+
+// setFilters installs or removes the iptables rules that tie the bridge
+// brName to the per-network chain cname.
+//
+// With remove == false it first (re)inserts the jump to the global overlay
+// chain in OUTPUT and FORWARD, then inserts, if not already present:
+//   - in the global chain: "-o brName -j cname"
+//   - in cname:            "-i brName -j ACCEPT"
+//
+// With remove == true it deletes those two rules if they are present and
+// leaves the OUTPUT/FORWARD hooks untouched.
+//
+// The first failing iptables operation is returned as an error.
+func setFilters(cname, brName string, remove bool) error {
+	opt := "-I"
+	if remove {
+		opt = "-D"
+	}
+
+	// Every time we set filters for a new subnet, move the global overlay
+	// hook to the top of both the OUTPUT and FORWARD chains.
+	if !remove {
+		for _, chain := range []string{"OUTPUT", "FORWARD"} {
+			if iptables.Exists(iptables.Filter, chain, "-j", globalChain) {
+				if err := rawIPTables("-D", chain, "-j", globalChain); err != nil {
+					return fmt.Errorf("failed to delete overlay hook in chain %s while moving the hook: %v", chain, err)
+				}
+			}
+
+			if err := rawIPTables("-I", chain, "-j", globalChain); err != nil {
+				return fmt.Errorf("failed to insert overlay hook in chain %s: %v", chain, err)
+			}
+		}
+	}
+
+	// Insert/delete the rule to jump to the per-bridge chain. Work is needed
+	// only when the rule is absent on insert or present on delete, i.e. when
+	// exists == remove.
+	if exists := iptables.Exists(iptables.Filter, globalChain, "-o", brName, "-j", cname); exists == remove {
+		if err := rawIPTables(opt, globalChain, "-o", brName, "-j", cname); err != nil {
+			return fmt.Errorf("failed to add per-bridge filter rule for bridge %s, network chain %s: %v", brName, cname, err)
+		}
+	}
+
+	// Same reasoning for the ACCEPT rule: nothing to do when it is already
+	// present on insert or already absent on delete.
+	if exists := iptables.Exists(iptables.Filter, cname, "-i", brName, "-j", "ACCEPT"); exists != remove {
+		return nil
+	}
+
+	if err := rawIPTables(opt, cname, "-i", brName, "-j", "ACCEPT"); err != nil {
+		return fmt.Errorf("failed to add overlay filter rule for network chain %s, bridge %s: %v", cname, brName, err)
+	}
+
+	return nil
+}
+
+func addFilters(cname, brName string) error {
+	return setFilters(cname, brName, false)
+}
+
+func removeFilters(cname, brName string) error {
+	return setFilters(cname, brName, true)
+}

+ 109 - 22
libnetwork/drivers/overlay/ov_network.go

@@ -4,6 +4,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"net"
+	"os"
 	"sync"
 	"syscall"
 
@@ -12,11 +13,17 @@ import (
 	"github.com/docker/libnetwork/driverapi"
 	"github.com/docker/libnetwork/netutils"
 	"github.com/docker/libnetwork/osl"
+	"github.com/docker/libnetwork/resolvconf"
 	"github.com/docker/libnetwork/types"
 	"github.com/vishvananda/netlink"
 	"github.com/vishvananda/netlink/nl"
 )
 
+var (
+	hostMode     bool      // set by setHostMode; when true, sandboxes are created with osCreate=false and iptables filters are installed
+	hostModeOnce sync.Once // makes setHostMode run at most once (invoked from initSandbox)
+)
+
 type networkTable map[string]*network
 
 type subnet struct {
@@ -87,22 +94,6 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Dat
 	return nil
 }
 
-/* func (d *driver) createNetworkfromStore(nid string) (*network, error) {
-	n := &network{
-		id:        nid,
-		driver:    d,
-		endpoints: endpointTable{},
-		once:      &sync.Once{},
-		subnets:   []*subnet{},
-	}
-
-	err := d.store.GetObject(datastore.Key(n.Key()...), n)
-	if err != nil {
-		return nil, fmt.Errorf("unable to get network %q from data store, %v", nid, err)
-	}
-	return n, nil
-}*/
-
 func (d *driver) DeleteNetwork(nid string) error {
 	if nid == "" {
 		return fmt.Errorf("invalid network id")
@@ -171,6 +162,12 @@ func (n *network) destroySandbox() {
 		}
 
 		for _, s := range n.subnets {
+			if hostMode {
+				if err := removeFilters(n.id[:12], s.brName); err != nil {
+					logrus.Warnf("Could not remove overlay filters: %v", err)
+				}
+			}
+
 			if s.vxlanName != "" {
 				err := deleteVxlan(s.vxlanName)
 				if err != nil {
@@ -178,17 +175,88 @@ func (n *network) destroySandbox() {
 				}
 			}
 		}
+
+		if hostMode {
+			if err := removeNetworkChain(n.id[:12]); err != nil {
+				logrus.Warnf("could not remove network chain: %v", err)
+			}
+		}
+
 		sbox.Destroy()
 		n.setSandbox(nil)
 	}
 }
 
-func (n *network) initSubnetSandbox(s *subnet) error {
-	// create a bridge and vxlan device for this subnet and move it to the sandbox
-	brName, err := netutils.GenerateIfaceName("bridge", 7)
+func setHostMode() { // decides whether the overlay driver runs in host mode; the result is stored in hostMode
+	if os.Getenv("_OVERLAY_HOST_MODE") != "" { // any non-empty value forces host mode without probing
+		hostMode = true
+		return
+	}
+
+	err := createVxlan("testvxlan", 1) // throw-away probe interface
 	if err != nil {
-		return err
+		logrus.Errorf("Failed to create testvxlan interface: %v", err)
+		return
 	}
+
+	defer deleteVxlan("testvxlan") // cleanup of the probe interface; its error is ignored
+
+	path := "/proc/self/ns/net"
+	f, err := os.OpenFile(path, os.O_RDONLY, 0)
+	if err != nil {
+		logrus.Errorf("Failed to open path %s for network namespace for setting host mode: %v", path, err)
+		return
+	}
+	defer f.Close()
+
+	nsFD := f.Fd()
+
+	iface, err := netlink.LinkByName("testvxlan")
+	if err != nil {
+		logrus.Errorf("Failed to get link testvxlan: %v", err)
+		return
+	}
+
+	// Probe: try to move the vxlan interface to the namespace referenced by
+	// nsFD (opened from /proc/self/ns/net); if that fails, fall back to host mode.
+	if err := netlink.LinkSetNsFd(iface, int(nsFD)); err != nil {
+		hostMode = true
+	}
+}
+
+// generateVxlanName builds the vxlan interface name for subnet s:
+// "vx-", the subnet's VXLAN ID as zero-padded hex (at least six digits),
+// "-", and the first five characters of the network ID.
+func (n *network) generateVxlanName(s *subnet) string {
+	return fmt.Sprintf("vx-%06x-%s", n.vxlanID(s), n.id[:5])
+}
+
+// generateBridgeName builds the bridge interface name for subnet s:
+// "ov-", the subnet's VXLAN ID as zero-padded hex (at least six digits),
+// "-", and the first five characters of the network ID.
+func (n *network) generateBridgeName(s *subnet) string {
+	return fmt.Sprintf("ov-%06x-%s", n.vxlanID(s), n.id[:5])
+}
+
+// isOverlap reports whether nw conflicts with the host configuration, as
+// judged by netutils.CheckNameserverOverlaps (against the nameservers
+// obtained through resolvconf.Get) or by netutils.CheckRouteOverlaps.
+func isOverlap(nw *net.IPNet) bool {
+	var nameservers []string
+
+	// If resolvconf.Get fails, the check proceeds with no nameservers.
+	rc, err := resolvconf.Get()
+	if err == nil {
+		nameservers = resolvconf.GetNameserversAsCIDR(rc.Content)
+	}
+
+	if netutils.CheckNameserverOverlaps(nameservers, nw) != nil {
+		return true
+	}
+
+	return netutils.CheckRouteOverlaps(nw) != nil
+}
+
+func (n *network) initSubnetSandbox(s *subnet) error {
+	if hostMode && isOverlap(s.subnetIP) {
+		return fmt.Errorf("overlay subnet %s has conflicts in the host while running in host mode", s.subnetIP.String())
+	}
+
+	// create a bridge and vxlan device for this subnet and move it to the sandbox
+	brName := n.generateBridgeName(s)
 	sbox := n.sandbox()
 
 	if err := sbox.AddInterface(brName, "br",
@@ -197,7 +265,12 @@ func (n *network) initSubnetSandbox(s *subnet) error {
 		return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err)
 	}
 
-	vxlanName, err := createVxlan(n.vxlanID(s))
+	vxlanName := n.generateVxlanName(s)
+
+	// Try to delete the vxlan interface if already present
+	deleteVxlan(vxlanName)
+
+	err := createVxlan(vxlanName, n.vxlanID(s))
 	if err != nil {
 		return err
 	}
@@ -207,6 +280,12 @@ func (n *network) initSubnetSandbox(s *subnet) error {
 		return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err)
 	}
 
+	if hostMode {
+		if err := addFilters(n.id[:12], brName); err != nil {
+			return err
+		}
+	}
+
 	n.Lock()
 	s.vxlanName = vxlanName
 	s.brName = brName
@@ -220,8 +299,16 @@ func (n *network) initSandbox() error {
 	n.initEpoch++
 	n.Unlock()
 
+	hostModeOnce.Do(setHostMode)
+
+	if hostMode {
+		if err := addNetworkChain(n.id[:12]); err != nil {
+			return err
+		}
+	}
+
 	sbox, err := osl.NewSandbox(
-		osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch)+n.id), true)
+		osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch)+n.id), !hostMode)
 	if err != nil {
 		return fmt.Errorf("could not create network sandbox: %v", err)
 	}

+ 3 - 8
libnetwork/drivers/overlay/ov_utils.go

@@ -47,14 +47,9 @@ func createVethPair() (string, string, error) {
 	return name1, name2, nil
 }
 
-func createVxlan(vni uint32) (string, error) {
+func createVxlan(name string, vni uint32) error {
 	defer osl.InitOSContext()()
 
-	name, err := netutils.GenerateIfaceName("vxlan", 7)
-	if err != nil {
-		return "", fmt.Errorf("error generating vxlan name: %v", err)
-	}
-
 	vxlan := &netlink.Vxlan{
 		LinkAttrs: netlink.LinkAttrs{Name: name},
 		VxlanId:   int(vni),
@@ -66,10 +61,10 @@ func createVxlan(vni uint32) (string, error) {
 	}
 
 	if err := netlink.LinkAdd(vxlan); err != nil {
-		return "", fmt.Errorf("error creating vxlan interface: %v", err)
+		return fmt.Errorf("error creating vxlan interface: %v", err)
 	}
 
-	return name, nil
+	return nil
 }
 
 func deleteVxlan(name string) error {

+ 17 - 6
libnetwork/osl/interface_linux.go

@@ -109,6 +109,7 @@ func (i *nwIface) Remove() error {
 
 	n.Lock()
 	path := n.path
+	isDefault := n.isDefault
 	n.Unlock()
 
 	return nsInvoke(path, func(nsFD int) error { return nil }, func(callerFD int) error {
@@ -134,7 +135,7 @@ func (i *nwIface) Remove() error {
 			if err := netlink.LinkDel(iface); err != nil {
 				return fmt.Errorf("failed deleting bridge %q: %v", i.SrcName(), err)
 			}
-		} else {
+		} else if !isDefault {
 			// Move the network interface to caller namespace.
 			if err := netlink.LinkSetNsFd(iface, callerFD); err != nil {
 				fmt.Println("LinkSetNsPid failed: ", err)
@@ -213,9 +214,15 @@ func (n *networkNamespace) AddInterface(srcName, dstPrefix string, options ...If
 	}
 
 	n.Lock()
-	i.dstName = fmt.Sprintf("%s%d", i.dstName, n.nextIfIndex)
-	n.nextIfIndex++
+	if n.isDefault {
+		i.dstName = i.srcName
+	} else {
+		i.dstName = fmt.Sprintf("%s%d", i.dstName, n.nextIfIndex)
+		n.nextIfIndex++
+	}
+
 	path := n.path
+	isDefault := n.isDefault
 	n.Unlock()
 
 	return nsInvoke(path, func(nsFD int) error {
@@ -231,9 +238,13 @@ func (n *networkNamespace) AddInterface(srcName, dstPrefix string, options ...If
 			return fmt.Errorf("failed to get link by name %q: %v", i.srcName, err)
 		}
 
-		// Move the network interface to the destination namespace.
-		if err := netlink.LinkSetNsFd(iface, nsFD); err != nil {
-			return fmt.Errorf("failed to set namespace on link %q: %v", i.srcName, err)
+		// Move the network interface to the destination
+		// namespace only if the namespace is not a default
+		// type
+		if !isDefault {
+			if err := netlink.LinkSetNsFd(iface, nsFD); err != nil {
+				return fmt.Errorf("failed to set namespace on link %q: %v", i.srcName, err)
+			}
 		}
 
 		return nil

+ 2 - 1
libnetwork/osl/namespace_linux.go

@@ -41,6 +41,7 @@ type networkNamespace struct {
 	staticRoutes []*types.StaticRoute
 	neighbors    []*neigh
 	nextIfIndex  int
+	isDefault    bool
 	sync.Mutex
 }
 
@@ -146,7 +147,7 @@ func NewSandbox(key string, osCreate bool) (Sandbox, error) {
 		return nil, err
 	}
 
-	return &networkNamespace{path: key}, nil
+	return &networkNamespace{path: key, isDefault: !osCreate}, nil
 }
 
 func (n *networkNamespace) InterfaceOptions() IfaceOptionSetter {

+ 96 - 0
libnetwork/test/integration/dnet/helpers.bash

@@ -163,6 +163,7 @@ EOF
 	   --name=${name}  \
 	   --privileged \
 	   -p ${hport}:${cport} \
+	   -e _OVERLAY_HOST_MODE \
 	   -v $(pwd)/:/go/src/github.com/docker/libnetwork \
 	   -v /tmp:/tmp \
 	   -v $(pwd)/${TMPC_ROOT}:/scratch \
@@ -215,6 +216,21 @@ function runc() {
     dnet_exec ${dnet} "umount /var/run/netns/c && rm /var/run/netns/c"
 }
 
+function runc_nofail() { # usage: runc_nofail <dnet> <sandbox-id> <command>; the command's exit code is left in $status instead of aborting
+    local dnet
+
+    dnet=${1} # first argument: target passed to dnet_exec
+    shift # from here on ${1} is the sandbox id and ${2} the command to run
+    dnet_exec ${dnet} "cp /var/lib/docker/network/files/${1}*/* /scratch/rootfs/etc"
+    dnet_exec ${dnet} "mkdir -p /var/run/netns"
+    dnet_exec ${dnet} "touch /var/run/netns/c && mount -o bind /var/run/docker/netns/${1} /var/run/netns/c"
+    set +e # tolerate a non-zero exit from the command below
+    dnet_exec ${dnet} "ip netns exec c unshare -fmuip --mount-proc chroot \"/scratch/rootfs\" /bin/sh -c \"/bin/mount -t proc proc /proc && ${2}\""
+    status="$?" # not declared local: callers read it after the function returns
+    set -e # restore exit-on-error
+    dnet_exec ${dnet} "umount /var/run/netns/c && rm /var/run/netns/c"
+}
+
 function start_etcd() {
     local bridge_ip
     stop_etcd
@@ -442,3 +458,83 @@ function test_overlay_singlehost() {
 
     dnet_cmd $(inst_id2port 1) network rm multihost
 }
+
+function test_overlay_hostmode() { # usage: test_overlay_hostmode <dnet-suffix>
+    dnet_suffix=$1 # suffix handed to dnet_container_name below
+    shift
+
+    echo $(docker ps)
+
+    start=1
+    end=2
+    # Set up two overlay networks and connect containers to them
+    dnet_cmd $(inst_id2port 1) network create -d overlay multihost1
+    dnet_cmd $(inst_id2port 1) network create -d overlay multihost2
+    dnet_cmd $(inst_id2port 1) network ls
+
+    for i in `seq ${start} ${end}`;
+    do
+	dnet_cmd $(inst_id2port 1) container create mh1_${i}
+	net_connect 1 mh1_${i} multihost1
+    done
+
+    for i in `seq ${start} ${end}`;
+    do
+	dnet_cmd $(inst_id2port 1) container create mh2_${i}
+	net_connect 1 mh2_${i} multihost2
+    done
+
+    # Now test connectivity between all the containers using service names
+    for i in `seq ${start} ${end}`;
+    do
+	for j in `seq ${start} ${end}`;
+	do
+	    if [ "$i" -eq "$j" ]; then
+		continue
+	    fi
+
+	    # Find the IP addresses of the j containers on both networks
+	    hrun runc $(dnet_container_name 1 $dnet_suffix) $(get_sbox_id 1 mh1_${i}) "nslookup mh1_$j"
+	    mh1_j_ip=$(echo ${output} | awk '{print $11}') # 11th whitespace-separated field of the nslookup output; presumably the resolved address (verify)
+
+	    hrun runc $(dnet_container_name 1 $dnet_suffix) $(get_sbox_id 1 mh2_${i}) "nslookup mh2_$j"
+	    mh2_j_ip=$(echo ${output} | awk '{print $11}')
+
+	    # Ping the j containers in the same network and ensure the pings are successful
+	    runc $(dnet_container_name 1 $dnet_suffix) $(get_sbox_id 1 mh1_${i}) \
+		 "ping -c 1 mh1_$j"
+	    runc $(dnet_container_name 1 $dnet_suffix) $(get_sbox_id 1 mh2_${i}) \
+		 "ping -c 1 mh2_$j"
+
+	    # Try pinging the j container IPs from the container in the other network and make sure that the pings are not successful
+	    runc_nofail $(dnet_container_name 1 $dnet_suffix) $(get_sbox_id 1 mh1_${i}) "ping -c 1 ${mh2_j_ip}"
+	    [ "${status}" -ne 0 ]
+
+	    runc_nofail $(dnet_container_name 1 $dnet_suffix) $(get_sbox_id 1 mh2_${i}) "ping -c 1 ${mh1_j_ip}"
+	    [ "${status}" -ne 0 ]
+
+	    # Try pinging the j container IPs from the host (the dnet container in this case) and make sure that the pings are not successful. NOTE(review): the ping command is passed to docker exec as one quoted argument together with -it; if hrun executes its arguments verbatim, the non-zero status may come from docker exec itself rather than from filtering - confirm against hrun.
+	    hrun docker exec -it $(dnet_container_name 1 $dnet_suffix) "ping -c 1 ${mh1_j_ip}"
+	    [ "${status}" -ne 0 ]
+
+	    hrun docker exec -it $(dnet_container_name 1 $dnet_suffix) "ping -c 1 ${mh2_j_ip}"
+	    [ "${status}" -ne 0 ]
+	done
+    done
+
+    # Tear down the container connections and the networks
+    for i in `seq ${start} ${end}`;
+    do
+	net_disconnect 1 mh1_${i} multihost1
+	dnet_cmd $(inst_id2port 1) container rm mh1_${i}
+    done
+
+    for i in `seq ${start} ${end}`;
+    do
+	net_disconnect 1 mh2_${i} multihost2
+	dnet_cmd $(inst_id2port 1) container rm mh2_${i}
+    done
+
+    dnet_cmd $(inst_id2port 1) network rm multihost1
+    dnet_cmd $(inst_id2port 1) network rm multihost2
+}

+ 9 - 0
libnetwork/test/integration/dnet/overlay-consul-host.bats

@@ -0,0 +1,9 @@
+# -*- mode: sh -*-
+#!/usr/bin/env bats
+
+load helpers
+
+@test "Test overlay network hostmode with consul" {
+    skip_for_circleci
+    test_overlay_hostmode consul # "consul" becomes the dnet container-name suffix inside test_overlay_hostmode
+}

+ 16 - 1
libnetwork/test/integration/dnet/run-integration-tests.sh

@@ -56,6 +56,21 @@ function run_overlay_consul_tests() {
     unset cmap[dnet-3-consul]
 }
 
+function run_overlay_consul_host_tests() { # runs the overlay host-mode bats suite against a single consul-backed dnet instance
+    export _OVERLAY_HOST_MODE="true" # exported for the duration of this suite; unset again at the end
+    ## Setup: start one dnet instance and record it in cmap
+    start_dnet 1 consul 1>>${INTEGRATION_ROOT}/test.log 2>&1
+    cmap[dnet-1-consul]=dnet-1-consul
+
+    ## Run the test cases
+    ./integration-tmp/bin/bats ./test/integration/dnet/overlay-consul-host.bats
+
+    ## Teardown: stop the instance, drop it from cmap, and clear the host-mode override
+    stop_dnet 1 consul 1>>${INTEGRATION_ROOT}/test.log 2>&1
+    unset cmap[dnet-1-consul]
+    unset _OVERLAY_HOST_MODE
+}
+
 function run_overlay_zk_tests() {
     ## Test overlay network with zookeeper
     start_dnet 1 zookeeper 1>>${INTEGRATION_ROOT}/test.log 2>&1
@@ -207,7 +222,7 @@ if [ -z "$SUITES" ]; then
 	# old kernel and limited docker environment.
 	suites="dnet simple_consul multi_consul multi_zk multi_etcd"
     else
-	suites="dnet simple_consul multi_consul multi_zk multi_etcd  bridge overlay_consul overlay_zk overlay_etcd"
+	suites="dnet simple_consul multi_consul multi_zk multi_etcd  bridge overlay_consul overlay_consul_host overlay_zk overlay_etcd"
     fi
 else
     suites="$SUITES"