|
@@ -0,0 +1,471 @@
|
|
|
+package lxc
|
|
|
+
|
|
|
+import (
|
|
|
+ "fmt"
|
|
|
+ "github.com/dotcloud/docker/engine"
|
|
|
+ "github.com/dotcloud/docker/networkdriver"
|
|
|
+ "github.com/dotcloud/docker/networkdriver/ipallocator"
|
|
|
+ "github.com/dotcloud/docker/networkdriver/portallocator"
|
|
|
+ "github.com/dotcloud/docker/networkdriver/portmapper"
|
|
|
+ "github.com/dotcloud/docker/pkg/iptables"
|
|
|
+ "github.com/dotcloud/docker/pkg/netlink"
|
|
|
+ "github.com/dotcloud/docker/utils"
|
|
|
+ "io/ioutil"
|
|
|
+ "log"
|
|
|
+ "net"
|
|
|
+ "strings"
|
|
|
+ "syscall"
|
|
|
+ "unsafe"
|
|
|
+)
|
|
|
+
|
|
|
const (
	// DefaultNetworkBridge is the bridge interface name InitDriver falls
	// back to when the "BridgeIface" job variable is empty.
	DefaultNetworkBridge = "docker0"

	// siocBRADDBR is the ioctl request number createBridgeIface issues to
	// create the bridge device.
	siocBRADDBR = 0x89a0
)
|
|
|
+
|
|
|
// networkInterface represents the networking stack of a container.
type networkInterface struct {
	// IP is the address handed to the container by Allocate.
	IP net.IP

	// PortMappings holds the host-side addresses that AllocatePort has
	// mapped to this container.
	PortMappings []net.Addr
}
|
|
|
+
|
|
|
+var (
|
|
|
+ addrs = []string{
|
|
|
+ // Here we don't follow the convention of using the 1st IP of the range for the gateway.
|
|
|
+ // This is to use the same gateway IPs as the /24 ranges, which predate the /16 ranges.
|
|
|
+ // In theory this shouldn't matter - in practice there's bound to be a few scripts relying
|
|
|
+ // on the internal addressing or other stupid things like that.
|
|
|
+ // The shouldn't, but hey, let's not break them unless we really have to.
|
|
|
+ "172.17.42.1/16", // Don't use 172.16.0.0/16, it conflicts with EC2 DNS 172.16.0.23
|
|
|
+ "10.0.42.1/16", // Don't even try using the entire /8, that's too intrusive
|
|
|
+ "10.1.42.1/16",
|
|
|
+ "10.42.42.1/16",
|
|
|
+ "172.16.42.1/24",
|
|
|
+ "172.16.43.1/24",
|
|
|
+ "172.16.44.1/24",
|
|
|
+ "10.0.42.1/24",
|
|
|
+ "10.0.43.1/24",
|
|
|
+ "192.168.42.1/24",
|
|
|
+ "192.168.43.1/24",
|
|
|
+ "192.168.44.1/24",
|
|
|
+ }
|
|
|
+
|
|
|
+ bridgeIface string
|
|
|
+ bridgeNetwork *net.IPNet
|
|
|
+
|
|
|
+ defaultBindingIP = net.ParseIP("0.0.0.0")
|
|
|
+ currentInterfaces = make(map[string]*networkInterface)
|
|
|
+)
|
|
|
+
|
|
|
+func init() {
|
|
|
+ if err := engine.Register("init_networkdriver", InitDriver); err != nil {
|
|
|
+ panic(err)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+func InitDriver(job *engine.Job) engine.Status {
|
|
|
+ var (
|
|
|
+ network *net.IPNet
|
|
|
+ enableIPTables = job.GetenvBool("EnableIptables")
|
|
|
+ icc = job.GetenvBool("InterContainerCommunication")
|
|
|
+ ipForward = job.GetenvBool("EnableIpForward")
|
|
|
+ bridgeIP = job.Getenv("BridgeIP")
|
|
|
+ )
|
|
|
+
|
|
|
+ if defaultIP := job.Getenv("DefaultBindingIP"); defaultIP != "" {
|
|
|
+ defaultBindingIP = net.ParseIP(defaultIP)
|
|
|
+ }
|
|
|
+
|
|
|
+ bridgeIface = job.Getenv("BridgeIface")
|
|
|
+ if bridgeIface == "" {
|
|
|
+ bridgeIface = DefaultNetworkBridge
|
|
|
+ }
|
|
|
+
|
|
|
+ addr, err := networkdriver.GetIfaceAddr(bridgeIface)
|
|
|
+ if err != nil {
|
|
|
+ // If the iface is not found, try to create it
|
|
|
+ job.Logf("creating new bridge for %s", bridgeIface)
|
|
|
+ if err := createBridge(bridgeIP); err != nil {
|
|
|
+ job.Error(err)
|
|
|
+ return engine.StatusErr
|
|
|
+ }
|
|
|
+
|
|
|
+ job.Logf("getting iface addr")
|
|
|
+ addr, err = networkdriver.GetIfaceAddr(bridgeIface)
|
|
|
+ if err != nil {
|
|
|
+ job.Error(err)
|
|
|
+ return engine.StatusErr
|
|
|
+ }
|
|
|
+ network = addr.(*net.IPNet)
|
|
|
+ } else {
|
|
|
+ network = addr.(*net.IPNet)
|
|
|
+ }
|
|
|
+
|
|
|
+ // Configure iptables for link support
|
|
|
+ if enableIPTables {
|
|
|
+ if err := setupIPTables(addr, icc); err != nil {
|
|
|
+ job.Error(err)
|
|
|
+ return engine.StatusErr
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if ipForward {
|
|
|
+ // Enable IPv4 forwarding
|
|
|
+ if err := ioutil.WriteFile("/proc/sys/net/ipv4/ip_forward", []byte{'1', '\n'}, 0644); err != nil {
|
|
|
+ job.Logf("WARNING: unable to enable IPv4 forwarding: %s\n", err)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // We can always try removing the iptables
|
|
|
+ if err := iptables.RemoveExistingChain("DOCKER"); err != nil {
|
|
|
+ job.Error(err)
|
|
|
+ return engine.StatusErr
|
|
|
+ }
|
|
|
+
|
|
|
+ if enableIPTables {
|
|
|
+ chain, err := iptables.NewChain("DOCKER", bridgeIface)
|
|
|
+ if err != nil {
|
|
|
+ job.Error(err)
|
|
|
+ return engine.StatusErr
|
|
|
+ }
|
|
|
+ portmapper.SetIptablesChain(chain)
|
|
|
+ }
|
|
|
+
|
|
|
+ bridgeNetwork = network
|
|
|
+
|
|
|
+ // https://github.com/dotcloud/docker/issues/2768
|
|
|
+ job.Eng.Hack_SetGlobalVar("httpapi.bridgeIP", bridgeNetwork.IP)
|
|
|
+
|
|
|
+ for name, f := range map[string]engine.Handler{
|
|
|
+ "allocate_interface": Allocate,
|
|
|
+ "release_interface": Release,
|
|
|
+ "allocate_port": AllocatePort,
|
|
|
+ "link": LinkContainers,
|
|
|
+ } {
|
|
|
+ if err := job.Eng.Register(name, f); err != nil {
|
|
|
+ job.Error(err)
|
|
|
+ return engine.StatusErr
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return engine.StatusOK
|
|
|
+}
|
|
|
+
|
|
|
+func setupIPTables(addr net.Addr, icc bool) error {
|
|
|
+ // Enable NAT
|
|
|
+ natArgs := []string{"POSTROUTING", "-t", "nat", "-s", addr.String(), "!", "-d", addr.String(), "-j", "MASQUERADE"}
|
|
|
+
|
|
|
+ if !iptables.Exists(natArgs...) {
|
|
|
+ if output, err := iptables.Raw(append([]string{"-I"}, natArgs...)...); err != nil {
|
|
|
+ return fmt.Errorf("Unable to enable network bridge NAT: %s", err)
|
|
|
+ } else if len(output) != 0 {
|
|
|
+ return fmt.Errorf("Error iptables postrouting: %s", output)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ var (
|
|
|
+ args = []string{"FORWARD", "-i", bridgeIface, "-o", bridgeIface, "-j"}
|
|
|
+ acceptArgs = append(args, "ACCEPT")
|
|
|
+ dropArgs = append(args, "DROP")
|
|
|
+ )
|
|
|
+
|
|
|
+ if !icc {
|
|
|
+ iptables.Raw(append([]string{"-D"}, acceptArgs...)...)
|
|
|
+
|
|
|
+ if !iptables.Exists(dropArgs...) {
|
|
|
+
|
|
|
+ utils.Debugf("Disable inter-container communication")
|
|
|
+ if output, err := iptables.Raw(append([]string{"-I"}, dropArgs...)...); err != nil {
|
|
|
+ return fmt.Errorf("Unable to prevent intercontainer communication: %s", err)
|
|
|
+ } else if len(output) != 0 {
|
|
|
+ return fmt.Errorf("Error disabling intercontainer communication: %s", output)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ iptables.Raw(append([]string{"-D"}, dropArgs...)...)
|
|
|
+
|
|
|
+ if !iptables.Exists(acceptArgs...) {
|
|
|
+ utils.Debugf("Enable inter-container communication")
|
|
|
+ if output, err := iptables.Raw(append([]string{"-I"}, acceptArgs...)...); err != nil {
|
|
|
+ return fmt.Errorf("Unable to allow intercontainer communication: %s", err)
|
|
|
+ } else if len(output) != 0 {
|
|
|
+ return fmt.Errorf("Error enabling intercontainer communication: %s", output)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // Accept all non-intercontainer outgoing packets
|
|
|
+ outgoingArgs := []string{"FORWARD", "-i", bridgeIface, "!", "-o", bridgeIface, "-j", "ACCEPT"}
|
|
|
+ if !iptables.Exists(outgoingArgs...) {
|
|
|
+ if output, err := iptables.Raw(append([]string{"-I"}, outgoingArgs...)...); err != nil {
|
|
|
+ return fmt.Errorf("Unable to allow outgoing packets: %s", err)
|
|
|
+ } else if len(output) != 0 {
|
|
|
+ return fmt.Errorf("Error iptables allow outgoing: %s", output)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // Accept incoming packets for existing connections
|
|
|
+ existingArgs := []string{"FORWARD", "-o", bridgeIface, "-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", "ACCEPT"}
|
|
|
+
|
|
|
+ if !iptables.Exists(existingArgs...) {
|
|
|
+ if output, err := iptables.Raw(append([]string{"-I"}, existingArgs...)...); err != nil {
|
|
|
+ return fmt.Errorf("Unable to allow incoming packets: %s", err)
|
|
|
+ } else if len(output) != 0 {
|
|
|
+ return fmt.Errorf("Error iptables allow incoming: %s", output)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
+// CreateBridgeIface creates a network bridge interface on the host system with the name `ifaceName`,
|
|
|
+// and attempts to configure it with an address which doesn't conflict with any other interface on the host.
|
|
|
+// If it can't find an address which doesn't conflict, it will return an error.
|
|
|
+func createBridge(bridgeIP string) error {
|
|
|
+ nameservers := []string{}
|
|
|
+ resolvConf, _ := utils.GetResolvConf()
|
|
|
+ // we don't check for an error here, because we don't really care
|
|
|
+ // if we can't read /etc/resolv.conf. So instead we skip the append
|
|
|
+ // if resolvConf is nil. It either doesn't exist, or we can't read it
|
|
|
+ // for some reason.
|
|
|
+ if resolvConf != nil {
|
|
|
+ nameservers = append(nameservers, utils.GetNameserversAsCIDR(resolvConf)...)
|
|
|
+ }
|
|
|
+
|
|
|
+ var ifaceAddr string
|
|
|
+ if len(bridgeIP) != 0 {
|
|
|
+ _, _, err := net.ParseCIDR(bridgeIP)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ ifaceAddr = bridgeIP
|
|
|
+ } else {
|
|
|
+ for _, addr := range addrs {
|
|
|
+ _, dockerNetwork, err := net.ParseCIDR(addr)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ if err := networkdriver.CheckNameserverOverlaps(nameservers, dockerNetwork); err == nil {
|
|
|
+ if err := networkdriver.CheckRouteOverlaps(dockerNetwork); err == nil {
|
|
|
+ ifaceAddr = addr
|
|
|
+ break
|
|
|
+ } else {
|
|
|
+ utils.Debugf("%s %s", addr, err)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if ifaceAddr == "" {
|
|
|
+ return fmt.Errorf("Could not find a free IP address range for interface '%s'. Please configure its address manually and run 'docker -b %s'", bridgeIface, bridgeIface)
|
|
|
+ }
|
|
|
+ utils.Debugf("Creating bridge %s with network %s", bridgeIface, ifaceAddr)
|
|
|
+
|
|
|
+ if err := createBridgeIface(bridgeIface); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+
|
|
|
+ iface, err := net.InterfaceByName(bridgeIface)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+
|
|
|
+ ipAddr, ipNet, err := net.ParseCIDR(ifaceAddr)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+
|
|
|
+ if netlink.NetworkLinkAddIp(iface, ipAddr, ipNet); err != nil {
|
|
|
+ return fmt.Errorf("Unable to add private network: %s", err)
|
|
|
+ }
|
|
|
+ if err := netlink.NetworkLinkUp(iface); err != nil {
|
|
|
+ return fmt.Errorf("Unable to start network bridge: %s", err)
|
|
|
+ }
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
+// Create the actual bridge device. This is more backward-compatible than
|
|
|
+// netlink.NetworkLinkAdd and works on RHEL 6.
|
|
|
+func createBridgeIface(name string) error {
|
|
|
+ s, err := syscall.Socket(syscall.AF_INET6, syscall.SOCK_STREAM, syscall.IPPROTO_IP)
|
|
|
+ if err != nil {
|
|
|
+ utils.Debugf("Bridge socket creation failed IPv6 probably not enabled: %v", err)
|
|
|
+ s, err = syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_IP)
|
|
|
+ if err != nil {
|
|
|
+ return fmt.Errorf("Error creating bridge creation socket: %s", err)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ defer syscall.Close(s)
|
|
|
+
|
|
|
+ nameBytePtr, err := syscall.BytePtrFromString(name)
|
|
|
+ if err != nil {
|
|
|
+ return fmt.Errorf("Error converting bridge name %s to byte array: %s", name, err)
|
|
|
+ }
|
|
|
+
|
|
|
+ if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), siocBRADDBR, uintptr(unsafe.Pointer(nameBytePtr))); err != 0 {
|
|
|
+ return fmt.Errorf("Error creating bridge: %s", err)
|
|
|
+ }
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
+// Allocate a network interface
|
|
|
+func Allocate(job *engine.Job) engine.Status {
|
|
|
+ var (
|
|
|
+ ip *net.IP
|
|
|
+ err error
|
|
|
+ id = job.Args[0]
|
|
|
+ requestedIP = net.ParseIP(job.Getenv("RequestedIP"))
|
|
|
+ )
|
|
|
+
|
|
|
+ if requestedIP != nil {
|
|
|
+ ip, err = ipallocator.RequestIP(bridgeNetwork, &requestedIP)
|
|
|
+ } else {
|
|
|
+ ip, err = ipallocator.RequestIP(bridgeNetwork, nil)
|
|
|
+ }
|
|
|
+ if err != nil {
|
|
|
+ job.Error(err)
|
|
|
+ return engine.StatusErr
|
|
|
+ }
|
|
|
+
|
|
|
+ out := engine.Env{}
|
|
|
+ out.Set("IP", ip.String())
|
|
|
+ out.Set("Mask", bridgeNetwork.Mask.String())
|
|
|
+ out.Set("Gateway", bridgeNetwork.IP.String())
|
|
|
+ out.Set("Bridge", bridgeIface)
|
|
|
+
|
|
|
+ size, _ := bridgeNetwork.Mask.Size()
|
|
|
+ out.SetInt("IPPrefixLen", size)
|
|
|
+
|
|
|
+ currentInterfaces[id] = &networkInterface{
|
|
|
+ IP: *ip,
|
|
|
+ }
|
|
|
+
|
|
|
+ out.WriteTo(job.Stdout)
|
|
|
+
|
|
|
+ return engine.StatusOK
|
|
|
+}
|
|
|
+
|
|
|
+// release an interface for a select ip
|
|
|
+func Release(job *engine.Job) engine.Status {
|
|
|
+ var (
|
|
|
+ id = job.Args[0]
|
|
|
+ containerInterface = currentInterfaces[id]
|
|
|
+ ip net.IP
|
|
|
+ port int
|
|
|
+ proto string
|
|
|
+ )
|
|
|
+
|
|
|
+ for _, nat := range containerInterface.PortMappings {
|
|
|
+ if err := portmapper.Unmap(nat); err != nil {
|
|
|
+ log.Printf("Unable to unmap port %s: %s", nat, err)
|
|
|
+ }
|
|
|
+
|
|
|
+ // this is host mappings
|
|
|
+ switch a := nat.(type) {
|
|
|
+ case *net.TCPAddr:
|
|
|
+ proto = "tcp"
|
|
|
+ ip = a.IP
|
|
|
+ port = a.Port
|
|
|
+ case *net.UDPAddr:
|
|
|
+ proto = "udp"
|
|
|
+ ip = a.IP
|
|
|
+ port = a.Port
|
|
|
+ }
|
|
|
+
|
|
|
+ if err := portallocator.ReleasePort(ip, proto, port); err != nil {
|
|
|
+ log.Printf("Unable to release port %s", nat)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if err := ipallocator.ReleaseIP(bridgeNetwork, &containerInterface.IP); err != nil {
|
|
|
+ log.Printf("Unable to release ip %s\n", err)
|
|
|
+ }
|
|
|
+ return engine.StatusOK
|
|
|
+}
|
|
|
+
|
|
|
+// Allocate an external port and map it to the interface
|
|
|
+func AllocatePort(job *engine.Job) engine.Status {
|
|
|
+ var (
|
|
|
+ err error
|
|
|
+
|
|
|
+ ip = defaultBindingIP
|
|
|
+ id = job.Args[0]
|
|
|
+ hostIP = job.Getenv("HostIP")
|
|
|
+ hostPort = job.GetenvInt("HostPort")
|
|
|
+ containerPort = job.GetenvInt("ContainerPort")
|
|
|
+ proto = job.Getenv("Proto")
|
|
|
+ network = currentInterfaces[id]
|
|
|
+ )
|
|
|
+
|
|
|
+ if hostIP != "" {
|
|
|
+ ip = net.ParseIP(hostIP)
|
|
|
+ }
|
|
|
+
|
|
|
+ // host ip, proto, and host port
|
|
|
+ hostPort, err = portallocator.RequestPort(ip, proto, hostPort)
|
|
|
+ if err != nil {
|
|
|
+ job.Error(err)
|
|
|
+ return engine.StatusErr
|
|
|
+ }
|
|
|
+
|
|
|
+ var (
|
|
|
+ container net.Addr
|
|
|
+ host net.Addr
|
|
|
+ )
|
|
|
+
|
|
|
+ if proto == "tcp" {
|
|
|
+ host = &net.TCPAddr{IP: ip, Port: hostPort}
|
|
|
+ container = &net.TCPAddr{IP: network.IP, Port: containerPort}
|
|
|
+ } else {
|
|
|
+ host = &net.UDPAddr{IP: ip, Port: hostPort}
|
|
|
+ container = &net.UDPAddr{IP: network.IP, Port: containerPort}
|
|
|
+ }
|
|
|
+
|
|
|
+ if err := portmapper.Map(container, ip, hostPort); err != nil {
|
|
|
+ portallocator.ReleasePort(ip, proto, hostPort)
|
|
|
+
|
|
|
+ job.Error(err)
|
|
|
+ return engine.StatusErr
|
|
|
+ }
|
|
|
+ network.PortMappings = append(network.PortMappings, host)
|
|
|
+
|
|
|
+ out := engine.Env{}
|
|
|
+ out.Set("HostIP", ip.String())
|
|
|
+ out.SetInt("HostPort", hostPort)
|
|
|
+
|
|
|
+ if _, err := out.WriteTo(job.Stdout); err != nil {
|
|
|
+ job.Error(err)
|
|
|
+ return engine.StatusErr
|
|
|
+ }
|
|
|
+ return engine.StatusOK
|
|
|
+}
|
|
|
+
|
|
|
+func LinkContainers(job *engine.Job) engine.Status {
|
|
|
+ var (
|
|
|
+ action = job.Args[0]
|
|
|
+ childIP = job.Getenv("ChildIP")
|
|
|
+ parentIP = job.Getenv("ParentIP")
|
|
|
+ ignoreErrors = job.GetenvBool("IgnoreErrors")
|
|
|
+ ports = job.GetenvList("Ports")
|
|
|
+ )
|
|
|
+ split := func(p string) (string, string) {
|
|
|
+ parts := strings.Split(p, "/")
|
|
|
+ return parts[0], parts[1]
|
|
|
+ }
|
|
|
+
|
|
|
+ for _, p := range ports {
|
|
|
+ port, proto := split(p)
|
|
|
+ if output, err := iptables.Raw(action, "FORWARD",
|
|
|
+ "-i", bridgeIface, "-o", bridgeIface,
|
|
|
+ "-p", proto,
|
|
|
+ "-s", parentIP,
|
|
|
+ "--dport", port,
|
|
|
+ "-d", childIP,
|
|
|
+ "-j", "ACCEPT"); !ignoreErrors && err != nil {
|
|
|
+ job.Error(err)
|
|
|
+ return engine.StatusErr
|
|
|
+ } else if len(output) != 0 {
|
|
|
+ job.Errorf("Error toggle iptables forward: %s", output)
|
|
|
+ return engine.StatusErr
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return engine.StatusOK
|
|
|
+}
|