Browse Source

Merge pull request #25 from mrjana/master

Reimplement create namespace and move interface to ns without reexec
Arnaud Porterie 10 years ago
parent
commit
6e14090866
23 changed files with 1748 additions and 179 deletions
  1. 14 0
      libnetwork/Godeps/Godeps.json
  2. 2 0
      libnetwork/Godeps/_workspace/src/github.com/docker/docker/pkg/iptables/MAINTAINERS
  3. 292 0
      libnetwork/Godeps/_workspace/src/github.com/docker/docker/pkg/iptables/iptables.go
  4. 204 0
      libnetwork/Godeps/_workspace/src/github.com/docker/docker/pkg/iptables/iptables_test.go
  5. 1 0
      libnetwork/Godeps/_workspace/src/github.com/docker/docker/pkg/proxy/MAINTAINERS
  6. 216 0
      libnetwork/Godeps/_workspace/src/github.com/docker/docker/pkg/proxy/network_proxy_test.go
  7. 29 0
      libnetwork/Godeps/_workspace/src/github.com/docker/docker/pkg/proxy/proxy.go
  8. 22 0
      libnetwork/Godeps/_workspace/src/github.com/docker/docker/pkg/proxy/stub_proxy.go
  9. 90 0
      libnetwork/Godeps/_workspace/src/github.com/docker/docker/pkg/proxy/tcp_proxy.go
  10. 158 0
      libnetwork/Godeps/_workspace/src/github.com/docker/docker/pkg/proxy/udp_proxy.go
  11. 192 0
      libnetwork/Godeps/_workspace/src/github.com/vishvananda/netns/LICENSE
  12. 49 0
      libnetwork/Godeps/_workspace/src/github.com/vishvananda/netns/README.md
  13. 66 0
      libnetwork/Godeps/_workspace/src/github.com/vishvananda/netns/netns.go
  14. 206 0
      libnetwork/Godeps/_workspace/src/github.com/vishvananda/netns/netns_linux.go
  15. 5 0
      libnetwork/Godeps/_workspace/src/github.com/vishvananda/netns/netns_linux_386.go
  16. 5 0
      libnetwork/Godeps/_workspace/src/github.com/vishvananda/netns/netns_linux_amd.go
  17. 5 0
      libnetwork/Godeps/_workspace/src/github.com/vishvananda/netns/netns_linux_arm.go
  18. 66 0
      libnetwork/Godeps/_workspace/src/github.com/vishvananda/netns/netns_test.go
  19. 35 0
      libnetwork/Godeps/_workspace/src/github.com/vishvananda/netns/netns_unspecified.go
  20. 91 4
      libnetwork/namespace.go
  21. 0 45
      libnetwork/reexec.go
  22. 0 78
      libnetwork/reexec_move_interface.go
  23. 0 52
      libnetwork/reexec_netns_create.go

+ 14 - 0
libnetwork/Godeps/Godeps.json

@@ -15,11 +15,21 @@
 			"Comment": "v1.4.1-1379-g8e107a9",
 			"Rev": "8e107a93210c54f22ec1354d969c771b1abfbe05"
 		},
+		{
+			"ImportPath": "github.com/docker/docker/pkg/iptables",
+			"Comment": "v1.4.1-1379-g8e107a9",
+			"Rev": "8e107a93210c54f22ec1354d969c771b1abfbe05"
+		},
 		{
 			"ImportPath": "github.com/docker/docker/pkg/parsers/kernel",
 			"Comment": "v1.4.1-1379-g8e107a9",
 			"Rev": "8e107a93210c54f22ec1354d969c771b1abfbe05"
 		},
+		{
+			"ImportPath": "github.com/docker/docker/pkg/proxy",
+			"Comment": "v1.4.1-1379-g8e107a9",
+			"Rev": "8e107a93210c54f22ec1354d969c771b1abfbe05"
+		},
 		{
 			"ImportPath": "github.com/docker/docker/pkg/reexec",
 			"Comment": "v1.4.1-1379-g8e107a9",
@@ -33,6 +43,10 @@
 		{
 			"ImportPath": "github.com/vishvananda/netlink",
 			"Rev": "8eb64238879fed52fd51c5b30ad20b928fb4c36c"
+		},
+		{
+			"ImportPath": "github.com/vishvananda/netns",
+			"Rev": "008d17ae001344769b031375bdb38a86219154c6"
 		}
 	]
 }

+ 2 - 0
libnetwork/Godeps/_workspace/src/github.com/docker/docker/pkg/iptables/MAINTAINERS

@@ -0,0 +1,2 @@
+Michael Crosby <michael@crosbymichael.com> (@crosbymichael)
+Jessie Frazelle <jess@docker.com> (@jfrazelle)

+ 292 - 0
libnetwork/Godeps/_workspace/src/github.com/docker/docker/pkg/iptables/iptables.go

@@ -0,0 +1,292 @@
+package iptables
+
+import (
+	"errors"
+	"fmt"
+	"net"
+	"os/exec"
+	"regexp"
+	"strconv"
+	"strings"
+
+	log "github.com/Sirupsen/logrus"
+)
+
+type Action string
+type Table string
+
+const (
+	Append Action = "-A"
+	Delete Action = "-D"
+	Insert Action = "-I"
+	Nat    Table  = "nat"
+	Filter Table  = "filter"
+)
+
+var (
+	iptablesPath        string
+	supportsXlock       = false
+	ErrIptablesNotFound = errors.New("Iptables not found")
+)
+
+type Chain struct {
+	Name   string
+	Bridge string
+	Table  Table
+}
+
+type ChainError struct {
+	Chain  string
+	Output []byte
+}
+
+func (e *ChainError) Error() string {
+	return fmt.Sprintf("Error iptables %s: %s", e.Chain, string(e.Output))
+}
+
+func initCheck() error {
+
+	if iptablesPath == "" {
+		path, err := exec.LookPath("iptables")
+		if err != nil {
+			return ErrIptablesNotFound
+		}
+		iptablesPath = path
+		supportsXlock = exec.Command(iptablesPath, "--wait", "-L", "-n").Run() == nil
+	}
+	return nil
+}
+
+func NewChain(name, bridge string, table Table) (*Chain, error) {
+	c := &Chain{
+		Name:   name,
+		Bridge: bridge,
+		Table:  table,
+	}
+
+	if string(c.Table) == "" {
+		c.Table = Filter
+	}
+
+	// Add chain if it doesn't exist
+	if _, err := Raw("-t", string(c.Table), "-n", "-L", c.Name); err != nil {
+		if output, err := Raw("-t", string(c.Table), "-N", c.Name); err != nil {
+			return nil, err
+		} else if len(output) != 0 {
+			return nil, fmt.Errorf("Could not create %s/%s chain: %s", c.Table, c.Name, output)
+		}
+	}
+
+	switch table {
+	case Nat:
+		preroute := []string{
+			"-m", "addrtype",
+			"--dst-type", "LOCAL"}
+		if !Exists(preroute...) {
+			if err := c.Prerouting(Append, preroute...); err != nil {
+				return nil, fmt.Errorf("Failed to inject docker in PREROUTING chain: %s", err)
+			}
+		}
+		output := []string{
+			"-m", "addrtype",
+			"--dst-type", "LOCAL",
+			"!", "--dst", "127.0.0.0/8"}
+		if !Exists(output...) {
+			if err := c.Output(Append, output...); err != nil {
+				return nil, fmt.Errorf("Failed to inject docker in OUTPUT chain: %s", err)
+			}
+		}
+	case Filter:
+		link := []string{"FORWARD",
+			"-o", c.Bridge,
+			"-j", c.Name}
+		if !Exists(link...) {
+			insert := append([]string{string(Insert)}, link...)
+			if output, err := Raw(insert...); err != nil {
+				return nil, err
+			} else if len(output) != 0 {
+				return nil, fmt.Errorf("Could not create linking rule to %s/%s: %s", c.Table, c.Name, output)
+			}
+		}
+	}
+	return c, nil
+}
+
+func RemoveExistingChain(name string, table Table) error {
+	c := &Chain{
+		Name:  name,
+		Table: table,
+	}
+	if string(c.Table) == "" {
+		c.Table = Filter
+	}
+	return c.Remove()
+}
+
+// Add forwarding rule to 'filter' table and corresponding nat rule to 'nat' table
+func (c *Chain) Forward(action Action, ip net.IP, port int, proto, destAddr string, destPort int) error {
+	daddr := ip.String()
+	if ip.IsUnspecified() {
+		// iptables interprets "0.0.0.0" as "0.0.0.0/32", whereas we
+		// want "0.0.0.0/0". "0/0" is correctly interpreted as "any
+		// value" by both iptables and ip6tables.
+		daddr = "0/0"
+	}
+	if output, err := Raw("-t", string(Nat), string(action), c.Name,
+		"-p", proto,
+		"-d", daddr,
+		"--dport", strconv.Itoa(port),
+		"!", "-i", c.Bridge,
+		"-j", "DNAT",
+		"--to-destination", net.JoinHostPort(destAddr, strconv.Itoa(destPort))); err != nil {
+		return err
+	} else if len(output) != 0 {
+		return &ChainError{Chain: "FORWARD", Output: output}
+	}
+
+	if output, err := Raw("-t", string(Filter), string(action), c.Name,
+		"!", "-i", c.Bridge,
+		"-o", c.Bridge,
+		"-p", proto,
+		"-d", destAddr,
+		"--dport", strconv.Itoa(destPort),
+		"-j", "ACCEPT"); err != nil {
+		return err
+	} else if len(output) != 0 {
+		return &ChainError{Chain: "FORWARD", Output: output}
+	}
+
+	if output, err := Raw("-t", string(Nat), string(action), "POSTROUTING",
+		"-p", proto,
+		"-s", destAddr,
+		"-d", destAddr,
+		"--dport", strconv.Itoa(destPort),
+		"-j", "MASQUERADE"); err != nil {
+		return err
+	} else if len(output) != 0 {
+		return &ChainError{Chain: "FORWARD", Output: output}
+	}
+
+	return nil
+}
+
+// Add reciprocal ACCEPT rule for two supplied IP addresses.
+// Traffic is allowed from ip1 to ip2 and vice-versa
+func (c *Chain) Link(action Action, ip1, ip2 net.IP, port int, proto string) error {
+	if output, err := Raw("-t", string(Filter), string(action), c.Name,
+		"-i", c.Bridge, "-o", c.Bridge,
+		"-p", proto,
+		"-s", ip1.String(),
+		"-d", ip2.String(),
+		"--dport", strconv.Itoa(port),
+		"-j", "ACCEPT"); err != nil {
+		return err
+	} else if len(output) != 0 {
+		return fmt.Errorf("Error iptables forward: %s", output)
+	}
+	if output, err := Raw("-t", string(Filter), string(action), c.Name,
+		"-i", c.Bridge, "-o", c.Bridge,
+		"-p", proto,
+		"-s", ip2.String(),
+		"-d", ip1.String(),
+		"--sport", strconv.Itoa(port),
+		"-j", "ACCEPT"); err != nil {
+		return err
+	} else if len(output) != 0 {
+		return fmt.Errorf("Error iptables forward: %s", output)
+	}
+	return nil
+}
+
+// Add linking rule to nat/PREROUTING chain.
+func (c *Chain) Prerouting(action Action, args ...string) error {
+	a := []string{"-t", string(Nat), string(action), "PREROUTING"}
+	if len(args) > 0 {
+		a = append(a, args...)
+	}
+	if output, err := Raw(append(a, "-j", c.Name)...); err != nil {
+		return err
+	} else if len(output) != 0 {
+		return &ChainError{Chain: "PREROUTING", Output: output}
+	}
+	return nil
+}
+
+// Add linking rule to an OUTPUT chain
+func (c *Chain) Output(action Action, args ...string) error {
+	a := []string{"-t", string(c.Table), string(action), "OUTPUT"}
+	if len(args) > 0 {
+		a = append(a, args...)
+	}
+	if output, err := Raw(append(a, "-j", c.Name)...); err != nil {
+		return err
+	} else if len(output) != 0 {
+		return &ChainError{Chain: "OUTPUT", Output: output}
+	}
+	return nil
+}
+
+func (c *Chain) Remove() error {
+	// Ignore errors - This could mean the chains were never set up
+	if c.Table == Nat {
+		c.Prerouting(Delete, "-m", "addrtype", "--dst-type", "LOCAL")
+		c.Output(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "!", "--dst", "127.0.0.0/8")
+		c.Output(Delete, "-m", "addrtype", "--dst-type", "LOCAL") // Created in versions <= 0.1.6
+
+		c.Prerouting(Delete)
+		c.Output(Delete)
+	}
+	Raw("-t", string(c.Table), "-F", c.Name)
+	Raw("-t", string(c.Table), "-X", c.Name)
+	return nil
+}
+
+// Check if a rule exists
+func Exists(args ...string) bool {
+	// iptables -C, --check option was added in v.1.4.11
+	// http://ftp.netfilter.org/pub/iptables/changes-iptables-1.4.11.txt
+
+	// try -C
+	// if exit status is 0 then return true, the rule exists
+	if _, err := Raw(append([]string{"-C"}, args...)...); err == nil {
+		return true
+	}
+
+	// parse iptables-save for the rule
+	rule := strings.Replace(strings.Join(args, " "), "-t nat ", "", -1)
+	existingRules, _ := exec.Command("iptables-save").Output()
+
+	// regex to replace ips in rule
+	// because MASQUERADE rule will not be exactly what was passed
+	re := regexp.MustCompile(`[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\/[0-9]{1,2}`)
+
+	return strings.Contains(
+		re.ReplaceAllString(string(existingRules), "?"),
+		re.ReplaceAllString(rule, "?"),
+	)
+}
+
+// Call 'iptables' system command, passing supplied arguments
+func Raw(args ...string) ([]byte, error) {
+
+	if err := initCheck(); err != nil {
+		return nil, err
+	}
+	if supportsXlock {
+		args = append([]string{"--wait"}, args...)
+	}
+
+	log.Debugf("%s, %v", iptablesPath, args)
+
+	output, err := exec.Command(iptablesPath, args...).CombinedOutput()
+	if err != nil {
+		return nil, fmt.Errorf("iptables failed: iptables %v: %s (%s)", strings.Join(args, " "), output, err)
+	}
+
+	// ignore iptables' message about xtables lock
+	if strings.Contains(string(output), "waiting for it to exit") {
+		output = []byte("")
+	}
+
+	return output, err
+}

+ 204 - 0
libnetwork/Godeps/_workspace/src/github.com/docker/docker/pkg/iptables/iptables_test.go

@@ -0,0 +1,204 @@
+package iptables
+
+import (
+	"net"
+	"os/exec"
+	"strconv"
+	"strings"
+	"testing"
+)
+
+const chainName = "DOCKERTEST"
+
+var natChain *Chain
+var filterChain *Chain
+
+func TestNewChain(t *testing.T) {
+	var err error
+
+	natChain, err = NewChain(chainName, "lo", Nat)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	filterChain, err = NewChain(chainName, "lo", Filter)
+	if err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestForward(t *testing.T) {
+	ip := net.ParseIP("192.168.1.1")
+	port := 1234
+	dstAddr := "172.17.0.1"
+	dstPort := 4321
+	proto := "tcp"
+
+	err := natChain.Forward(Insert, ip, port, proto, dstAddr, dstPort)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	dnatRule := []string{natChain.Name,
+		"-t", string(natChain.Table),
+		"!", "-i", filterChain.Bridge,
+		"-d", ip.String(),
+		"-p", proto,
+		"--dport", strconv.Itoa(port),
+		"-j", "DNAT",
+		"--to-destination", dstAddr + ":" + strconv.Itoa(dstPort),
+	}
+
+	if !Exists(dnatRule...) {
+		t.Fatalf("DNAT rule does not exist")
+	}
+
+	filterRule := []string{filterChain.Name,
+		"-t", string(filterChain.Table),
+		"!", "-i", filterChain.Bridge,
+		"-o", filterChain.Bridge,
+		"-d", dstAddr,
+		"-p", proto,
+		"--dport", strconv.Itoa(dstPort),
+		"-j", "ACCEPT",
+	}
+
+	if !Exists(filterRule...) {
+		t.Fatalf("filter rule does not exist")
+	}
+
+	masqRule := []string{"POSTROUTING",
+		"-t", string(natChain.Table),
+		"-d", dstAddr,
+		"-s", dstAddr,
+		"-p", proto,
+		"--dport", strconv.Itoa(dstPort),
+		"-j", "MASQUERADE",
+	}
+
+	if !Exists(masqRule...) {
+		t.Fatalf("MASQUERADE rule does not exist")
+	}
+}
+
+func TestLink(t *testing.T) {
+	var err error
+
+	ip1 := net.ParseIP("192.168.1.1")
+	ip2 := net.ParseIP("192.168.1.2")
+	port := 1234
+	proto := "tcp"
+
+	err = filterChain.Link(Append, ip1, ip2, port, proto)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	rule1 := []string{filterChain.Name,
+		"-t", string(filterChain.Table),
+		"-i", filterChain.Bridge,
+		"-o", filterChain.Bridge,
+		"-p", proto,
+		"-s", ip1.String(),
+		"-d", ip2.String(),
+		"--dport", strconv.Itoa(port),
+		"-j", "ACCEPT"}
+
+	if !Exists(rule1...) {
+		t.Fatalf("rule1 does not exist")
+	}
+
+	rule2 := []string{filterChain.Name,
+		"-t", string(filterChain.Table),
+		"-i", filterChain.Bridge,
+		"-o", filterChain.Bridge,
+		"-p", proto,
+		"-s", ip2.String(),
+		"-d", ip1.String(),
+		"--sport", strconv.Itoa(port),
+		"-j", "ACCEPT"}
+
+	if !Exists(rule2...) {
+		t.Fatalf("rule2 does not exist")
+	}
+}
+
+func TestPrerouting(t *testing.T) {
+	args := []string{
+		"-i", "lo",
+		"-d", "192.168.1.1"}
+
+	err := natChain.Prerouting(Insert, args...)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	rule := []string{"PREROUTING",
+		"-t", string(Nat),
+		"-j", natChain.Name}
+
+	rule = append(rule, args...)
+
+	if !Exists(rule...) {
+		t.Fatalf("rule does not exist")
+	}
+
+	delRule := append([]string{"-D"}, rule...)
+	if _, err = Raw(delRule...); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestOutput(t *testing.T) {
+	args := []string{
+		"-o", "lo",
+		"-d", "192.168.1.1"}
+
+	err := natChain.Output(Insert, args...)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	rule := []string{"OUTPUT",
+		"-t", string(natChain.Table),
+		"-j", natChain.Name}
+
+	rule = append(rule, args...)
+
+	if !Exists(rule...) {
+		t.Fatalf("rule does not exist")
+	}
+
+	delRule := append([]string{"-D"}, rule...)
+	if _, err = Raw(delRule...); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestCleanup(t *testing.T) {
+	var err error
+	var rules []byte
+
+	// Cleanup filter/FORWARD first otherwise output of iptables-save is dirty
+	link := []string{"-t", string(filterChain.Table),
+		string(Delete), "FORWARD",
+		"-o", filterChain.Bridge,
+		"-j", filterChain.Name}
+	if _, err = Raw(link...); err != nil {
+		t.Fatal(err)
+	}
+	filterChain.Remove()
+
+	err = RemoveExistingChain(chainName, Nat)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	rules, err = exec.Command("iptables-save").Output()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if strings.Contains(string(rules), chainName) {
+		t.Fatalf("Removing chain failed. %s found in iptables-save", chainName)
+	}
+}

+ 1 - 0
libnetwork/Godeps/_workspace/src/github.com/docker/docker/pkg/proxy/MAINTAINERS

@@ -0,0 +1 @@
+Erik Hollensbe <github@hollensbe.org> (@erikh)

+ 216 - 0
libnetwork/Godeps/_workspace/src/github.com/docker/docker/pkg/proxy/network_proxy_test.go

@@ -0,0 +1,216 @@
+package proxy
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"net"
+	"strings"
+	"testing"
+	"time"
+)
+
+var testBuf = []byte("Buffalo buffalo Buffalo buffalo buffalo buffalo Buffalo buffalo")
+var testBufSize = len(testBuf)
+
+type EchoServer interface {
+	Run()
+	Close()
+	LocalAddr() net.Addr
+}
+
+type TCPEchoServer struct {
+	listener net.Listener
+	testCtx  *testing.T
+}
+
+type UDPEchoServer struct {
+	conn    net.PacketConn
+	testCtx *testing.T
+}
+
+func NewEchoServer(t *testing.T, proto, address string) EchoServer {
+	var server EchoServer
+	if strings.HasPrefix(proto, "tcp") {
+		listener, err := net.Listen(proto, address)
+		if err != nil {
+			t.Fatal(err)
+		}
+		server = &TCPEchoServer{listener: listener, testCtx: t}
+	} else {
+		socket, err := net.ListenPacket(proto, address)
+		if err != nil {
+			t.Fatal(err)
+		}
+		server = &UDPEchoServer{conn: socket, testCtx: t}
+	}
+	return server
+}
+
+func (server *TCPEchoServer) Run() {
+	go func() {
+		for {
+			client, err := server.listener.Accept()
+			if err != nil {
+				return
+			}
+			go func(client net.Conn) {
+				if _, err := io.Copy(client, client); err != nil {
+					server.testCtx.Logf("can't echo to the client: %v\n", err.Error())
+				}
+				client.Close()
+			}(client)
+		}
+	}()
+}
+
+func (server *TCPEchoServer) LocalAddr() net.Addr { return server.listener.Addr() }
+func (server *TCPEchoServer) Close()              { server.listener.Addr() }
+
+func (server *UDPEchoServer) Run() {
+	go func() {
+		readBuf := make([]byte, 1024)
+		for {
+			read, from, err := server.conn.ReadFrom(readBuf)
+			if err != nil {
+				return
+			}
+			for i := 0; i != read; {
+				written, err := server.conn.WriteTo(readBuf[i:read], from)
+				if err != nil {
+					break
+				}
+				i += written
+			}
+		}
+	}()
+}
+
+func (server *UDPEchoServer) LocalAddr() net.Addr { return server.conn.LocalAddr() }
+func (server *UDPEchoServer) Close()              { server.conn.Close() }
+
+func testProxyAt(t *testing.T, proto string, proxy Proxy, addr string) {
+	defer proxy.Close()
+	go proxy.Run()
+	client, err := net.Dial(proto, addr)
+	if err != nil {
+		t.Fatalf("Can't connect to the proxy: %v", err)
+	}
+	defer client.Close()
+	client.SetDeadline(time.Now().Add(10 * time.Second))
+	if _, err = client.Write(testBuf); err != nil {
+		t.Fatal(err)
+	}
+	recvBuf := make([]byte, testBufSize)
+	if _, err = client.Read(recvBuf); err != nil {
+		t.Fatal(err)
+	}
+	if !bytes.Equal(testBuf, recvBuf) {
+		t.Fatal(fmt.Errorf("Expected [%v] but got [%v]", testBuf, recvBuf))
+	}
+}
+
+func testProxy(t *testing.T, proto string, proxy Proxy) {
+	testProxyAt(t, proto, proxy, proxy.FrontendAddr().String())
+}
+
+func TestTCP4Proxy(t *testing.T) {
+	backend := NewEchoServer(t, "tcp", "127.0.0.1:0")
+	defer backend.Close()
+	backend.Run()
+	frontendAddr := &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0}
+	proxy, err := NewProxy(frontendAddr, backend.LocalAddr())
+	if err != nil {
+		t.Fatal(err)
+	}
+	testProxy(t, "tcp", proxy)
+}
+
+func TestTCP6Proxy(t *testing.T) {
+	backend := NewEchoServer(t, "tcp", "[::1]:0")
+	defer backend.Close()
+	backend.Run()
+	frontendAddr := &net.TCPAddr{IP: net.IPv6loopback, Port: 0}
+	proxy, err := NewProxy(frontendAddr, backend.LocalAddr())
+	if err != nil {
+		t.Fatal(err)
+	}
+	testProxy(t, "tcp", proxy)
+}
+
+func TestTCPDualStackProxy(t *testing.T) {
+	// If I understand `godoc -src net favoriteAddrFamily` (used by the
+	// net.Listen* functions) correctly this should work, but it doesn't.
+	t.Skip("No support for dual stack yet")
+	backend := NewEchoServer(t, "tcp", "[::1]:0")
+	defer backend.Close()
+	backend.Run()
+	frontendAddr := &net.TCPAddr{IP: net.IPv6loopback, Port: 0}
+	proxy, err := NewProxy(frontendAddr, backend.LocalAddr())
+	if err != nil {
+		t.Fatal(err)
+	}
+	ipv4ProxyAddr := &net.TCPAddr{
+		IP:   net.IPv4(127, 0, 0, 1),
+		Port: proxy.FrontendAddr().(*net.TCPAddr).Port,
+	}
+	testProxyAt(t, "tcp", proxy, ipv4ProxyAddr.String())
+}
+
+func TestUDP4Proxy(t *testing.T) {
+	backend := NewEchoServer(t, "udp", "127.0.0.1:0")
+	defer backend.Close()
+	backend.Run()
+	frontendAddr := &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0}
+	proxy, err := NewProxy(frontendAddr, backend.LocalAddr())
+	if err != nil {
+		t.Fatal(err)
+	}
+	testProxy(t, "udp", proxy)
+}
+
+func TestUDP6Proxy(t *testing.T) {
+	backend := NewEchoServer(t, "udp", "[::1]:0")
+	defer backend.Close()
+	backend.Run()
+	frontendAddr := &net.UDPAddr{IP: net.IPv6loopback, Port: 0}
+	proxy, err := NewProxy(frontendAddr, backend.LocalAddr())
+	if err != nil {
+		t.Fatal(err)
+	}
+	testProxy(t, "udp", proxy)
+}
+
+func TestUDPWriteError(t *testing.T) {
+	frontendAddr := &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0}
+	// Hopefully, this port will be free: */
+	backendAddr := &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 25587}
+	proxy, err := NewProxy(frontendAddr, backendAddr)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer proxy.Close()
+	go proxy.Run()
+	client, err := net.Dial("udp", "127.0.0.1:25587")
+	if err != nil {
+		t.Fatalf("Can't connect to the proxy: %v", err)
+	}
+	defer client.Close()
+	// Make sure the proxy doesn't stop when there is no actual backend:
+	client.Write(testBuf)
+	client.Write(testBuf)
+	backend := NewEchoServer(t, "udp", "127.0.0.1:25587")
+	defer backend.Close()
+	backend.Run()
+	client.SetDeadline(time.Now().Add(10 * time.Second))
+	if _, err = client.Write(testBuf); err != nil {
+		t.Fatal(err)
+	}
+	recvBuf := make([]byte, testBufSize)
+	if _, err = client.Read(recvBuf); err != nil {
+		t.Fatal(err)
+	}
+	if !bytes.Equal(testBuf, recvBuf) {
+		t.Fatal(fmt.Errorf("Expected [%v] but got [%v]", testBuf, recvBuf))
+	}
+}

+ 29 - 0
libnetwork/Godeps/_workspace/src/github.com/docker/docker/pkg/proxy/proxy.go

@@ -0,0 +1,29 @@
+package proxy
+
+import (
+	"fmt"
+	"net"
+)
+
+type Proxy interface {
+	// Start forwarding traffic back and forth the front and back-end
+	// addresses.
+	Run()
+	// Stop forwarding traffic and close both ends of the Proxy.
+	Close()
+	// Return the address on which the proxy is listening.
+	FrontendAddr() net.Addr
+	// Return the proxied address.
+	BackendAddr() net.Addr
+}
+
+func NewProxy(frontendAddr, backendAddr net.Addr) (Proxy, error) {
+	switch frontendAddr.(type) {
+	case *net.UDPAddr:
+		return NewUDPProxy(frontendAddr.(*net.UDPAddr), backendAddr.(*net.UDPAddr))
+	case *net.TCPAddr:
+		return NewTCPProxy(frontendAddr.(*net.TCPAddr), backendAddr.(*net.TCPAddr))
+	default:
+		panic(fmt.Errorf("Unsupported protocol"))
+	}
+}

+ 22 - 0
libnetwork/Godeps/_workspace/src/github.com/docker/docker/pkg/proxy/stub_proxy.go

@@ -0,0 +1,22 @@
+package proxy
+
+import (
+	"net"
+)
+
+type StubProxy struct {
+	frontendAddr net.Addr
+	backendAddr  net.Addr
+}
+
+func (p *StubProxy) Run()                   {}
+func (p *StubProxy) Close()                 {}
+func (p *StubProxy) FrontendAddr() net.Addr { return p.frontendAddr }
+func (p *StubProxy) BackendAddr() net.Addr  { return p.backendAddr }
+
+func NewStubProxy(frontendAddr, backendAddr net.Addr) (Proxy, error) {
+	return &StubProxy{
+		frontendAddr: frontendAddr,
+		backendAddr:  backendAddr,
+	}, nil
+}

+ 90 - 0
libnetwork/Godeps/_workspace/src/github.com/docker/docker/pkg/proxy/tcp_proxy.go

@@ -0,0 +1,90 @@
+package proxy
+
+import (
+	"io"
+	"net"
+	"syscall"
+
+	log "github.com/Sirupsen/logrus"
+)
+
+type TCPProxy struct {
+	listener     *net.TCPListener
+	frontendAddr *net.TCPAddr
+	backendAddr  *net.TCPAddr
+}
+
+func NewTCPProxy(frontendAddr, backendAddr *net.TCPAddr) (*TCPProxy, error) {
+	listener, err := net.ListenTCP("tcp", frontendAddr)
+	if err != nil {
+		return nil, err
+	}
+	// If the port in frontendAddr was 0 then ListenTCP will have a picked
+	// a port to listen on, hence the call to Addr to get that actual port:
+	return &TCPProxy{
+		listener:     listener,
+		frontendAddr: listener.Addr().(*net.TCPAddr),
+		backendAddr:  backendAddr,
+	}, nil
+}
+
+func (proxy *TCPProxy) clientLoop(client *net.TCPConn, quit chan bool) {
+	backend, err := net.DialTCP("tcp", nil, proxy.backendAddr)
+	if err != nil {
+		log.Printf("Can't forward traffic to backend tcp/%v: %s\n", proxy.backendAddr, err)
+		client.Close()
+		return
+	}
+
+	event := make(chan int64)
+	var broker = func(to, from *net.TCPConn) {
+		written, err := io.Copy(to, from)
+		if err != nil {
+			// If the socket we are writing to is shutdown with
+			// SHUT_WR, forward it to the other end of the pipe:
+			if err, ok := err.(*net.OpError); ok && err.Err == syscall.EPIPE {
+				from.CloseWrite()
+			}
+		}
+		to.CloseRead()
+		event <- written
+	}
+
+	go broker(client, backend)
+	go broker(backend, client)
+
+	var transferred int64 = 0
+	for i := 0; i < 2; i++ {
+		select {
+		case written := <-event:
+			transferred += written
+		case <-quit:
+			// Interrupt the two brokers and "join" them.
+			client.Close()
+			backend.Close()
+			for ; i < 2; i++ {
+				transferred += <-event
+			}
+			return
+		}
+	}
+	client.Close()
+	backend.Close()
+}
+
+func (proxy *TCPProxy) Run() {
+	quit := make(chan bool)
+	defer close(quit)
+	for {
+		client, err := proxy.listener.Accept()
+		if err != nil {
+			log.Printf("Stopping proxy on tcp/%v for tcp/%v (%s)", proxy.frontendAddr, proxy.backendAddr, err)
+			return
+		}
+		go proxy.clientLoop(client.(*net.TCPConn), quit)
+	}
+}
+
+func (proxy *TCPProxy) Close()                 { proxy.listener.Close() }
+func (proxy *TCPProxy) FrontendAddr() net.Addr { return proxy.frontendAddr }
+func (proxy *TCPProxy) BackendAddr() net.Addr  { return proxy.backendAddr }

+ 158 - 0
libnetwork/Godeps/_workspace/src/github.com/docker/docker/pkg/proxy/udp_proxy.go

@@ -0,0 +1,158 @@
+package proxy
+
+import (
+	"encoding/binary"
+	"net"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+
+	log "github.com/Sirupsen/logrus"
+)
+
+const (
+	UDPConnTrackTimeout = 90 * time.Second
+	UDPBufSize          = 65507
+)
+
+// A net.Addr where the IP is split into two fields so you can use it as a key
+// in a map:
+type connTrackKey struct {
+	IPHigh uint64
+	IPLow  uint64
+	Port   int
+}
+
+func newConnTrackKey(addr *net.UDPAddr) *connTrackKey {
+	if len(addr.IP) == net.IPv4len {
+		return &connTrackKey{
+			IPHigh: 0,
+			IPLow:  uint64(binary.BigEndian.Uint32(addr.IP)),
+			Port:   addr.Port,
+		}
+	}
+	return &connTrackKey{
+		IPHigh: binary.BigEndian.Uint64(addr.IP[:8]),
+		IPLow:  binary.BigEndian.Uint64(addr.IP[8:]),
+		Port:   addr.Port,
+	}
+}
+
+type connTrackMap map[connTrackKey]*net.UDPConn
+
+type UDPProxy struct {
+	listener       *net.UDPConn
+	frontendAddr   *net.UDPAddr
+	backendAddr    *net.UDPAddr
+	connTrackTable connTrackMap
+	connTrackLock  sync.Mutex
+}
+
+func NewUDPProxy(frontendAddr, backendAddr *net.UDPAddr) (*UDPProxy, error) {
+	listener, err := net.ListenUDP("udp", frontendAddr)
+	if err != nil {
+		return nil, err
+	}
+	return &UDPProxy{
+		listener:       listener,
+		frontendAddr:   listener.LocalAddr().(*net.UDPAddr),
+		backendAddr:    backendAddr,
+		connTrackTable: make(connTrackMap),
+	}, nil
+}
+
+func (proxy *UDPProxy) replyLoop(proxyConn *net.UDPConn, clientAddr *net.UDPAddr, clientKey *connTrackKey) {
+	defer func() {
+		proxy.connTrackLock.Lock()
+		delete(proxy.connTrackTable, *clientKey)
+		proxy.connTrackLock.Unlock()
+		proxyConn.Close()
+	}()
+
+	readBuf := make([]byte, UDPBufSize)
+	for {
+		proxyConn.SetReadDeadline(time.Now().Add(UDPConnTrackTimeout))
+	again:
+		read, err := proxyConn.Read(readBuf)
+		if err != nil {
+			if err, ok := err.(*net.OpError); ok && err.Err == syscall.ECONNREFUSED {
+				// This will happen if the last write failed
+				// (e.g: nothing is actually listening on the
+				// proxied port on the container), ignore it
+				// and continue until UDPConnTrackTimeout
+				// expires:
+				goto again
+			}
+			return
+		}
+		for i := 0; i != read; {
+			written, err := proxy.listener.WriteToUDP(readBuf[i:read], clientAddr)
+			if err != nil {
+				return
+			}
+			i += written
+		}
+	}
+}
+
+func (proxy *UDPProxy) Run() {
+	readBuf := make([]byte, UDPBufSize)
+	for {
+		read, from, err := proxy.listener.ReadFromUDP(readBuf)
+		if err != nil {
+			// NOTE: Apparently ReadFrom doesn't return
+			// ECONNREFUSED like Read do (see comment in
+			// UDPProxy.replyLoop)
+			if !isClosedError(err) {
+				log.Printf("Stopping proxy on udp/%v for udp/%v (%s)", proxy.frontendAddr, proxy.backendAddr, err)
+			}
+			break
+		}
+
+		fromKey := newConnTrackKey(from)
+		proxy.connTrackLock.Lock()
+		proxyConn, hit := proxy.connTrackTable[*fromKey]
+		if !hit {
+			proxyConn, err = net.DialUDP("udp", nil, proxy.backendAddr)
+			if err != nil {
+				log.Printf("Can't proxy a datagram to udp/%s: %s\n", proxy.backendAddr, err)
+				proxy.connTrackLock.Unlock()
+				continue
+			}
+			proxy.connTrackTable[*fromKey] = proxyConn
+			go proxy.replyLoop(proxyConn, from, fromKey)
+		}
+		proxy.connTrackLock.Unlock()
+		for i := 0; i != read; {
+			written, err := proxyConn.Write(readBuf[i:read])
+			if err != nil {
+				log.Printf("Can't proxy a datagram to udp/%s: %s\n", proxy.backendAddr, err)
+				break
+			}
+			i += written
+		}
+	}
+}
+
+func (proxy *UDPProxy) Close() {
+	proxy.listener.Close()
+	proxy.connTrackLock.Lock()
+	defer proxy.connTrackLock.Unlock()
+	for _, conn := range proxy.connTrackTable {
+		conn.Close()
+	}
+}
+
+func (proxy *UDPProxy) FrontendAddr() net.Addr { return proxy.frontendAddr }
+func (proxy *UDPProxy) BackendAddr() net.Addr  { return proxy.backendAddr }
+
+func isClosedError(err error) bool {
+	/* This comparison is ugly, but unfortunately, net.go doesn't export errClosing.
+	 * See:
+	 * http://golang.org/src/pkg/net/net.go
+	 * https://code.google.com/p/go/issues/detail?id=4337
+	 * https://groups.google.com/forum/#!msg/golang-nuts/0_aaCvBmOcM/SptmDyX1XJMJ
+	 */
+	return strings.HasSuffix(err.Error(), "use of closed network connection")
+}

+ 192 - 0
libnetwork/Godeps/_workspace/src/github.com/vishvananda/netns/LICENSE

@@ -0,0 +1,192 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   Copyright 2014 Vishvananda Ishaya.
+   Copyright 2014 Docker, Inc.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

+ 49 - 0
libnetwork/Godeps/_workspace/src/github.com/vishvananda/netns/README.md

@@ -0,0 +1,49 @@
+# netns - network namespaces in go #
+
+The netns package provides an ultra-simple interface for handling
+network namespaces in go. Changing namespaces requires elevated
+privileges, so in most cases this code needs to be run as root.
+
+## Local Build and Test ##
+
+You can use go get command:
+
+    go get github.com/vishvananda/netns
+
+Testing (requires root):
+
+    sudo -E go test github.com/vishvananda/netns
+
+## Example ##
+
+```go
+package main
+
+import (
+    "net"
+    "runtime"
+    "github.com/vishvananada/netns"
+)
+
+func main() {
+    // Lock the OS Thread so we don't accidentally switch namespaces
+    runtime.LockOSThread()
+    defer runtime.UnlockOSThread()
+
+    // Save the current network namespace
+    origns, _ := netns.Get()
+    defer origns.Close()
+
+    // Create a new network namespace
+    newns, _ := netns.New()
+    defer newns.Close()
+
+    // Do something with tne network namespace
+    ifaces, _ := net.Interfaces()
+    fmt.Printf("Interfaces: %v\n", ifaces)
+
+    // Switch back to the original namespace
+    netns.Set(origns)
+}
+
+```

+ 66 - 0
libnetwork/Godeps/_workspace/src/github.com/vishvananda/netns/netns.go

@@ -0,0 +1,66 @@
+// Package netns allows ultra-simple network namespace handling. NsHandles
+// can be retrieved and set. Note that the current namespace is thread
+// local so actions that set and reset namespaces should use LockOSThread
+// to make sure the namespace doesn't change due to a goroutine switch.
+// It is best to close NsHandles when you are done with them. This can be
+// accomplished via a `defer ns.Close()` on the handle. Changing namespaces
+// requires elevated privileges, so in most cases this code needs to be run
+// as root.
+package netns
+
+import (
+	"fmt"
+	"syscall"
+)
+// NsHandle is a handle to a network namespace. It can be cast directly
+// to an int and used as a file descriptor.
+type NsHandle int
+
+// Equal determines if two network handles refer to the same network
+// namespace. This is done by comparing the device and inode that the
+// file descripors point to.
+func (ns NsHandle) Equal(other NsHandle) bool {
+	if ns == other {
+		return true
+	}
+	var s1, s2 syscall.Stat_t
+	if err := syscall.Fstat(int(ns), &s1); err != nil {
+		return false
+	}
+	if err := syscall.Fstat(int(other), &s2); err != nil {
+		return false
+	}
+	return (s1.Dev == s2.Dev) && (s1.Ino == s2.Ino)
+}
+
+// String shows the file descriptor number and its dev and inode.
+func (ns NsHandle) String() string {
+	var s syscall.Stat_t
+	if ns == -1 {
+		return "NS(None)"
+	}
+	if err := syscall.Fstat(int(ns), &s); err != nil {
+		return fmt.Sprintf("NS(%d: unknown)", ns)
+	}
+	return fmt.Sprintf("NS(%d: %d, %d)", ns, s.Dev, s.Ino)
+}
+
+// IsOpen returns true if Close() has not been called.
+func (ns NsHandle) IsOpen() bool {
+	return ns != -1
+}
+
+// Close closes the NsHandle and resets its file descriptor to -1.
+// It is not safe to use an NsHandle after Close() is called.
+func (ns *NsHandle) Close() error {
+	if err := syscall.Close(int(*ns)); err != nil {
+		return err
+	}
+	(*ns) = -1
+	return nil
+}
+
+// Get an empty (closed) NsHandle
+func None() NsHandle {
+	return NsHandle(-1)
+}

+ 206 - 0
libnetwork/Godeps/_workspace/src/github.com/vishvananda/netns/netns_linux.go

@@ -0,0 +1,206 @@
+package netns
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"syscall"
+)
+
+const (
+	// These constants belong in the syscall library but have not been
+	// added yet.
+	CLONE_NEWUTS  = 0x04000000 /* New utsname group? */
+	CLONE_NEWIPC  = 0x08000000 /* New ipcs */
+	CLONE_NEWUSER = 0x10000000 /* New user namespace */
+	CLONE_NEWPID  = 0x20000000 /* New pid namespace */
+	CLONE_NEWNET  = 0x40000000 /* New network namespace */
+	CLONE_IO      = 0x80000000 /* Get io context */
+)
+
+// Setns sets namespace using syscall. Note that this should be a method
+// in syscall but it has not been added.
+func Setns(ns NsHandle, nstype int) (err error) {
+	_, _, e1 := syscall.Syscall(SYS_SETNS, uintptr(ns), uintptr(nstype), 0)
+	if e1 != 0 {
+		err = e1
+	}
+	return
+}
+
+// Set sets the current network namespace to the namespace represented
+// by NsHandle.
+func Set(ns NsHandle) (err error) {
+	return Setns(ns, CLONE_NEWNET)
+}
+
+// New creates a new network namespace and returns a handle to it.
+func New() (ns NsHandle, err error) {
+	if err := syscall.Unshare(CLONE_NEWNET); err != nil {
+		return -1, err
+	}
+	return Get()
+}
+
+// Get gets a handle to the current threads network namespace.
+func Get() (NsHandle, error) {
+	return GetFromThread(os.Getpid(), syscall.Gettid())
+}
+
+// GetFromName gets a handle to a named network namespace such as one
+// created by `ip netns add`.
+func GetFromName(name string) (NsHandle, error) {
+	fd, err := syscall.Open(fmt.Sprintf("/var/run/netns/%s", name), syscall.O_RDONLY, 0)
+	if err != nil {
+		return -1, err
+	}
+	return NsHandle(fd), nil
+}
+
+// GetFromPid gets a handle to the network namespace of a given pid.
+func GetFromPid(pid int) (NsHandle, error) {
+	fd, err := syscall.Open(fmt.Sprintf("/proc/%d/ns/net", pid), syscall.O_RDONLY, 0)
+	if err != nil {
+		return -1, err
+	}
+	return NsHandle(fd), nil
+}
+
+// GetFromThread gets a handle to the network namespace of a given pid and tid.
+func GetFromThread(pid, tid int) (NsHandle, error) {
+	name := fmt.Sprintf("/proc/%d/task/%d/ns/net", pid, tid)
+	fd, err := syscall.Open(name, syscall.O_RDONLY, 0)
+	if err != nil {
+		return -1, err
+	}
+	return NsHandle(fd), nil
+}
+
+// GetFromDocker gets a handle to the network namespace of a docker container.
+// Id is prefixed matched against the running docker containers, so a short
+// identifier can be used as long as it isn't ambiguous.
+func GetFromDocker(id string) (NsHandle, error) {
+	pid, err := getPidForContainer(id)
+	if err != nil {
+		return -1, err
+	}
+	return GetFromPid(pid)
+}
+
+// borrowed from docker/utils/utils.go
+func findCgroupMountpoint(cgroupType string) (string, error) {
+	output, err := ioutil.ReadFile("/proc/mounts")
+	if err != nil {
+		return "", err
+	}
+
+	// /proc/mounts has 6 fields per line, one mount per line, e.g.
+	// cgroup /sys/fs/cgroup/devices cgroup rw,relatime,devices 0 0
+	for _, line := range strings.Split(string(output), "\n") {
+		parts := strings.Split(line, " ")
+		if len(parts) == 6 && parts[2] == "cgroup" {
+			for _, opt := range strings.Split(parts[3], ",") {
+				if opt == cgroupType {
+					return parts[1], nil
+				}
+			}
+		}
+	}
+
+	return "", fmt.Errorf("cgroup mountpoint not found for %s", cgroupType)
+}
+
+// Returns the relative path to the cgroup docker is running in.
+// borrowed from docker/utils/utils.go
+// modified to get the docker pid instead of using /proc/self
+func getThisCgroup(cgroupType string) (string, error) {
+	dockerpid, err := ioutil.ReadFile("/var/run/docker.pid")
+	if err != nil {
+		return "", err
+	}
+	result := strings.Split(string(dockerpid), "\n")
+	if len(result) == 0 || len(result[0]) == 0 {
+		return "", fmt.Errorf("docker pid not found in /var/run/docker.pid")
+	}
+	pid, err := strconv.Atoi(result[0])
+
+	output, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid))
+	if err != nil {
+		return "", err
+	}
+	for _, line := range strings.Split(string(output), "\n") {
+		parts := strings.Split(line, ":")
+		// any type used by docker should work
+		if parts[1] == cgroupType {
+			return parts[2], nil
+		}
+	}
+	return "", fmt.Errorf("cgroup '%s' not found in /proc/%d/cgroup", cgroupType, pid)
+}
+
+// Returns the first pid in a container.
+// borrowed from docker/utils/utils.go
+// modified to only return the first pid
+// modified to glob with id
+// modified to search for newer docker containers
+func getPidForContainer(id string) (int, error) {
+	pid := 0
+
+	// memory is chosen randomly, any cgroup used by docker works
+	cgroupType := "memory"
+
+	cgroupRoot, err := findCgroupMountpoint(cgroupType)
+	if err != nil {
+		return pid, err
+	}
+
+	cgroupThis, err := getThisCgroup(cgroupType)
+	if err != nil {
+		return pid, err
+	}
+
+	id += "*"
+
+	attempts := []string{
+		filepath.Join(cgroupRoot, cgroupThis, id, "tasks"),
+		// With more recent lxc versions use, cgroup will be in lxc/
+		filepath.Join(cgroupRoot, cgroupThis, "lxc", id, "tasks"),
+		// With more recent dockee, cgroup will be in docker/
+		filepath.Join(cgroupRoot, cgroupThis, "docker", id, "tasks"),
+	}
+
+	var filename string
+	for _, attempt := range attempts {
+		filenames, _ := filepath.Glob(attempt)
+		if len(filenames) > 1 {
+			return pid, fmt.Errorf("Ambiguous id supplied: %v", filenames)
+		} else if len(filenames) == 1 {
+			filename = filenames[0]
+			break
+		}
+	}
+
+	if filename == "" {
+		return pid, fmt.Errorf("Unable to find container: %v", id[:len(id)-1])
+	}
+
+	output, err := ioutil.ReadFile(filename)
+	if err != nil {
+		return pid, err
+	}
+
+	result := strings.Split(string(output), "\n")
+	if len(result) == 0 || len(result[0]) == 0 {
+		return pid, fmt.Errorf("No pid found for container")
+	}
+
+	pid, err = strconv.Atoi(result[0])
+	if err != nil {
+		return pid, fmt.Errorf("Invalid pid '%s': %s", result[0], err)
+	}
+
+	return pid, nil
+}

+ 5 - 0
libnetwork/Godeps/_workspace/src/github.com/vishvananda/netns/netns_linux_386.go

@@ -0,0 +1,5 @@
+package netns
+
+const (
+	SYS_SETNS = 346
+)

+ 5 - 0
libnetwork/Godeps/_workspace/src/github.com/vishvananda/netns/netns_linux_amd.go

@@ -0,0 +1,5 @@
+package netns
+
+const (
+	SYS_SETNS = 308
+)

+ 5 - 0
libnetwork/Godeps/_workspace/src/github.com/vishvananda/netns/netns_linux_arm.go

@@ -0,0 +1,5 @@
+package netns
+
+const (
+	SYS_SETNS = 374
+)

+ 66 - 0
libnetwork/Godeps/_workspace/src/github.com/vishvananda/netns/netns_test.go

@@ -0,0 +1,66 @@
+package netns
+
+import (
+	"runtime"
+	"sync"
+	"testing"
+)
+
+func TestGetNewSetDelete(t *testing.T) {
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+
+	origns, err := Get()
+	if err != nil {
+		t.Fatal(err)
+	}
+	newns, err := New()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if origns.Equal(newns) {
+		t.Fatal("New ns failed")
+	}
+	if err := Set(origns); err != nil {
+		t.Fatal(err)
+	}
+	newns.Close()
+	if newns.IsOpen() {
+		t.Fatal("newns still open after close", newns)
+	}
+	ns, err := Get()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !ns.Equal(origns) {
+		t.Fatal("Reset ns failed", origns, newns, ns)
+	}
+}
+
+func TestNone(t *testing.T) {
+	ns := None()
+	if ns.IsOpen() {
+		t.Fatal("None ns is open", ns)
+	}
+}
+
+func TestThreaded(t *testing.T) {
+	ncpu := runtime.GOMAXPROCS(-1)
+	if ncpu < 2 {
+		t.Skip("-cpu=2 or larger required")
+	}
+
+	// Lock this thread simply to ensure other threads get used.
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+
+	wg := &sync.WaitGroup{}
+	for i := 0; i < ncpu; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			TestGetNewSetDelete(t)
+		}()
+	}
+	wg.Wait()
+}

+ 35 - 0
libnetwork/Godeps/_workspace/src/github.com/vishvananda/netns/netns_unspecified.go

@@ -0,0 +1,35 @@
+// +build !linux
+
+package netns
+
+import (
+	"errors"
+)
+
+var (
+	ErrNotImplemented = errors.New("not implemented")
+)
+
+func Set(ns Namespace) (err error) {
+	return ErrNotImplemented
+}
+
+func New() (ns Namespace, err error) {
+	return -1, ErrNotImplemented
+}
+
+func Get() (Namespace, error) {
+	return -1, ErrNotImplemented
+}
+
+func GetFromName(name string) (Namespace, error) {
+	return -1, ErrNotImplemented
+}
+
+func GetFromPid(pid int) (Namespace, error) {
+	return -1, ErrNotImplemented
+}
+
+func GetFromDocker(id string) (Namespace, error) {
+	return -1, ErrNotImplemented
+}

+ 91 - 4
libnetwork/namespace.go

@@ -1,6 +1,14 @@
 package libnetwork
 
-import "syscall"
+import (
+	"fmt"
+	"os"
+	"runtime"
+	"syscall"
+
+	"github.com/vishvananda/netlink"
+	"github.com/vishvananda/netns"
+)
 
 // The networkNamespace type is the default implementation of the Namespace
 // interface. It simply creates a new network namespace, and moves an interface
@@ -11,17 +19,96 @@ type networkNamespace struct {
 }
 
 func createNetworkNamespace(path string) (Namespace, error) {
-	if err := reexec(cmdReexecCreateNamespace, path); err != nil {
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+
+	origns, err := netns.Get()
+	if err != nil {
+		return nil, err
+	}
+	defer origns.Close()
+
+	if err := createNamespaceFile(path); err != nil {
+		return nil, err
+	}
+
+	defer netns.Set(origns)
+	newns, err := netns.New()
+	if err != nil {
+		return nil, err
+	}
+	defer newns.Close()
+
+	if err := loopbackUp(); err != nil {
 		return nil, err
 	}
+
+	if err := syscall.Mount("/proc/self/ns/net", path, "bind", syscall.MS_BIND, ""); err != nil {
+		return nil, err
+	}
+
 	return &networkNamespace{path: path}, nil
 }
 
+func createNamespaceFile(path string) (err error) {
+	var f *os.File
+	if f, err = os.Create(path); err == nil {
+		f.Close()
+	}
+	return err
+}
+
+func loopbackUp() error {
+	iface, err := netlink.LinkByName("lo")
+	if err != nil {
+		return err
+	}
+	return netlink.LinkSetUp(iface)
+}
+
 func (n *networkNamespace) AddInterface(i *Interface) error {
-	// TODO Open pipe, pass fd to child and write serialized Interface on it.
-	if err := reexec(cmdReexecMoveInterface, i.SrcName, i.DstName); err != nil {
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+
+	origns, err := netns.Get()
+	if err != nil {
+		return err
+	}
+	defer origns.Close()
+
+	f, err := os.OpenFile(n.path, os.O_RDONLY, 0)
+	if err != nil {
+		return fmt.Errorf("failed get network namespace %q: %v", n.path, err)
+	}
+	defer f.Close()
+
+	// Find the network inteerface identified by the SrcName attribute.
+	iface, err := netlink.LinkByName(i.SrcName)
+	if err != nil {
+		return err
+	}
+
+	// Move the network interface to the destination namespace.
+	nsFD := f.Fd()
+	if err := netlink.LinkSetNsFd(iface, int(nsFD)); err != nil {
 		return err
 	}
+
+	if err = netns.Set(netns.NsHandle(nsFD)); err != nil {
+		return err
+	}
+	defer netns.Set(origns)
+
+	// Configure the interface now this is moved in the proper namespace.
+	if err := configureInterface(iface, i); err != nil {
+		return err
+	}
+
+	// Up the interface.
+	if err := netlink.LinkSetUp(iface); err != nil {
+		return err
+	}
+
 	n.interfaces = append(n.interfaces, i)
 	return nil
 }

+ 0 - 45
libnetwork/reexec.go

@@ -1,45 +0,0 @@
-package libnetwork
-
-import (
-	"fmt"
-	"os"
-	"os/exec"
-
-	dre "github.com/docker/docker/pkg/reexec"
-)
-
-type reexecCommand int
-
-const (
-	cmdReexecCreateNamespace reexecCommand = iota
-	cmdReexecMoveInterface
-)
-
-var reexecCommands = map[reexecCommand]struct {
-	Key        string
-	Entrypoint func()
-}{
-	cmdReexecCreateNamespace: {"netns-create", reexecCreateNamespace},
-	cmdReexecMoveInterface:   {"netns-moveif", reexecMoveInterface},
-}
-
-func init() {
-	for _, reexecCmd := range reexecCommands {
-		dre.Register(reexecCmd.Key, reexecCmd.Entrypoint)
-	}
-}
-
-func reexec(command reexecCommand, params ...string) error {
-	reexecCommand, ok := reexecCommands[command]
-	if !ok {
-		return fmt.Errorf("unknown reexec command %q", command)
-	}
-
-	cmd := &exec.Cmd{
-		Path:   dre.Self(),
-		Args:   append([]string{reexecCommand.Key}, params...),
-		Stdout: os.Stdout,
-		Stderr: os.Stderr,
-	}
-	return cmd.Run()
-}

+ 0 - 78
libnetwork/reexec_move_interface.go

@@ -1,78 +0,0 @@
-package libnetwork
-
-import (
-	"encoding/json"
-	"fmt"
-	"io/ioutil"
-	"os"
-	"runtime"
-	"syscall"
-
-	"github.com/vishvananda/netlink"
-)
-
-type setupError struct {
-	Message string
-}
-
-func (s setupError) Error() string {
-	return s.Message
-}
-
-func reexecMoveInterface() {
-	runtime.LockOSThread()
-
-	var (
-		err  error
-		pipe = os.NewFile(3, "child")
-	)
-
-	defer func() {
-		if err != nil {
-			ioutil.ReadAll(pipe)
-			if err := json.NewEncoder(pipe).Encode(setupError{Message: err.Error()}); err != nil {
-				panic(err)
-			}
-		}
-		pipe.Close()
-	}()
-
-	n := &Interface{}
-	if err = json.NewDecoder(pipe).Decode(n); err == nil {
-		err = setupInNS(os.Args[1], n)
-	}
-}
-
-func setupInNS(nsPath string, settings *Interface) error {
-	f, err := os.OpenFile(nsPath, os.O_RDONLY, 0)
-	if err != nil {
-		return fmt.Errorf("failed get network namespace %q: %v", nsPath, err)
-	}
-
-	// Find the network inteerface identified by the SrcName attribute.
-	iface, err := netlink.LinkByName(settings.SrcName)
-	if err != nil {
-		return err
-	}
-
-	// Move the network interface to the destination namespace.
-	nsFD := f.Fd()
-	if err := netlink.LinkSetNsFd(iface, int(nsFD)); err != nil {
-		return err
-	}
-	f.Close()
-
-	// Move the executing code to the destination namespace so we can start
-	// configure the interface.
-	if err := setns(nsFD, syscall.CLONE_NEWNET); err != nil {
-		return err
-	}
-
-	// Configure the interface now this is moved in the proper namespace.
-	if err := configureInterface(iface, settings); err != nil {
-		return err
-	}
-
-	// Up the interface.
-	return netlink.LinkSetUp(iface)
-}

+ 0 - 52
libnetwork/reexec_netns_create.go

@@ -1,52 +0,0 @@
-package libnetwork
-
-import (
-	"os"
-	"runtime"
-	"syscall"
-
-	log "github.com/Sirupsen/logrus"
-	"github.com/vishvananda/netlink"
-)
-
-func reexecCreateNamespace() {
-	runtime.LockOSThread()
-
-	if len(os.Args) < 2 {
-		log.Fatalf("no namespace path provided")
-	}
-
-	if err := createNamespaceFile(os.Args[1]); err != nil {
-		log.Fatal(err)
-	}
-
-	if err := syscall.Unshare(syscall.CLONE_NEWNET); err != nil {
-		log.Fatal(err)
-	}
-
-	if err := loopbackUp(); err != nil {
-		log.Fatal(err)
-	}
-
-	if err := syscall.Mount("/proc/self/ns/net", os.Args[1], "bind", syscall.MS_BIND, ""); err != nil {
-		log.Fatal(err)
-	}
-
-	os.Exit(0)
-}
-
-func createNamespaceFile(path string) (err error) {
-	var f *os.File
-	if f, err = os.Create(path); err == nil {
-		f.Close()
-	}
-	return err
-}
-
-func loopbackUp() error {
-	iface, err := netlink.LinkByName("lo")
-	if err != nil {
-		return err
-	}
-	return netlink.LinkSetUp(iface)
-}