瀏覽代碼

Merge branch 'libcontainer-in-docker' into add-libcontainer

Docker-DCO-1.1-Signed-off-by: Michael Crosby <michael@crosbymichael.com> (github: crosbymichael)
Michael Crosby 11 年之前
父節點
當前提交
89dbdb1f71

+ 1 - 0
container.go

@@ -530,6 +530,7 @@ func (container *Container) Start() (err error) {
 	}
 
 	populateCommand(container)
+	container.command.Env = env
 
 	// Setup logging of stdout and stderr to disk
 	if err := container.runtime.LogToDisk(container.stdout, container.logPath("json"), "stdout"); err != nil {

+ 19 - 18
execdriver/lxc/term.go

@@ -6,6 +6,7 @@ import (
 	"github.com/kr/pty"
 	"io"
 	"os"
+	"os/exec"
 )
 
 func SetTerminal(command *execdriver.Command, pipes *execdriver.Pipes) error {
@@ -26,8 +27,8 @@ func SetTerminal(command *execdriver.Command, pipes *execdriver.Pipes) error {
 }
 
 type TtyConsole struct {
-	master *os.File
-	slave  *os.File
+	MasterPty *os.File
+	SlavePty  *os.File
 }
 
 func NewTtyConsole(command *execdriver.Command, pipes *execdriver.Pipes) (*TtyConsole, error) {
@@ -36,28 +37,28 @@ func NewTtyConsole(command *execdriver.Command, pipes *execdriver.Pipes) (*TtyCo
 		return nil, err
 	}
 	tty := &TtyConsole{
-		master: ptyMaster,
-		slave:  ptySlave,
+		MasterPty: ptyMaster,
+		SlavePty:  ptySlave,
 	}
-	if err := tty.attach(command, pipes); err != nil {
+	if err := tty.AttachPipes(&command.Cmd, pipes); err != nil {
 		tty.Close()
 		return nil, err
 	}
+	command.Console = tty.SlavePty.Name()
 	return tty, nil
 }
 
 func (t *TtyConsole) Master() *os.File {
-	return t.master
+	return t.MasterPty
 }
 
 func (t *TtyConsole) Resize(h, w int) error {
-	return term.SetWinsize(t.master.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
+	return term.SetWinsize(t.MasterPty.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
 }
 
-func (t *TtyConsole) attach(command *execdriver.Command, pipes *execdriver.Pipes) error {
-	command.Stdout = t.slave
-	command.Stderr = t.slave
-	command.Console = t.slave.Name()
+func (t *TtyConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) error {
+	command.Stdout = t.SlavePty
+	command.Stderr = t.SlavePty
 
 	go func() {
 		if wb, ok := pipes.Stdout.(interface {
@@ -65,24 +66,24 @@ func (t *TtyConsole) attach(command *execdriver.Command, pipes *execdriver.Pipes
 		}); ok {
 			defer wb.CloseWriters()
 		}
-		io.Copy(pipes.Stdout, t.master)
+		io.Copy(pipes.Stdout, t.MasterPty)
 	}()
 
 	if pipes.Stdin != nil {
-		command.Stdin = t.slave
+		command.Stdin = t.SlavePty
 		command.SysProcAttr.Setctty = true
 
 		go func() {
 			defer pipes.Stdin.Close()
-			io.Copy(t.master, pipes.Stdin)
+			io.Copy(t.MasterPty, pipes.Stdin)
 		}()
 	}
 	return nil
 }
 
 func (t *TtyConsole) Close() error {
-	t.slave.Close()
-	return t.master.Close()
+	t.SlavePty.Close()
+	return t.MasterPty.Close()
 }
 
 type StdConsole struct {
@@ -91,13 +92,13 @@ type StdConsole struct {
 func NewStdConsole(command *execdriver.Command, pipes *execdriver.Pipes) (*StdConsole, error) {
 	std := &StdConsole{}
 
-	if err := std.attach(command, pipes); err != nil {
+	if err := std.AttachPipes(&command.Cmd, pipes); err != nil {
 		return nil, err
 	}
 	return std, nil
 }
 
-func (s *StdConsole) attach(command *execdriver.Command, pipes *execdriver.Pipes) error {
+func (s *StdConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) error {
 	command.Stdout = pipes.Stdout
 	command.Stderr = pipes.Stderr
 

+ 41 - 0
execdriver/namespaces/default_template.go

@@ -0,0 +1,41 @@
+package namespaces
+
+import (
+	"github.com/dotcloud/docker/pkg/cgroups"
+	"github.com/dotcloud/docker/pkg/libcontainer"
+)
+
+// getDefaultTemplate returns the docker default for
+// the libcontainer configuration file
+func getDefaultTemplate() *libcontainer.Container {
+	return &libcontainer.Container{
+		Capabilities: libcontainer.Capabilities{
+			libcontainer.CAP_SETPCAP,
+			libcontainer.CAP_SYS_MODULE,
+			libcontainer.CAP_SYS_RAWIO,
+			libcontainer.CAP_SYS_PACCT,
+			libcontainer.CAP_SYS_ADMIN,
+			libcontainer.CAP_SYS_NICE,
+			libcontainer.CAP_SYS_RESOURCE,
+			libcontainer.CAP_SYS_TIME,
+			libcontainer.CAP_SYS_TTY_CONFIG,
+			libcontainer.CAP_MKNOD,
+			libcontainer.CAP_AUDIT_WRITE,
+			libcontainer.CAP_AUDIT_CONTROL,
+			libcontainer.CAP_MAC_ADMIN,
+			libcontainer.CAP_MAC_OVERRIDE,
+			libcontainer.CAP_NET_ADMIN,
+		},
+		Namespaces: libcontainer.Namespaces{
+			libcontainer.CLONE_NEWIPC,
+			libcontainer.CLONE_NEWNET,
+			libcontainer.CLONE_NEWNS,
+			libcontainer.CLONE_NEWPID,
+			libcontainer.CLONE_NEWUTS,
+		},
+		Cgroups: &cgroups.Cgroup{
+			Name:         "docker",
+			DeviceAccess: false,
+		},
+	}
+}

+ 216 - 0
execdriver/namespaces/driver.go

@@ -0,0 +1,216 @@
+package namespaces
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"github.com/dotcloud/docker/execdriver"
+	"github.com/dotcloud/docker/execdriver/lxc"
+	"github.com/dotcloud/docker/pkg/libcontainer"
+	"github.com/dotcloud/docker/pkg/libcontainer/nsinit"
+	"io"
+	"io/ioutil"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"syscall"
+)
+
+const (
+	DriverName = "namespaces"
+	Version    = "0.1"
+)
+
+var (
+	ErrNotSupported = errors.New("not supported")
+)
+
+func init() {
+	execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error {
+		return nil
+	})
+}
+
+type driver struct {
+}
+
+func NewDriver() (*driver, error) {
+	return &driver{}, nil
+}
+
+func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
+	var (
+		term        nsinit.Terminal
+		container   = createContainer(c)
+		factory     = &dockerCommandFactory{c}
+		stateWriter = &dockerStateWriter{
+			callback: startCallback,
+			c:        c,
+			dsw:      &nsinit.DefaultStateWriter{c.Rootfs},
+		}
+	)
+	if c.Tty {
+		term = &dockerTtyTerm{
+			pipes: pipes,
+		}
+	} else {
+		term = &dockerStdTerm{
+			pipes: pipes,
+		}
+	}
+	c.Terminal = term
+	if err := writeContainerFile(container, c.Rootfs); err != nil {
+		return -1, err
+	}
+	args := append([]string{c.Entrypoint}, c.Arguments...)
+	return nsinit.Exec(container, factory, stateWriter, term, "/nsinit.log", args)
+}
+
+func (d *driver) Kill(p *execdriver.Command, sig int) error {
+	return p.Process.Kill()
+}
+
+func (d *driver) Restore(c *execdriver.Command) error {
+	return ErrNotSupported
+}
+
+func (d *driver) Info(id string) execdriver.Info {
+	return nil
+}
+
+func (d *driver) Name() string {
+	return fmt.Sprintf("%s-%s", DriverName, Version)
+}
+
+func (d *driver) GetPidsForContainer(id string) ([]int, error) {
+	return nil, ErrNotSupported
+}
+
+func writeContainerFile(container *libcontainer.Container, rootfs string) error {
+	data, err := json.Marshal(container)
+	if err != nil {
+		return err
+	}
+	return ioutil.WriteFile(filepath.Join(rootfs, "container.json"), data, 0755)
+}
+
+func getEnv(key string, env []string) string {
+	for _, pair := range env {
+		parts := strings.Split(pair, "=")
+		if parts[0] == key {
+			return parts[1]
+		}
+	}
+	return ""
+}
+
+type dockerCommandFactory struct {
+	c *execdriver.Command
+}
+
+// Create returns an exec.Cmd with the Cloneflags set to the proper namespaces
+// defined on the container's configuration and uses the nsinit binary found on
+// the PATH as the init with the args provided
+func (d *dockerCommandFactory) Create(container *libcontainer.Container,
+	console, logFile string, syncFd uintptr, args []string) *exec.Cmd {
+	c := d.c
+	aname, _ := exec.LookPath("nsinit")
+	c.Path = aname
+	c.Args = append([]string{
+		aname,
+		"-console", console,
+		"-pipe", fmt.Sprint(syncFd),
+		"-log", logFile,
+		"init",
+	}, args...)
+	c.SysProcAttr = &syscall.SysProcAttr{
+		Cloneflags: uintptr(nsinit.GetNamespaceFlags(container.Namespaces)),
+	}
+	c.Env = container.Env
+	c.Dir = c.Rootfs
+
+	return &c.Cmd
+}
+
+type dockerStateWriter struct {
+	dsw      nsinit.StateWriter
+	c        *execdriver.Command
+	callback execdriver.StartCallback
+}
+
+func (d *dockerStateWriter) WritePid(pid int) error {
+	err := d.dsw.WritePid(pid)
+	if d.callback != nil {
+		d.callback(d.c)
+	}
+	return err
+}
+
+func (d *dockerStateWriter) DeletePid() error {
+	return d.dsw.DeletePid()
+}
+
+func createContainer(c *execdriver.Command) *libcontainer.Container {
+	container := getDefaultTemplate()
+
+	container.Hostname = getEnv("HOSTNAME", c.Env)
+	container.Tty = c.Tty
+	container.User = c.User
+	container.WorkingDir = c.WorkingDir
+	container.Env = c.Env
+
+	container.Env = append(container.Env, "container=docker")
+
+	if c.Network != nil {
+		container.Network = &libcontainer.Network{
+			Mtu:     c.Network.Mtu,
+			Address: fmt.Sprintf("%s/%d", c.Network.IPAddress, c.Network.IPPrefixLen),
+			Gateway: c.Network.Gateway,
+			Type:    "veth",
+			Context: libcontainer.Context{
+				"prefix": "dock",
+				"bridge": c.Network.Bridge,
+			},
+		}
+	}
+	if c.Privileged {
+		container.Capabilities = nil
+	}
+	if c.Resources != nil {
+		container.Cgroups.CpuShares = c.Resources.CpuShares
+		container.Cgroups.Memory = c.Resources.Memory
+		container.Cgroups.MemorySwap = c.Resources.MemorySwap
+	}
+	return container
+}
+
+type dockerStdTerm struct {
+	lxc.StdConsole
+	pipes *execdriver.Pipes
+}
+
+func (d *dockerStdTerm) Attach(cmd *exec.Cmd) error {
+	return d.AttachPipes(cmd, d.pipes)
+}
+
+func (d *dockerStdTerm) SetMaster(master *os.File) {
+	// do nothing
+}
+
+type dockerTtyTerm struct {
+	lxc.TtyConsole
+	pipes *execdriver.Pipes
+}
+
+func (t *dockerTtyTerm) Attach(cmd *exec.Cmd) error {
+	go io.Copy(t.pipes.Stdout, t.MasterPty)
+	if t.pipes.Stdin != nil {
+		go io.Copy(t.MasterPty, t.pipes.Stdin)
+	}
+	return nil
+}
+
+func (t *dockerTtyTerm) SetMaster(master *os.File) {
+	t.MasterPty = master
+}

+ 26 - 0
execdriver/namespaces/term.go

@@ -0,0 +1,26 @@
+package namespaces
+
+import (
+	"github.com/dotcloud/docker/execdriver"
+	"github.com/dotcloud/docker/pkg/term"
+	"os"
+)
+
+type NsinitTerm struct {
+	master *os.File
+}
+
+func NewTerm(pipes *execdriver.Pipes, master *os.File) *NsinitTerm {
+	return &NsinitTerm{master}
+}
+
+func (t *NsinitTerm) Close() error {
+	return t.master.Close()
+}
+
+func (t *NsinitTerm) Resize(h, w int) error {
+	if t.master != nil {
+		return term.SetWinsize(t.master.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
+	}
+	return nil
+}

+ 1 - 1
pkg/cgroups/cgroups.go

@@ -132,7 +132,7 @@ func (c *Cgroup) Apply(pid int) error {
 	// http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/
 	//
 	// we can pick any subsystem to find the root
-	cgroupRoot, err := FindCgroupMountpoint("memory")
+	cgroupRoot, err := FindCgroupMountpoint("cpu")
 	if err != nil {
 		return err
 	}

+ 7 - 2
pkg/libcontainer/README.md

@@ -45,12 +45,17 @@ Sample `container.json` file:
         "AUDIT_WRITE",
         "AUDIT_CONTROL",
         "MAC_OVERRIDE",
-        "MAC_ADMIN"
+        "MAC_ADMIN",
+        "NET_ADMIN"
     ],
     "network": {
+        "type": "veth",
+        "context": {
+            "bridge": "docker0",
+            "prefix": "dock"
+        },
         "address": "172.17.0.100/16",
         "gateway": "172.17.42.1",
-        "bridge": "docker0",
         "mtu": 1500
     },
     "cgroups": {

+ 9 - 4
pkg/libcontainer/container.go

@@ -4,6 +4,10 @@ import (
 	"github.com/dotcloud/docker/pkg/cgroups"
 )
 
+// Context is a generic key value pair that allows
+// arbitrary data to be sent
+type Context map[string]string
+
 // Container defines configuration options for how a
 // container is setup inside a directory and how a process should be executed
 type Container struct {
@@ -24,8 +28,9 @@ type Container struct {
 // The network configuration can be omited from a container causing the
 // container to be setup with the host's networking stack
 type Network struct {
-	Address string `json:"address,omitempty"`
-	Gateway string `json:"gateway,omitempty"`
-	Bridge  string `json:"bridge,omitempty"`
-	Mtu     int    `json:"mtu,omitempty"`
+	Type    string  `json:"type,omitempty"`    // type of networking to setup i.e. veth, macvlan, etc
+	Context Context `json:"context,omitempty"` // generic context for type specific networking options
+	Address string  `json:"address,omitempty"`
+	Gateway string  `json:"gateway,omitempty"`
+	Mtu     int     `json:"mtu,omitempty"`
 }

+ 7 - 2
pkg/libcontainer/container.json

@@ -28,12 +28,17 @@
         "AUDIT_WRITE",
         "AUDIT_CONTROL",
         "MAC_OVERRIDE",
-        "MAC_ADMIN"
+        "MAC_ADMIN",
+        "NET_ADMIN"
     ],
     "network": {
+        "type": "veth",
+        "context": {
+            "bridge": "docker0",
+            "prefix": "dock"
+        },
         "address": "172.17.0.100/16",
         "gateway": "172.17.42.1",
-        "bridge": "docker0",
         "mtu": 1500
     },
     "cgroups": {

+ 32 - 0
pkg/libcontainer/network/strategy.go

@@ -0,0 +1,32 @@
+package network
+
+import (
+	"errors"
+	"github.com/dotcloud/docker/pkg/libcontainer"
+)
+
+var (
+	ErrNotValidStrategyType = errors.New("not a valid network strategy type")
+)
+
+var strategies = map[string]NetworkStrategy{
+	"veth": &Veth{},
+}
+
+// NetworkStrategy represents a specific network configuration for
+// a container's networking stack
+type NetworkStrategy interface {
+	Create(*libcontainer.Network, int) (libcontainer.Context, error)
+	Initialize(*libcontainer.Network, libcontainer.Context) error
+}
+
+// GetStrategy returns the specific network strategy for the
+// provided type.  If no strategy is registered for the type an
+// ErrNotValidStrategyType is returned.
+func GetStrategy(tpe string) (NetworkStrategy, error) {
+	s, exists := strategies[tpe]
+	if !exists {
+		return nil, ErrNotValidStrategyType
+	}
+	return s, nil
+}

+ 103 - 0
pkg/libcontainer/network/veth.go

@@ -0,0 +1,103 @@
+package network
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/pkg/libcontainer"
+	"github.com/dotcloud/docker/pkg/libcontainer/utils"
+	"log"
+)
+
+type Veth struct {
+}
+
+func (v *Veth) Create(n *libcontainer.Network, nspid int) (libcontainer.Context, error) {
+	log.Printf("creating veth network")
+	var (
+		bridge string
+		prefix string
+		exists bool
+	)
+	if bridge, exists = n.Context["bridge"]; !exists {
+		return nil, fmt.Errorf("bridge does not exist in network context")
+	}
+	if prefix, exists = n.Context["prefix"]; !exists {
+		return nil, fmt.Errorf("veth prefix does not exist in network context")
+	}
+	name1, name2, err := createVethPair(prefix)
+	if err != nil {
+		return nil, err
+	}
+	context := libcontainer.Context{
+		"vethHost":  name1,
+		"vethChild": name2,
+	}
+	log.Printf("veth pair created %s <> %s", name1, name2)
+	if err := SetInterfaceMaster(name1, bridge); err != nil {
+		return context, err
+	}
+	if err := SetMtu(name1, n.Mtu); err != nil {
+		return context, err
+	}
+	if err := InterfaceUp(name1); err != nil {
+		return context, err
+	}
+	log.Printf("setting %s inside %d namespace", name2, nspid)
+	if err := SetInterfaceInNamespacePid(name2, nspid); err != nil {
+		return context, err
+	}
+	return context, nil
+}
+
+func (v *Veth) Initialize(config *libcontainer.Network, context libcontainer.Context) error {
+	var (
+		vethChild string
+		exists    bool
+	)
+	if vethChild, exists = context["vethChild"]; !exists {
+		return fmt.Errorf("vethChild does not exist in network context")
+	}
+	if err := InterfaceDown(vethChild); err != nil {
+		return fmt.Errorf("interface down %s %s", vethChild, err)
+	}
+	if err := ChangeInterfaceName(vethChild, "eth0"); err != nil {
+		return fmt.Errorf("change %s to eth0 %s", vethChild, err)
+	}
+	if err := SetInterfaceIp("eth0", config.Address); err != nil {
+		return fmt.Errorf("set eth0 ip %s", err)
+	}
+	if err := SetMtu("eth0", config.Mtu); err != nil {
+		return fmt.Errorf("set eth0 mtu to %d %s", config.Mtu, err)
+	}
+	if err := InterfaceUp("eth0"); err != nil {
+		return fmt.Errorf("eth0 up %s", err)
+	}
+	if err := SetMtu("lo", config.Mtu); err != nil {
+		return fmt.Errorf("set lo mtu to %d %s", config.Mtu, err)
+	}
+	if err := InterfaceUp("lo"); err != nil {
+		return fmt.Errorf("lo up %s", err)
+	}
+	if config.Gateway != "" {
+		if err := SetDefaultGateway(config.Gateway); err != nil {
+			return fmt.Errorf("set gateway to %s %s", config.Gateway, err)
+		}
+	}
+	return nil
+}
+
+// createVethPair will automatically generate two random names for
+// the veth pair and ensure that they have been created
+func createVethPair(prefix string) (name1 string, name2 string, err error) {
+	name1, err = utils.GenerateRandomName(prefix, 4)
+	if err != nil {
+		return
+	}
+	name2, err = utils.GenerateRandomName(prefix, 4)
+	if err != nil {
+		return
+	}
+	if err = CreateVethPair(name1, name2); err != nil {
+		return
+	}
+	return
+}

+ 34 - 0
pkg/libcontainer/nsinit/command.go

@@ -0,0 +1,34 @@
+package nsinit
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/pkg/libcontainer"
+	"os"
+	"os/exec"
+	"syscall"
+)
+
+type CommandFactory interface {
+	Create(container *libcontainer.Container, console, logFile string, syncFd uintptr, args []string) *exec.Cmd
+}
+
+type DefaultCommandFactory struct{}
+
+// Create will return an exec.Cmd with the Cloneflags set to the proper namespaces
+// defined on the container's configuration and use the current binary as the init with the
+// args provided
+func (c *DefaultCommandFactory) Create(container *libcontainer.Container, console, logFile string, pipe uintptr, args []string) *exec.Cmd {
+	// get our binary name so we can always reexec ourself
+	name := os.Args[0]
+	command := exec.Command(name, append([]string{
+		"-console", console,
+		"-pipe", fmt.Sprint(pipe),
+		"-log", logFile,
+		"init"}, args...)...)
+
+	command.SysProcAttr = &syscall.SysProcAttr{
+		Cloneflags: uintptr(GetNamespaceFlags(container.Namespaces)),
+	}
+	command.Env = container.Env
+	return command
+}

+ 53 - 154
pkg/libcontainer/nsinit/exec.go

@@ -3,14 +3,9 @@
 package nsinit
 
 import (
-	"fmt"
 	"github.com/dotcloud/docker/pkg/libcontainer"
 	"github.com/dotcloud/docker/pkg/libcontainer/network"
-	"github.com/dotcloud/docker/pkg/libcontainer/utils"
 	"github.com/dotcloud/docker/pkg/system"
-	"github.com/dotcloud/docker/pkg/term"
-	"io"
-	"io/ioutil"
 	"log"
 	"os"
 	"os/exec"
@@ -19,102 +14,65 @@ import (
 
 // Exec performes setup outside of a namespace so that a container can be
 // executed.  Exec is a high level function for working with container namespaces.
-func Exec(container *libcontainer.Container, logFile string, args []string) (int, error) {
+func Exec(container *libcontainer.Container,
+	factory CommandFactory, state StateWriter, term Terminal,
+	logFile string, args []string) (int, error) {
 	var (
 		master  *os.File
 		console string
 		err     error
-
-		inPipe           io.WriteCloser
-		outPipe, errPipe io.ReadCloser
 	)
 
-	if container.Tty {
-		log.Printf("setting up master and console")
-		master, console, err = createMasterAndConsole()
-		if err != nil {
-			return -1, err
-		}
-	}
-
 	// create a pipe so that we can syncronize with the namespaced process and
 	// pass the veth name to the child
-	r, w, err := os.Pipe()
+	syncPipe, err := NewSyncPipe()
 	if err != nil {
 		return -1, err
 	}
-	system.UsetCloseOnExec(r.Fd())
 
-	command := createCommand(container, console, logFile, r.Fd(), args)
-	if !container.Tty {
-		log.Printf("opening pipes on command")
-		if inPipe, err = command.StdinPipe(); err != nil {
-			return -1, err
-		}
-		if outPipe, err = command.StdoutPipe(); err != nil {
-			return -1, err
-		}
-		if errPipe, err = command.StderrPipe(); err != nil {
+	if container.Tty {
+		log.Printf("setting up master and console")
+		master, console, err = CreateMasterAndConsole()
+		if err != nil {
 			return -1, err
 		}
+		term.SetMaster(master)
+	}
+
+	command := factory.Create(container, console, logFile, syncPipe.child.Fd(), args)
+	if err := term.Attach(command); err != nil {
+		return -1, err
 	}
+	defer term.Close()
 
 	log.Printf("staring init")
 	if err := command.Start(); err != nil {
 		return -1, err
 	}
-	log.Printf("writting state file")
-	if err := writePidFile(command); err != nil {
+	log.Printf("writing state file")
+	if err := state.WritePid(command.Process.Pid); err != nil {
 		command.Process.Kill()
 		return -1, err
 	}
-	defer deletePidFile()
+	defer func() {
+		log.Printf("removing state file")
+		state.DeletePid()
+	}()
 
 	// Do this before syncing with child so that no children
 	// can escape the cgroup
-	if container.Cgroups != nil {
-		log.Printf("setting up cgroups")
-		if err := container.Cgroups.Apply(command.Process.Pid); err != nil {
-			command.Process.Kill()
-			return -1, err
-		}
+	if err := SetupCgroups(container, command.Process.Pid); err != nil {
+		command.Process.Kill()
+		return -1, err
 	}
-
-	if container.Network != nil {
-		log.Printf("creating veth pair")
-		vethPair, err := initializeContainerVeth(container.Network.Bridge, container.Network.Mtu, command.Process.Pid)
-		if err != nil {
-			return -1, err
-		}
-		log.Printf("sending %s as veth pair name", vethPair)
-		sendVethName(w, vethPair)
+	if err := InitializeNetworking(container, command.Process.Pid, syncPipe); err != nil {
+		command.Process.Kill()
+		return -1, err
 	}
 
 	// Sync with child
 	log.Printf("closing sync pipes")
-	w.Close()
-	r.Close()
-
-	if container.Tty {
-		log.Printf("starting copy for tty")
-		go io.Copy(os.Stdout, master)
-		go io.Copy(master, os.Stdin)
-
-		state, err := setupWindow(master)
-		if err != nil {
-			command.Process.Kill()
-			return -1, err
-		}
-		defer term.RestoreTerminal(os.Stdin.Fd(), state)
-	} else {
-		log.Printf("starting copy for std pipes")
-		go func() {
-			defer inPipe.Close()
-			io.Copy(inPipe, os.Stdin)
-		}()
-		go io.Copy(os.Stdout, outPipe)
-		go io.Copy(os.Stderr, errPipe)
-	}
+	syncPipe.Close()
 
 	log.Printf("waiting on process")
 	if err := command.Wait(); err != nil {
@@ -126,55 +84,38 @@ func Exec(container *libcontainer.Container, logFile string, args []string) (int
 	return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil
 }
 
-// sendVethName writes the veth pair name to the child's stdin then closes the
-// pipe so that the child stops waiting for more data
-func sendVethName(pipe io.Writer, name string) {
-	fmt.Fprint(pipe, name)
-}
-
-// initializeContainerVeth will create a veth pair and setup the host's
-// side of the pair by setting the specified bridge as the master and bringing
-// up the interface.
-//
-// Then will with set the other side of the veth pair into the container's namespaced
-// using the pid and returns the veth's interface name to provide to the container to
-// finish setting up the interface inside the namespace
-func initializeContainerVeth(bridge string, mtu, nspid int) (string, error) {
-	name1, name2, err := createVethPair()
-	if err != nil {
-		return "", err
-	}
-	log.Printf("veth pair created %s <> %s", name1, name2)
-	if err := network.SetInterfaceMaster(name1, bridge); err != nil {
-		return "", err
-	}
-	if err := network.SetMtu(name1, mtu); err != nil {
-		return "", err
-	}
-	if err := network.InterfaceUp(name1); err != nil {
-		return "", err
-	}
-	log.Printf("setting %s inside %d namespace", name2, nspid)
-	if err := network.SetInterfaceInNamespacePid(name2, nspid); err != nil {
-		return "", err
+func SetupCgroups(container *libcontainer.Container, nspid int) error {
+	if container.Cgroups != nil {
+		log.Printf("setting up cgroups")
+		if err := container.Cgroups.Apply(nspid); err != nil {
+			return err
+		}
 	}
-	return name2, nil
+	return nil
 }
 
-func setupWindow(master *os.File) (*term.State, error) {
-	ws, err := term.GetWinsize(os.Stdin.Fd())
-	if err != nil {
-		return nil, err
-	}
-	if err := term.SetWinsize(master.Fd(), ws); err != nil {
-		return nil, err
+func InitializeNetworking(container *libcontainer.Container, nspid int, pipe *SyncPipe) error {
+	if container.Network != nil {
+		log.Printf("creating host network configuration type %s", container.Network.Type)
+		strategy, err := network.GetStrategy(container.Network.Type)
+		if err != nil {
+			return err
+		}
+		networkContext, err := strategy.Create(container.Network, nspid)
+		if err != nil {
+			return err
+		}
+		log.Printf("sending %v as network context", networkContext)
+		if err := pipe.SendToChild(networkContext); err != nil {
+			return err
+		}
 	}
-	return term.SetRawTerminal(os.Stdin.Fd())
+	return nil
 }
 
-// createMasterAndConsole will open /dev/ptmx on the host and retreive the
+// CreateMasterAndConsole will open /dev/ptmx on the host and retrieve the
 // pts name for use as the pty slave inside the container
-func createMasterAndConsole() (*os.File, string, error) {
+func CreateMasterAndConsole() (*os.File, string, error) {
 	master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
 	if err != nil {
 		return nil, "", err
@@ -188,45 +129,3 @@ func createMasterAndConsole() (*os.File, string, error) {
 	}
 	return master, console, nil
 }
-
-// createVethPair will automatically generage two random names for
-// the veth pair and ensure that they have been created
-func createVethPair() (name1 string, name2 string, err error) {
-	name1, err = utils.GenerateRandomName("dock", 4)
-	if err != nil {
-		return
-	}
-	name2, err = utils.GenerateRandomName("dock", 4)
-	if err != nil {
-		return
-	}
-	if err = network.CreateVethPair(name1, name2); err != nil {
-		return
-	}
-	return
-}
-
-// writePidFile writes the namespaced processes pid to .nspid in the rootfs for the container
-func writePidFile(command *exec.Cmd) error {
-	return ioutil.WriteFile(".nspid", []byte(fmt.Sprint(command.Process.Pid)), 0655)
-}
-
-func deletePidFile() error {
-	return os.Remove(".nspid")
-}
-
-// createCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces
-// defined on the container's configuration and use the current binary as the init with the
-// args provided
-func createCommand(container *libcontainer.Container, console, logFile string, pipe uintptr, args []string) *exec.Cmd {
-	command := exec.Command("nsinit", append([]string{
-		"-console", console,
-		"-pipe", fmt.Sprint(pipe),
-		"-log", logFile,
-		"init"}, args...)...)
-
-	command.SysProcAttr = &syscall.SysProcAttr{
-		Cloneflags: uintptr(getNamespaceFlags(container.Namespaces)),
-	}
-	return command
-}

+ 30 - 51
pkg/libcontainer/nsinit/init.go

@@ -8,17 +8,16 @@ import (
 	"github.com/dotcloud/docker/pkg/libcontainer/capabilities"
 	"github.com/dotcloud/docker/pkg/libcontainer/network"
 	"github.com/dotcloud/docker/pkg/system"
-	"io"
-	"io/ioutil"
 	"log"
 	"os"
+	"os/exec"
 	"path/filepath"
 	"syscall"
 )
 
 // Init is the init process that first runs inside a new namespace to setup mounts, users, networking,
 // and other options required for the new container.
-func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe io.ReadCloser, args []string) error {
+func Init(container *libcontainer.Container, uncleanRootfs, console string, syncPipe *SyncPipe, args []string) error {
 	rootfs, err := resolveRootfs(uncleanRootfs)
 	if err != nil {
 		return err
@@ -26,19 +25,18 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe
 	log.Printf("initializing namespace at %s", rootfs)
 
 	// We always read this as it is a way to sync with the parent as well
-	tempVethName, err := getVethName(pipe)
+	context, err := syncPipe.ReadFromParent()
 	if err != nil {
+		syncPipe.Close()
 		return err
 	}
-	if tempVethName != "" {
-		log.Printf("received veth name %s", tempVethName)
-	}
+	syncPipe.Close()
+	log.Printf("received context from parent %v", context)
+
 	if console != "" {
 		log.Printf("setting up console for %s", console)
 		// close pipes so that we can replace it with the pty
-		os.Stdin.Close()
-		os.Stdout.Close()
-		os.Stderr.Close()
+		closeStdPipes()
 		slave, err := openTerminal(console, syscall.O_RDWR)
 		if err != nil {
 			return fmt.Errorf("open terminal %s", err)
@@ -61,7 +59,7 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe
 	if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil {
 		return fmt.Errorf("setup mount namespace %s", err)
 	}
-	if err := setupVethNetwork(container.Network, tempVethName); err != nil {
+	if err := setupNetwork(container.Network, context); err != nil {
 		return fmt.Errorf("setup networking %s", err)
 	}
 	if err := system.Sethostname(container.Hostname); err != nil {
@@ -80,13 +78,27 @@ func Init(container *libcontainer.Container, uncleanRootfs, console string, pipe
 			return fmt.Errorf("chdir to %s %s", container.WorkingDir, err)
 		}
 	}
-	log.Printf("execing %s goodbye", args[0])
-	if err := system.Exec(args[0], args[0:], container.Env); err != nil {
+	return execArgs(args, container.Env)
+}
+
+func execArgs(args []string, env []string) error {
+	name, err := exec.LookPath(args[0])
+	if err != nil {
+		return err
+	}
+	log.Printf("execing %s goodbye", name)
+	if err := system.Exec(name, args[0:], env); err != nil {
 		return fmt.Errorf("exec %s", err)
 	}
 	panic("unreachable")
 }
 
+func closeStdPipes() {
+	os.Stdin.Close()
+	os.Stdout.Close()
+	os.Stderr.Close()
+}
+
 // resolveRootfs ensures that the current working directory is
 // not a symlink and returns the absolute path to the rootfs
 func resolveRootfs(uncleanRootfs string) (string, error) {
@@ -139,46 +151,13 @@ func openTerminal(name string, flag int) (*os.File, error) {
 // setupVethNetwork uses the Network config if it is not nil to initialize
 // the new veth interface inside the container for use by changing the name to eth0
 // setting the MTU and IP address along with the default gateway
-func setupVethNetwork(config *libcontainer.Network, tempVethName string) error {
+func setupNetwork(config *libcontainer.Network, context libcontainer.Context) error {
 	if config != nil {
-		if err := network.InterfaceDown(tempVethName); err != nil {
-			return fmt.Errorf("interface down %s %s", tempVethName, err)
-		}
-		if err := network.ChangeInterfaceName(tempVethName, "eth0"); err != nil {
-			return fmt.Errorf("change %s to eth0 %s", tempVethName, err)
-		}
-		if err := network.SetInterfaceIp("eth0", config.Address); err != nil {
-			return fmt.Errorf("set eth0 ip %s", err)
-		}
-		if err := network.SetMtu("eth0", config.Mtu); err != nil {
-			return fmt.Errorf("set eth0 mtu to %d %s", config.Mtu, err)
-		}
-		if err := network.InterfaceUp("eth0"); err != nil {
-			return fmt.Errorf("eth0 up %s", err)
-		}
-		if err := network.SetMtu("lo", config.Mtu); err != nil {
-			return fmt.Errorf("set lo mtu to %d %s", config.Mtu, err)
-		}
-		if err := network.InterfaceUp("lo"); err != nil {
-			return fmt.Errorf("lo up %s", err)
-		}
-		if config.Gateway != "" {
-			if err := network.SetDefaultGateway(config.Gateway); err != nil {
-				return fmt.Errorf("set gateway to %s %s", config.Gateway, err)
-			}
+		strategy, err := network.GetStrategy(config.Type)
+		if err != nil {
+			return err
 		}
+		return strategy.Initialize(config, context)
 	}
 	return nil
 }
-
-// getVethName reads from Stdin the temp veth name
-// sent by the parent processes after the veth pair
-// has been created and setup
-func getVethName(pipe io.ReadCloser) (string, error) {
-	defer pipe.Close()
-	data, err := ioutil.ReadAll(pipe)
-	if err != nil {
-		return "", fmt.Errorf("error reading from stdin %s", err)
-	}
-	return string(data), nil
-}

+ 1 - 1
pkg/libcontainer/nsinit/ns_linux.go

@@ -28,7 +28,7 @@ var namespaceFileMap = map[libcontainer.Namespace]string{
 
 // getNamespaceFlags parses the container's Namespaces options to set the correct
 // flags on clone, unshare, and setns
-func getNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) {
+func GetNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) {
 	for _, ns := range namespaces {
 		flag |= namespaceMap[ns]
 	}

+ 10 - 2
pkg/libcontainer/nsinit/nsinit/main.go

@@ -57,7 +57,11 @@ func main() {
 		if nspid > 0 {
 			exitCode, err = nsinit.ExecIn(container, nspid, flag.Args()[1:])
 		} else {
-			exitCode, err = nsinit.Exec(container, logFile, flag.Args()[1:])
+			term := nsinit.NewTerminal(os.Stdin, os.Stdout, os.Stderr, container.Tty)
+			exitCode, err = nsinit.Exec(container,
+				&nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{},
+				term,
+				logFile, flag.Args()[1:])
 		}
 		if err != nil {
 			log.Fatal(err)
@@ -72,7 +76,11 @@ func main() {
 		if flag.NArg() < 2 {
 			log.Fatal(ErrWrongArguments)
 		}
-		if err := nsinit.Init(container, cwd, console, os.NewFile(uintptr(pipeFd), "pipe"), flag.Args()[1:]); err != nil {
+		syncPipe, err := nsinit.NewSyncPipeFromFd(0, uintptr(pipeFd))
+		if err != nil {
+			log.Fatal(err)
+		}
+		if err := nsinit.Init(container, cwd, console, syncPipe, flag.Args()[1:]); err != nil {
 			log.Fatal(err)
 		}
 	default:

+ 26 - 0
pkg/libcontainer/nsinit/state.go

@@ -0,0 +1,26 @@
+package nsinit
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+)
+
+type StateWriter interface {
+	WritePid(pid int) error
+	DeletePid() error
+}
+
+type DefaultStateWriter struct {
+	Root string
+}
+
+// WritePid writes the namespaced process's pid to the .nspid file under d.Root for the container
+func (d *DefaultStateWriter) WritePid(pid int) error {
+	return ioutil.WriteFile(filepath.Join(d.Root, ".nspid"), []byte(fmt.Sprint(pid)), 0655)
+}
+
+func (d *DefaultStateWriter) DeletePid() error {
+	return os.Remove(filepath.Join(d.Root, ".nspid"))
+}

+ 73 - 0
pkg/libcontainer/nsinit/sync_pipe.go

@@ -0,0 +1,73 @@
+package nsinit
+
+import (
+	"encoding/json"
+	"fmt"
+	"github.com/dotcloud/docker/pkg/libcontainer"
+	"github.com/dotcloud/docker/pkg/system"
+	"io/ioutil"
+	"os"
+)
+
+// SyncPipe allows communication between a child process and
+// its parent and allows the two independent processes to
+// synchronize their state.
+type SyncPipe struct {
+	parent, child *os.File
+}
+
+func NewSyncPipe() (s *SyncPipe, err error) {
+	s = &SyncPipe{}
+	s.child, s.parent, err = os.Pipe()
+	if err != nil {
+		return nil, err
+	}
+	system.UsetCloseOnExec(s.child.Fd())
+	return s, nil
+}
+
+func NewSyncPipeFromFd(parendFd, childFd uintptr) (*SyncPipe, error) {
+	s := &SyncPipe{}
+	if parendFd > 0 {
+		s.parent = os.NewFile(parendFd, "parendPipe")
+	} else if childFd > 0 {
+		s.child = os.NewFile(childFd, "childPipe")
+	} else {
+		return nil, fmt.Errorf("no valid sync pipe fd specified")
+	}
+	return s, nil
+}
+
+func (s *SyncPipe) SendToChild(context libcontainer.Context) error {
+	data, err := json.Marshal(context)
+	if err != nil {
+		return err
+	}
+	s.parent.Write(data)
+	return nil
+}
+
+func (s *SyncPipe) ReadFromParent() (libcontainer.Context, error) {
+	data, err := ioutil.ReadAll(s.child)
+	if err != nil {
+		return nil, fmt.Errorf("error reading from sync pipe %s", err)
+	}
+	var context libcontainer.Context
+	if len(data) > 0 {
+		if err := json.Unmarshal(data, &context); err != nil {
+			return nil, err
+		}
+	}
+	return context, nil
+
+}
+
+func (s *SyncPipe) Close() error {
+	if s.parent != nil {
+		s.parent.Close()
+	}
+	if s.child != nil {
+		s.child.Close()
+	}
+	return nil
+}

+ 118 - 0
pkg/libcontainer/nsinit/term.go

@@ -0,0 +1,118 @@
+package nsinit
+
+import (
+	"github.com/dotcloud/docker/pkg/term"
+	"io"
+	"os"
+	"os/exec"
+)
+
+type Terminal interface {
+	io.Closer
+	SetMaster(*os.File)
+	Attach(*exec.Cmd) error
+	Resize(h, w int) error
+}
+
+func NewTerminal(stdin io.Reader, stdout, stderr io.Writer, tty bool) Terminal {
+	if tty {
+		return &TtyTerminal{
+			stdin:  stdin,
+			stdout: stdout,
+			stderr: stderr,
+		}
+	}
+	return &StdTerminal{
+		stdin:  stdin,
+		stdout: stdout,
+		stderr: stderr,
+	}
+}
+
+type TtyTerminal struct {
+	stdin          io.Reader
+	stdout, stderr io.Writer
+	master         *os.File
+	state          *term.State
+}
+
+func (t *TtyTerminal) Resize(h, w int) error {
+	return term.SetWinsize(t.master.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
+}
+
+func (t *TtyTerminal) SetMaster(master *os.File) {
+	t.master = master
+}
+
+func (t *TtyTerminal) Attach(command *exec.Cmd) error {
+	go io.Copy(t.stdout, t.master)
+	go io.Copy(t.master, t.stdin)
+
+	state, err := t.setupWindow(t.master, os.Stdin)
+	if err != nil {
+		command.Process.Kill()
+		return err
+	}
+	t.state = state
+	return err
+}
+
+// setupWindow gets the parent's window size, sets the master pty
+// to that size, and puts the parent's terminal into raw mode
+func (t *TtyTerminal) setupWindow(master, parent *os.File) (*term.State, error) {
+	ws, err := term.GetWinsize(parent.Fd())
+	if err != nil {
+		return nil, err
+	}
+	if err := term.SetWinsize(master.Fd(), ws); err != nil {
+		return nil, err
+	}
+	return term.SetRawTerminal(parent.Fd())
+}
+
+func (t *TtyTerminal) Close() error {
+	term.RestoreTerminal(os.Stdin.Fd(), t.state)
+	return t.master.Close()
+}
+
+type StdTerminal struct {
+	stdin          io.Reader
+	stdout, stderr io.Writer
+}
+
+func (s *StdTerminal) SetMaster(*os.File) {
+	// no need to set master on non tty
+}
+
+func (s *StdTerminal) Close() error {
+	return nil
+}
+
+func (s *StdTerminal) Resize(h, w int) error {
+	return nil
+}
+
+func (s *StdTerminal) Attach(command *exec.Cmd) error {
+	inPipe, err := command.StdinPipe()
+	if err != nil {
+		return err
+	}
+	outPipe, err := command.StdoutPipe()
+	if err != nil {
+		return err
+	}
+	errPipe, err := command.StderrPipe()
+	if err != nil {
+		return err
+	}
+
+	go func() {
+		defer inPipe.Close()
+		io.Copy(inPipe, s.stdin)
+	}()
+
+	go io.Copy(s.stdout, outPipe)
+	go io.Copy(s.stderr, errPipe)
+
+	return nil
+}

+ 3 - 2
runtime.go

@@ -7,7 +7,8 @@ import (
 	"github.com/dotcloud/docker/dockerversion"
 	"github.com/dotcloud/docker/engine"
 	"github.com/dotcloud/docker/execdriver"
-	"github.com/dotcloud/docker/execdriver/lxc"
+	_ "github.com/dotcloud/docker/execdriver/lxc"
+	"github.com/dotcloud/docker/execdriver/namespaces"
 	"github.com/dotcloud/docker/graphdriver"
 	"github.com/dotcloud/docker/graphdriver/aufs"
 	_ "github.com/dotcloud/docker/graphdriver/btrfs"
@@ -703,7 +704,7 @@ func NewRuntimeFromDirectory(config *DaemonConfig, eng *engine.Engine) (*Runtime
 
 	sysInfo := sysinfo.New(false)
 
-	ed, err := lxc.NewDriver(config.Root, sysInfo.AppArmor)
+	ed, err := namespaces.NewDriver()
 	if err != nil {
 		return nil, err
 	}