فهرست منبع

Merge pull request #3580 from crosbymichael/extract-lxc

Move LXC into a sub pkg and provide initial execution driver interface
Guillaume J. Charmes 11 سال پیش
والد
کامیت
fec97e72db

+ 3 - 3
api.go

@@ -657,16 +657,16 @@ func postContainersCreate(srv *Server, version float64, w http.ResponseWriter, r
 	for scanner.Scan() {
 		out.Warnings = append(out.Warnings, scanner.Text())
 	}
-	if job.GetenvInt("Memory") > 0 && !srv.runtime.capabilities.MemoryLimit {
+	if job.GetenvInt("Memory") > 0 && !srv.runtime.sysInfo.MemoryLimit {
 		log.Println("WARNING: Your kernel does not support memory limit capabilities. Limitation discarded.")
 		out.Warnings = append(out.Warnings, "Your kernel does not support memory limit capabilities. Limitation discarded.")
 	}
-	if job.GetenvInt("Memory") > 0 && !srv.runtime.capabilities.SwapLimit {
+	if job.GetenvInt("Memory") > 0 && !srv.runtime.sysInfo.SwapLimit {
 		log.Println("WARNING: Your kernel does not support swap limit capabilities. Limitation discarded.")
 		out.Warnings = append(out.Warnings, "Your kernel does not support memory swap capabilities. Limitation discarded.")
 	}
 
-	if !job.GetenvBool("NetworkDisabled") && srv.runtime.capabilities.IPv4ForwardingDisabled {
+	if !job.GetenvBool("NetworkDisabled") && srv.runtime.sysInfo.IPv4ForwardingDisabled {
 		log.Println("Warning: IPv4 forwarding is disabled.")
 		out.Warnings = append(out.Warnings, "IPv4 forwarding is disabled.")
 	}

+ 6 - 1
cgroups/cgroups.go

@@ -12,8 +12,13 @@ import (
 	"strings"
 )
 
-// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
+type Values struct {
+	Memory     int64 `json:"memory"`
+	MemorySwap int64 `json:"memory_swap"`
+	CpuShares  int64 `json:"cpu_shares"`
+}
 
+// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
 func FindCgroupMountpoint(subsystem string) (string, error) {
 	mounts, err := mount.GetMounts()
 	if err != nil {

+ 7 - 6
commands.go

@@ -12,6 +12,7 @@ import (
 	"github.com/dotcloud/docker/auth"
 	"github.com/dotcloud/docker/engine"
 	flag "github.com/dotcloud/docker/pkg/mflag"
+	"github.com/dotcloud/docker/pkg/sysinfo"
 	"github.com/dotcloud/docker/pkg/term"
 	"github.com/dotcloud/docker/registry"
 	"github.com/dotcloud/docker/utils"
@@ -470,7 +471,7 @@ func (cli *DockerCli) CmdInfo(args ...string) error {
 		fmt.Fprintf(cli.out, "Debug mode (client): %v\n", os.Getenv("DEBUG") != "")
 		fmt.Fprintf(cli.out, "Fds: %d\n", remoteInfo.GetInt("NFd"))
 		fmt.Fprintf(cli.out, "Goroutines: %d\n", remoteInfo.GetInt("NGoroutines"))
-		fmt.Fprintf(cli.out, "LXC Version: %s\n", remoteInfo.Get("LXCVersion"))
+		fmt.Fprintf(cli.out, "Execution Driver: %s\n", remoteInfo.Get("ExecutionDriver"))
 		fmt.Fprintf(cli.out, "EventsListeners: %d\n", remoteInfo.GetInt("NEventsListener"))
 		fmt.Fprintf(cli.out, "Kernel Version: %s\n", remoteInfo.Get("KernelVersion"))
 
@@ -1745,14 +1746,14 @@ func (cli *DockerCli) CmdTag(args ...string) error {
 }
 
 //FIXME Only used in tests
-func ParseRun(args []string, capabilities *Capabilities) (*Config, *HostConfig, *flag.FlagSet, error) {
+func ParseRun(args []string, sysInfo *sysinfo.SysInfo) (*Config, *HostConfig, *flag.FlagSet, error) {
 	cmd := flag.NewFlagSet("run", flag.ContinueOnError)
 	cmd.SetOutput(ioutil.Discard)
 	cmd.Usage = nil
-	return parseRun(cmd, args, capabilities)
+	return parseRun(cmd, args, sysInfo)
 }
 
-func parseRun(cmd *flag.FlagSet, args []string, capabilities *Capabilities) (*Config, *HostConfig, *flag.FlagSet, error) {
+func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config, *HostConfig, *flag.FlagSet, error) {
 	var (
 		// FIXME: use utils.ListOpts for attach and volumes?
 		flAttach  = NewListOpts(ValidateAttach)
@@ -1802,7 +1803,7 @@ func parseRun(cmd *flag.FlagSet, args []string, capabilities *Capabilities) (*Co
 	}
 
 	// Check if the kernel supports memory limit cgroup.
-	if capabilities != nil && *flMemoryString != "" && !capabilities.MemoryLimit {
+	if sysInfo != nil && *flMemoryString != "" && !sysInfo.MemoryLimit {
 		*flMemoryString = ""
 	}
 
@@ -1934,7 +1935,7 @@ func parseRun(cmd *flag.FlagSet, args []string, capabilities *Capabilities) (*Co
 		PublishAllPorts: *flPublishAll,
 	}
 
-	if capabilities != nil && flMemory > 0 && !capabilities.SwapLimit {
+	if sysInfo != nil && flMemory > 0 && !sysInfo.SwapLimit {
 		//fmt.Fprintf(stdout, "WARNING: Your kernel does not support swap limit capabilities. Limitation discarded.\n")
 		config.MemorySwap = -1
 	}

+ 131 - 212
container.go

@@ -1,11 +1,12 @@
 package docker
 
 import (
-	"bytes"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/cgroups"
+	"github.com/dotcloud/docker/execdriver"
 	"github.com/dotcloud/docker/graphdriver"
 	"github.com/dotcloud/docker/mount"
 	"github.com/dotcloud/docker/pkg/term"
@@ -16,9 +17,7 @@ import (
 	"log"
 	"net"
 	"os"
-	"os/exec"
 	"path"
-	"strconv"
 	"strings"
 	"sync"
 	"syscall"
@@ -55,7 +54,7 @@ type Container struct {
 	Name           string
 	Driver         string
 
-	cmd       *exec.Cmd
+	process   *execdriver.Process
 	stdout    *utils.WriteBroadcaster
 	stderr    *utils.WriteBroadcaster
 	stdin     io.ReadCloser
@@ -235,10 +234,6 @@ func (container *Container) Inject(file io.Reader, pth string) error {
 	return nil
 }
 
-func (container *Container) Cmd() *exec.Cmd {
-	return container.cmd
-}
-
 func (container *Container) When() time.Time {
 	return container.Created
 }
@@ -305,23 +300,14 @@ func (container *Container) generateEnvConfig(env []string) error {
 	return nil
 }
 
-func (container *Container) generateLXCConfig() error {
-	fo, err := os.Create(container.lxcConfigPath())
-	if err != nil {
-		return err
-	}
-	defer fo.Close()
-	return LxcTemplateCompiled.Execute(fo, container)
-}
-
-func (container *Container) startPty() error {
+func (container *Container) setupPty() error {
 	ptyMaster, ptySlave, err := pty.Open()
 	if err != nil {
 		return err
 	}
 	container.ptyMaster = ptyMaster
-	container.cmd.Stdout = ptySlave
-	container.cmd.Stderr = ptySlave
+	container.process.Stdout = ptySlave
+	container.process.Stderr = ptySlave
 
 	// Copy the PTYs to our broadcasters
 	go func() {
@@ -333,8 +319,8 @@ func (container *Container) startPty() error {
 
 	// stdin
 	if container.Config.OpenStdin {
-		container.cmd.Stdin = ptySlave
-		container.cmd.SysProcAttr.Setctty = true
+		container.process.Stdin = ptySlave
+		container.process.SysProcAttr.Setctty = true
 		go func() {
 			defer container.stdin.Close()
 			utils.Debugf("startPty: begin of stdin pipe")
@@ -342,18 +328,14 @@ func (container *Container) startPty() error {
 			utils.Debugf("startPty: end of stdin pipe")
 		}()
 	}
-	if err := container.cmd.Start(); err != nil {
-		return err
-	}
-	ptySlave.Close()
 	return nil
 }
 
-func (container *Container) start() error {
-	container.cmd.Stdout = container.stdout
-	container.cmd.Stderr = container.stderr
+func (container *Container) setupStd() error {
+	container.process.Stdout = container.stdout
+	container.process.Stderr = container.stderr
 	if container.Config.OpenStdin {
-		stdin, err := container.cmd.StdinPipe()
+		stdin, err := container.process.StdinPipe()
 		if err != nil {
 			return err
 		}
@@ -364,7 +346,7 @@ func (container *Container) start() error {
 			utils.Debugf("start: end of stdin pipe")
 		}()
 	}
-	return container.cmd.Start()
+	return nil
 }
 
 func (container *Container) Attach(stdin io.ReadCloser, stdinCloser io.Closer, stdout io.Writer, stderr io.Writer) chan error {
@@ -384,12 +366,14 @@ func (container *Container) Attach(stdin io.ReadCloser, stdinCloser io.Closer, s
 				if container.Config.StdinOnce && !container.Config.Tty {
 					defer cStdin.Close()
 				} else {
-					if cStdout != nil {
-						defer cStdout.Close()
-					}
-					if cStderr != nil {
-						defer cStderr.Close()
-					}
+					defer func() {
+						if cStdout != nil {
+							cStdout.Close()
+						}
+						if cStderr != nil {
+							cStderr.Close()
+						}
+					}()
 				}
 				if container.Config.Tty {
 					_, err = utils.CopyEscapable(cStdin, stdin)
@@ -485,12 +469,15 @@ func (container *Container) Attach(stdin io.ReadCloser, stdinCloser io.Closer, s
 	}
 
 	return utils.Go(func() error {
-		if cStdout != nil {
-			defer cStdout.Close()
-		}
-		if cStderr != nil {
-			defer cStderr.Close()
-		}
+		defer func() {
+			if cStdout != nil {
+				cStdout.Close()
+			}
+			if cStderr != nil {
+				cStderr.Close()
+			}
+		}()
+
 		// FIXME: how to clean up the stdin goroutine without the unwanted side effect
 		// of closing the passed stdin? Add an intermediary io.Pipe?
 		for i := 0; i < nJobs; i += 1 {
@@ -532,16 +519,16 @@ func (container *Container) Start() (err error) {
 	}
 
 	// Make sure the config is compatible with the current kernel
-	if container.Config.Memory > 0 && !container.runtime.capabilities.MemoryLimit {
+	if container.Config.Memory > 0 && !container.runtime.sysInfo.MemoryLimit {
 		log.Printf("WARNING: Your kernel does not support memory limit capabilities. Limitation discarded.\n")
 		container.Config.Memory = 0
 	}
-	if container.Config.Memory > 0 && !container.runtime.capabilities.SwapLimit {
+	if container.Config.Memory > 0 && !container.runtime.sysInfo.SwapLimit {
 		log.Printf("WARNING: Your kernel does not support swap limit capabilities. Limitation discarded.\n")
 		container.Config.MemorySwap = -1
 	}
 
-	if container.runtime.capabilities.IPv4ForwardingDisabled {
+	if container.runtime.sysInfo.IPv4ForwardingDisabled {
 		log.Printf("WARNING: IPv4 forwarding is disabled. Networking will not work")
 	}
 
@@ -559,38 +546,6 @@ func (container *Container) Start() (err error) {
 		return err
 	}
 
-	if err := container.generateLXCConfig(); err != nil {
-		return err
-	}
-
-	var lxcStart string = "lxc-start"
-	if container.hostConfig.Privileged && container.runtime.capabilities.AppArmor {
-		lxcStart = path.Join(container.runtime.config.Root, "lxc-start-unconfined")
-	}
-
-	params := []string{
-		lxcStart,
-		"-n", container.ID,
-		"-f", container.lxcConfigPath(),
-		"--",
-		"/.dockerinit",
-	}
-
-	// Networking
-	if !container.Config.NetworkDisabled {
-		network := container.NetworkSettings
-		params = append(params,
-			"-g", network.Gateway,
-			"-i", fmt.Sprintf("%s/%d", network.IPAddress, network.IPPrefixLen),
-			"-mtu", strconv.Itoa(container.runtime.config.Mtu),
-		)
-	}
-
-	// User
-	if container.Config.User != "" {
-		params = append(params, "-u", container.Config.User)
-	}
-
 	// Setup environment
 	env := []string{
 		"HOME=/",
@@ -602,10 +557,6 @@ func (container *Container) Start() (err error) {
 		env = append(env, "TERM=xterm")
 	}
 
-	if container.hostConfig.Privileged {
-		params = append(params, "-privileged")
-	}
-
 	// Init any links between the parent and children
 	runtime := container.runtime
 
@@ -653,37 +604,12 @@ func (container *Container) Start() (err error) {
 		return err
 	}
 
+	var workingDir string
 	if container.Config.WorkingDir != "" {
-		workingDir := path.Clean(container.Config.WorkingDir)
-		utils.Debugf("[working dir] working dir is %s", workingDir)
-
+		workingDir = path.Clean(container.Config.WorkingDir)
 		if err := os.MkdirAll(path.Join(container.RootfsPath(), workingDir), 0755); err != nil {
 			return nil
 		}
-
-		params = append(params,
-			"-w", workingDir,
-		)
-	}
-
-	// Program
-	params = append(params, "--", container.Path)
-	params = append(params, container.Args...)
-
-	if RootIsShared() {
-		// lxc-start really needs / to be non-shared, or all kinds of stuff break
-		// when lxc-start unmount things and those unmounts propagate to the main
-		// mount namespace.
-		// What we really want is to clone into a new namespace and then
-		// mount / MS_REC|MS_SLAVE, but since we can't really clone or fork
-		// without exec in go we have to do this horrible shell hack...
-		shellString :=
-			"mount --make-rslave /; exec " +
-				utils.ShellQuoteArguments(params)
-
-		params = []string{
-			"unshare", "-m", "--", "/bin/sh", "-c", shellString,
-		}
 	}
 
 	root := container.RootfsPath()
@@ -713,7 +639,6 @@ func (container *Container) Start() (err error) {
 	}
 
 	// Mount user specified volumes
-
 	for r, v := range container.Volumes {
 		mountAs := "ro"
 		if container.VolumesRW[r] {
@@ -725,7 +650,48 @@ func (container *Container) Start() (err error) {
 		}
 	}
 
-	container.cmd = exec.Command(params[0], params[1:]...)
+	var (
+		en           *execdriver.Network
+		driverConfig []string
+	)
+
+	if !container.Config.NetworkDisabled {
+		network := container.NetworkSettings
+		en = &execdriver.Network{
+			Gateway:     network.Gateway,
+			Bridge:      network.Bridge,
+			IPAddress:   network.IPAddress,
+			IPPrefixLen: network.IPPrefixLen,
+			Mtu:         container.runtime.config.Mtu,
+		}
+	}
+
+	if lxcConf := container.hostConfig.LxcConf; lxcConf != nil {
+		for _, pair := range lxcConf {
+			driverConfig = append(driverConfig, fmt.Sprintf("%s = %s", pair.Key, pair.Value))
+		}
+	}
+	cgroupValues := &cgroups.Values{
+		Memory:     container.Config.Memory,
+		MemorySwap: container.Config.MemorySwap,
+		CpuShares:  container.Config.CpuShares,
+	}
+
+	container.process = &execdriver.Process{
+		ID:         container.ID,
+		Privileged: container.hostConfig.Privileged,
+		Rootfs:     root,
+		InitPath:   "/.dockerinit",
+		Entrypoint: container.Path,
+		Arguments:  container.Args,
+		WorkingDir: workingDir,
+		Network:    en,
+		Tty:        container.Config.Tty,
+		User:       container.Config.User,
+		Config:     driverConfig,
+		Cgroups:    cgroupValues,
+	}
+	container.process.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
 
 	// Setup logging of stdout and stderr to disk
 	if err := container.runtime.LogToDisk(container.stdout, container.logPath("json"), "stdout"); err != nil {
@@ -734,59 +700,47 @@ func (container *Container) Start() (err error) {
 	if err := container.runtime.LogToDisk(container.stderr, container.logPath("json"), "stderr"); err != nil {
 		return err
 	}
+	container.waitLock = make(chan struct{})
 
-	container.cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
-
+	// Setuping pipes and/or Pty
+	var setup func() error
 	if container.Config.Tty {
-		err = container.startPty()
+		setup = container.setupPty
 	} else {
-		err = container.start()
+		setup = container.setupStd
 	}
-	if err != nil {
+	if err := setup(); err != nil {
 		return err
 	}
-	// FIXME: save state on disk *first*, then converge
-	// this way disk state is used as a journal, eg. we can restore after crash etc.
-	container.State.SetRunning(container.cmd.Process.Pid)
-
-	// Init the lock
-	container.waitLock = make(chan struct{})
 
-	container.ToDisk()
-	go container.monitor()
-
-	defer utils.Debugf("Container running: %v", container.State.IsRunning())
-	// We wait for the container to be fully running.
-	// Timeout after 5 seconds. In case of broken pipe, just retry.
-	// Note: The container can run and finish correctly before
-	//       the end of this loop
-	for now := time.Now(); time.Since(now) < 5*time.Second; {
-		// If the container dies while waiting for it, just return
-		if !container.State.IsRunning() {
-			return nil
-		}
-		output, err := exec.Command("lxc-info", "-s", "-n", container.ID).CombinedOutput()
-		if err != nil {
-			utils.Debugf("Error with lxc-info: %s (%s)", err, output)
-
-			output, err = exec.Command("lxc-info", "-s", "-n", container.ID).CombinedOutput()
-			if err != nil {
-				utils.Debugf("Second Error with lxc-info: %s (%s)", err, output)
-				return err
+	callbackLock := make(chan struct{})
+	callback := func(process *execdriver.Process) {
+		container.State.SetRunning(process.Pid())
+		if process.Tty {
+			// The callback is called after the process Start()
+			// so we are in the parent process. In TTY mode, stdin/out/err is the PtySlace
+			// which we close here.
+			if c, ok := process.Stdout.(io.Closer); ok {
+				c.Close()
 			}
-
 		}
-		if strings.Contains(string(output), "RUNNING") {
-			return nil
+		if err := container.ToDisk(); err != nil {
+			utils.Debugf("%s", err)
 		}
-		utils.Debugf("Waiting for the container to start (running: %v): %s", container.State.IsRunning(), bytes.TrimSpace(output))
-		time.Sleep(50 * time.Millisecond)
+		close(callbackLock)
 	}
 
-	if container.State.IsRunning() {
-		return ErrContainerStartTimeout
+	// We use a callback here instead of a goroutine and an chan for
+	// syncronization purposes
+	cErr := utils.Go(func() error { return container.monitor(callback) })
+
+	// Start should not return until the process is actually running
+	select {
+	case <-callbackLock:
+	case err := <-cErr:
+		return err
 	}
-	return ErrContainerStart
+	return nil
 }
 
 func (container *Container) getBindMap() (map[string]BindMap, error) {
@@ -1159,47 +1113,23 @@ func (container *Container) releaseNetwork() {
 	container.NetworkSettings = &NetworkSettings{}
 }
 
-// FIXME: replace this with a control socket within dockerinit
-func (container *Container) waitLxc() error {
-	for {
-		output, err := exec.Command("lxc-info", "-n", container.ID).CombinedOutput()
-		if err != nil {
-			return err
-		}
-		if !strings.Contains(string(output), "RUNNING") {
-			return nil
-		}
-		time.Sleep(500 * time.Millisecond)
-	}
-}
-
-func (container *Container) monitor() {
-	// Wait for the program to exit
+func (container *Container) monitor(callback execdriver.StartCallback) error {
+	var (
+		err      error
+		exitCode int
+	)
 
-	// If the command does not exist, try to wait via lxc
-	// (This probably happens only for ghost containers, i.e. containers that were running when Docker started)
-	if container.cmd == nil {
-		utils.Debugf("monitor: waiting for container %s using waitLxc", container.ID)
-		if err := container.waitLxc(); err != nil {
-			utils.Errorf("monitor: while waiting for container %s, waitLxc had a problem: %s", container.ID, err)
-		}
+	if container.process == nil {
+		// This happends when you have a GHOST container with lxc
+		err = container.runtime.WaitGhost(container)
 	} else {
-		utils.Debugf("monitor: waiting for container %s using cmd.Wait", container.ID)
-		if err := container.cmd.Wait(); err != nil {
-			// Since non-zero exit status and signal terminations will cause err to be non-nil,
-			// we have to actually discard it. Still, log it anyway, just in case.
-			utils.Debugf("monitor: cmd.Wait reported exit status %s for container %s", err, container.ID)
-		}
-	}
-	utils.Debugf("monitor: container %s finished", container.ID)
-
-	exitCode := -1
-	if container.cmd != nil {
-		exitCode = container.cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
+		exitCode, err = container.runtime.Run(container, callback)
 	}
 
-	if container.runtime != nil && container.runtime.srv != nil {
-		container.runtime.srv.LogEvent("die", container.ID, container.runtime.repositories.ImageName(container.Image))
+	if err != nil {
+		if container.runtime != nil && container.runtime.srv != nil {
+			container.runtime.srv.LogEvent("die", container.ID, container.runtime.repositories.ImageName(container.Image))
+		}
 	}
 
 	// Cleanup
@@ -1210,21 +1140,20 @@ func (container *Container) monitor() {
 		container.stdin, container.stdinPipe = io.Pipe()
 	}
 
-	// Report status back
 	container.State.SetStopped(exitCode)
 
-	// Release the lock
 	close(container.waitLock)
 
-	if err := container.ToDisk(); err != nil {
-		// FIXME: there is a race condition here which causes this to fail during the unit tests.
-		// If another goroutine was waiting for Wait() to return before removing the container's root
-		// from the filesystem... At this point it may already have done so.
-		// This is because State.setStopped() has already been called, and has caused Wait()
-		// to return.
-		// FIXME: why are we serializing running state to disk in the first place?
-		//log.Printf("%s: Failed to dump configuration to the disk: %s", container.ID, err)
-	}
+	// FIXME: there is a race condition here which causes this to fail during the unit tests.
+	// If another goroutine was waiting for Wait() to return before removing the container's root
+	// from the filesystem... At this point it may already have done so.
+	// This is because State.setStopped() has already been called, and has caused Wait()
+	// to return.
+	// FIXME: why are we serializing running state to disk in the first place?
+	//log.Printf("%s: Failed to dump configuration to the disk: %s", container.ID, err)
+	container.ToDisk()
+
+	return err
 }
 
 func (container *Container) cleanup() {
@@ -1267,13 +1196,7 @@ func (container *Container) kill(sig int) error {
 	if !container.State.IsRunning() {
 		return nil
 	}
-
-	if output, err := exec.Command("lxc-kill", "-n", container.ID, strconv.Itoa(sig)).CombinedOutput(); err != nil {
-		log.Printf("error killing container %s (%s, %s)", utils.TruncateID(container.ID), output, err)
-		return err
-	}
-
-	return nil
+	return container.runtime.Kill(container, sig)
 }
 
 func (container *Container) Kill() error {
@@ -1288,11 +1211,11 @@ func (container *Container) Kill() error {
 
 	// 2. Wait for the process to die, in last resort, try to kill the process directly
 	if err := container.WaitTimeout(10 * time.Second); err != nil {
-		if container.cmd == nil {
+		if container.process == nil {
 			return fmt.Errorf("lxc-kill failed, impossible to kill the container %s", utils.TruncateID(container.ID))
 		}
 		log.Printf("Container %s failed to exit within 10 seconds of lxc-kill %s - trying direct SIGKILL", "SIGKILL", utils.TruncateID(container.ID))
-		if err := container.cmd.Process.Kill(); err != nil {
+		if err := container.runtime.Kill(container, 9); err != nil {
 			return err
 		}
 	}
@@ -1463,10 +1386,6 @@ func (container *Container) EnvConfigPath() (string, error) {
 	return p, nil
 }
 
-func (container *Container) lxcConfigPath() string {
-	return path.Join(container.root, "config.lxc")
-}
-
 // This method must be exported to be used from the lxc template
 func (container *Container) RootfsPath() string {
 	return container.rootfs

+ 2 - 0
execdriver/MAINTAINERS

@@ -0,0 +1,2 @@
+Michael Crosby <michael@crosbymichael.com> (@crosbymichael)
+Guillaume Charmes <guillaume@dotcloud.com> (@creack)

+ 87 - 0
execdriver/chroot/driver.go

@@ -0,0 +1,87 @@
+package chroot
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/execdriver"
+	"github.com/dotcloud/docker/mount"
+	"os"
+	"os/exec"
+)
+
+const (
+	DriverName = "chroot"
+	Version    = "0.1"
+)
+
+func init() {
+	execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error {
+		if err := mount.ForceMount("proc", "proc", "proc", ""); err != nil {
+			return err
+		}
+		defer mount.ForceUnmount("proc")
+		cmd := exec.Command(args.Args[0], args.Args[1:]...)
+
+		cmd.Stderr = os.Stderr
+		cmd.Stdout = os.Stdout
+		cmd.Stdin = os.Stdin
+
+		return cmd.Run()
+	})
+}
+
+type driver struct {
+}
+
+func NewDriver() (*driver, error) {
+	return &driver{}, nil
+}
+
+func (d *driver) Run(c *execdriver.Process, startCallback execdriver.StartCallback) (int, error) {
+	params := []string{
+		"chroot",
+		c.Rootfs,
+		"/.dockerinit",
+		"-driver",
+		DriverName,
+	}
+	params = append(params, c.Entrypoint)
+	params = append(params, c.Arguments...)
+
+	var (
+		name = params[0]
+		arg  = params[1:]
+	)
+	aname, err := exec.LookPath(name)
+	if err != nil {
+		aname = name
+	}
+	c.Path = aname
+	c.Args = append([]string{name}, arg...)
+
+	if err := c.Start(); err != nil {
+		return -1, err
+	}
+
+	if startCallback != nil {
+		startCallback(c)
+	}
+
+	err = c.Wait()
+	return c.GetExitCode(), err
+}
+
+func (d *driver) Kill(p *execdriver.Process, sig int) error {
+	return p.Process.Kill()
+}
+
+func (d *driver) Wait(id string) error {
+	panic("Not Implemented")
+}
+
+func (d *driver) Info(id string) execdriver.Info {
+	panic("Not implemented")
+}
+
+func (d *driver) Name() string {
+	return fmt.Sprintf("%s-%s", DriverName, Version)
+}

+ 115 - 0
execdriver/driver.go

@@ -0,0 +1,115 @@
+package execdriver
+
+import (
+	"errors"
+	"github.com/dotcloud/docker/cgroups"
+	"os/exec"
+	"syscall"
+)
+
+var (
+	ErrNotRunning              = errors.New("Process could not be started")
+	ErrWaitTimeoutReached      = errors.New("Wait timeout reached")
+	ErrDriverAlreadyRegistered = errors.New("A driver already registered this docker init function")
+	ErrDriverNotFound          = errors.New("The requested docker init has not been found")
+)
+
+var dockerInitFcts map[string]InitFunc
+
+type (
+	StartCallback func(*Process)
+	InitFunc      func(i *InitArgs) error
+)
+
+func RegisterInitFunc(name string, fct InitFunc) error {
+	if dockerInitFcts == nil {
+		dockerInitFcts = make(map[string]InitFunc)
+	}
+	if _, ok := dockerInitFcts[name]; ok {
+		return ErrDriverAlreadyRegistered
+	}
+	dockerInitFcts[name] = fct
+	return nil
+}
+
+func GetInitFunc(name string) (InitFunc, error) {
+	fct, ok := dockerInitFcts[name]
+	if !ok {
+		return nil, ErrDriverNotFound
+	}
+	return fct, nil
+}
+
+// Args provided to the init function for a driver
+type InitArgs struct {
+	User       string
+	Gateway    string
+	Ip         string
+	WorkDir    string
+	Privileged bool
+	Env        []string
+	Args       []string
+	Mtu        int
+	Driver     string
+}
+
+// Driver specific information based on
+// processes registered with the driver
+type Info interface {
+	IsRunning() bool
+}
+
+type Driver interface {
+	Run(c *Process, startCallback StartCallback) (int, error) // Run executes the process and blocks until the process exits and returns the exit code
+	Kill(c *Process, sig int) error
+	Wait(id string) error // Wait on an out of process...process - lxc ghosts TODO: Rename to reattach, reconnect
+	Name() string         // Driver name
+	Info(id string) Info  // "temporary" hack (until we move state from core to plugins)
+}
+
+// Network settings of the container
+type Network struct {
+	Gateway     string `json:"gateway"`
+	IPAddress   string `json:"ip"`
+	Bridge      string `json:"bridge"`
+	IPPrefixLen int    `json:"ip_prefix_len"`
+	Mtu         int    `json:"mtu"`
+}
+
+// Process wrapps an os/exec.Cmd to add more metadata
+// TODO: Rename to Command
+type Process struct {
+	exec.Cmd
+
+	ID         string          `json:"id"`
+	Privileged bool            `json:"privileged"`
+	User       string          `json:"user"`
+	Rootfs     string          `json:"rootfs"`   // root fs of the container
+	InitPath   string          `json:"initpath"` // dockerinit
+	Entrypoint string          `json:"entrypoint"`
+	Arguments  []string        `json:"arguments"`
+	WorkingDir string          `json:"working_dir"`
+	ConfigPath string          `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver
+	Tty        bool            `json:"tty"`
+	Network    *Network        `json:"network"` // if network is nil then networking is disabled
+	Config     []string        `json:"config"`  //  generic values that specific drivers can consume
+	Cgroups    *cgroups.Values `json:"cgroups"`
+}
+
+// Return the pid of the process
+// If the process is nil -1 will be returned
+func (c *Process) Pid() int {
+	if c.Process == nil {
+		return -1
+	}
+	return c.Process.Pid
+}
+
+// Return the exit code of the process
+// if the process has not exited -1 will be returned
+func (c *Process) GetExitCode() int {
+	if c.ProcessState == nil {
+		return -1
+	}
+	return c.ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
+}

+ 323 - 0
execdriver/lxc/driver.go

@@ -0,0 +1,323 @@
+package lxc
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/execdriver"
+	"github.com/dotcloud/docker/utils"
+	"io/ioutil"
+	"log"
+	"os"
+	"os/exec"
+	"path"
+	"strconv"
+	"strings"
+	"syscall"
+	"time"
+)
+
+const DriverName = "lxc"
+
+func init() {
+	execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error {
+		if err := setupHostname(args); err != nil {
+			return err
+		}
+
+		if err := setupNetworking(args); err != nil {
+			return err
+		}
+
+		if err := setupCapabilities(args); err != nil {
+			return err
+		}
+		if err := setupWorkingDirectory(args); err != nil {
+			return err
+		}
+
+		if err := changeUser(args); err != nil {
+			return err
+		}
+		path, err := exec.LookPath(args.Args[0])
+		if err != nil {
+			log.Printf("Unable to locate %v", args.Args[0])
+			os.Exit(127)
+		}
+		if err := syscall.Exec(path, args.Args, os.Environ()); err != nil {
+			return fmt.Errorf("dockerinit unable to execute %s - %s", path, err)
+		}
+		panic("Unreachable")
+	})
+}
+
+type driver struct {
+	root       string // root path for the driver to use
+	apparmor   bool
+	sharedRoot bool
+}
+
+func NewDriver(root string, apparmor bool) (*driver, error) {
+	// setup unconfined symlink
+	if err := linkLxcStart(root); err != nil {
+		return nil, err
+	}
+	return &driver{
+		apparmor:   apparmor,
+		root:       root,
+		sharedRoot: rootIsShared(),
+	}, nil
+}
+
+func (d *driver) Name() string {
+	version := d.version()
+	return fmt.Sprintf("%s-%s", DriverName, version)
+}
+
+func (d *driver) Run(c *execdriver.Process, startCallback execdriver.StartCallback) (int, error) {
+	configPath, err := d.generateLXCConfig(c)
+	if err != nil {
+		return -1, err
+	}
+	params := []string{
+		"lxc-start",
+		"-n", c.ID,
+		"-f", configPath,
+		"--",
+		c.InitPath,
+		"-driver",
+		DriverName,
+	}
+
+	if c.Network != nil {
+		params = append(params,
+			"-g", c.Network.Gateway,
+			"-i", fmt.Sprintf("%s/%d", c.Network.IPAddress, c.Network.IPPrefixLen),
+			"-mtu", strconv.Itoa(c.Network.Mtu),
+		)
+	}
+
+	if c.User != "" {
+		params = append(params, "-u", c.User)
+	}
+
+	if c.Privileged {
+		if d.apparmor {
+			params[0] = path.Join(d.root, "lxc-start-unconfined")
+
+		}
+		params = append(params, "-privileged")
+	}
+
+	if c.WorkingDir != "" {
+		params = append(params, "-w", c.WorkingDir)
+	}
+
+	if d.sharedRoot {
+		// lxc-start really needs / to be non-shared, or all kinds of stuff break
+		// when lxc-start unmount things and those unmounts propagate to the main
+		// mount namespace.
+		// What we really want is to clone into a new namespace and then
+		// mount / MS_REC|MS_SLAVE, but since we can't really clone or fork
+		// without exec in go we have to do this horrible shell hack...
+		shellString :=
+			"mount --make-rslave /; exec " +
+				utils.ShellQuoteArguments(params)
+
+		params = []string{
+			"unshare", "-m", "--", "/bin/sh", "-c", shellString,
+		}
+	}
+
+	params = append(params, "--", c.Entrypoint)
+	params = append(params, c.Arguments...)
+
+	var (
+		name = params[0]
+		arg  = params[1:]
+	)
+	aname, err := exec.LookPath(name)
+	if err != nil {
+		aname = name
+	}
+	c.Path = aname
+	c.Args = append([]string{name}, arg...)
+
+	if err := c.Start(); err != nil {
+		return -1, err
+	}
+
+	var (
+		waitErr  error
+		waitLock = make(chan struct{})
+	)
+	go func() {
+		if err := c.Wait(); err != nil {
+			waitErr = err
+		}
+		close(waitLock)
+	}()
+
+	// Poll lxc for RUNNING status
+	if err := d.waitForStart(c, waitLock); err != nil {
+		return -1, err
+	}
+
+	if startCallback != nil {
+		startCallback(c)
+	}
+
+	<-waitLock
+
+	return c.GetExitCode(), waitErr
+}
+
+func (d *driver) Kill(c *execdriver.Process, sig int) error {
+	return d.kill(c, sig)
+}
+
+func (d *driver) Wait(id string) error {
+	for {
+		output, err := exec.Command("lxc-info", "-n", id).CombinedOutput()
+		if err != nil {
+			return err
+		}
+		if !strings.Contains(string(output), "RUNNING") {
+			return nil
+		}
+		time.Sleep(500 * time.Millisecond)
+	}
+}
+
+func (d *driver) version() string {
+	version := ""
+	if output, err := exec.Command("lxc-version").CombinedOutput(); err == nil {
+		outputStr := string(output)
+		if len(strings.SplitN(outputStr, ":", 2)) == 2 {
+			version = strings.TrimSpace(strings.SplitN(outputStr, ":", 2)[1])
+		}
+	}
+	return version
+}
+
+func (d *driver) kill(c *execdriver.Process, sig int) error {
+	output, err := exec.Command("lxc-kill", "-n", c.ID, strconv.Itoa(sig)).CombinedOutput()
+	if err != nil {
+		return fmt.Errorf("Err: %s Output: %s", err, output)
+	}
+	return nil
+}
+
+func (d *driver) waitForStart(c *execdriver.Process, waitLock chan struct{}) error {
+	var (
+		err    error
+		output []byte
+	)
+	// We wait for the container to be fully running.
+	// Timeout after 5 seconds. In case of broken pipe, just retry.
+	// Note: The container can run and finish correctly before
+	// the end of this loop
+	for now := time.Now(); time.Since(now) < 5*time.Second; {
+		select {
+		case <-waitLock:
+			// If the process dies while waiting for it, just return
+			return nil
+			if c.ProcessState != nil && c.ProcessState.Exited() {
+				return nil
+			}
+		default:
+		}
+
+		output, err = d.getInfo(c.ID)
+		if err != nil {
+			output, err = d.getInfo(c.ID)
+			if err != nil {
+				return err
+			}
+		}
+		if strings.Contains(string(output), "RUNNING") {
+			return nil
+		}
+		time.Sleep(50 * time.Millisecond)
+	}
+	return execdriver.ErrNotRunning
+}
+
+func (d *driver) getInfo(id string) ([]byte, error) {
+	return exec.Command("lxc-info", "-s", "-n", id).CombinedOutput()
+}
+
+type info struct {
+	ID     string
+	driver *driver
+}
+
+func (i *info) IsRunning() bool {
+	var running bool
+
+	output, err := i.driver.getInfo(i.ID)
+	if err != nil {
+		panic(err)
+	}
+	if strings.Contains(string(output), "RUNNING") {
+		running = true
+	}
+	return running
+}
+
+func (d *driver) Info(id string) execdriver.Info {
+	return &info{
+		ID:     id,
+		driver: d,
+	}
+}
+
+func linkLxcStart(root string) error {
+	sourcePath, err := exec.LookPath("lxc-start")
+	if err != nil {
+		return err
+	}
+	targetPath := path.Join(root, "lxc-start-unconfined")
+
+	if _, err := os.Lstat(targetPath); err != nil && !os.IsNotExist(err) {
+		return err
+	} else if err == nil {
+		if err := os.Remove(targetPath); err != nil {
+			return err
+		}
+	}
+	return os.Symlink(sourcePath, targetPath)
+}
+
+// TODO: This can be moved to the mountinfo reader in the mount pkg
+func rootIsShared() bool {
+	if data, err := ioutil.ReadFile("/proc/self/mountinfo"); err == nil {
+		for _, line := range strings.Split(string(data), "\n") {
+			cols := strings.Split(line, " ")
+			if len(cols) >= 6 && cols[4] == "/" {
+				return strings.HasPrefix(cols[6], "shared")
+			}
+		}
+	}
+
+	// No idea, probably safe to assume so
+	return true
+}
+
+func (d *driver) generateLXCConfig(p *execdriver.Process) (string, error) {
+	root := path.Join(d.root, "containers", p.ID, "config.lxc")
+	fo, err := os.Create(root)
+	if err != nil {
+		return "", err
+	}
+	defer fo.Close()
+
+	if err := LxcTemplateCompiled.Execute(fo, struct {
+		*execdriver.Process
+		AppArmor bool
+	}{
+		Process:  p,
+		AppArmor: d.apparmor,
+	}); err != nil {
+		return "", err
+	}
+	return root, nil
+}

+ 153 - 0
execdriver/lxc/init.go

@@ -0,0 +1,153 @@
+package lxc
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/execdriver"
+	"github.com/dotcloud/docker/pkg/netlink"
+	"github.com/dotcloud/docker/utils"
+	"github.com/syndtr/gocapability/capability"
+	"net"
+	"os"
+	"strconv"
+	"strings"
+	"syscall"
+)
+
+func setupHostname(args *execdriver.InitArgs) error {
+	hostname := getEnv(args, "HOSTNAME")
+	if hostname == "" {
+		return nil
+	}
+	return setHostname(hostname)
+}
+
+// Setup networking
+func setupNetworking(args *execdriver.InitArgs) error {
+	if args.Ip != "" {
+		// eth0
+		iface, err := net.InterfaceByName("eth0")
+		if err != nil {
+			return fmt.Errorf("Unable to set up networking: %v", err)
+		}
+		ip, ipNet, err := net.ParseCIDR(args.Ip)
+		if err != nil {
+			return fmt.Errorf("Unable to set up networking: %v", err)
+		}
+		if err := netlink.NetworkLinkAddIp(iface, ip, ipNet); err != nil {
+			return fmt.Errorf("Unable to set up networking: %v", err)
+		}
+		if err := netlink.NetworkSetMTU(iface, args.Mtu); err != nil {
+			return fmt.Errorf("Unable to set MTU: %v", err)
+		}
+		if err := netlink.NetworkLinkUp(iface); err != nil {
+			return fmt.Errorf("Unable to set up networking: %v", err)
+		}
+
+		// loopback
+		iface, err = net.InterfaceByName("lo")
+		if err != nil {
+			return fmt.Errorf("Unable to set up networking: %v", err)
+		}
+		if err := netlink.NetworkLinkUp(iface); err != nil {
+			return fmt.Errorf("Unable to set up networking: %v", err)
+		}
+	}
+	if args.Gateway != "" {
+		gw := net.ParseIP(args.Gateway)
+		if gw == nil {
+			return fmt.Errorf("Unable to set up networking, %s is not a valid gateway IP", args.Gateway)
+		}
+
+		if err := netlink.AddDefaultGw(gw); err != nil {
+			return fmt.Errorf("Unable to set up networking: %v", err)
+		}
+	}
+
+	return nil
+}
+
+// Setup working directory
+func setupWorkingDirectory(args *execdriver.InitArgs) error {
+	if args.WorkDir == "" {
+		return nil
+	}
+	if err := syscall.Chdir(args.WorkDir); err != nil {
+		return fmt.Errorf("Unable to change dir to %v: %v", args.WorkDir, err)
+	}
+	return nil
+}
+
+// Takes care of dropping privileges to the desired user
+func changeUser(args *execdriver.InitArgs) error {
+	if args.User == "" {
+		return nil
+	}
+	userent, err := utils.UserLookup(args.User)
+	if err != nil {
+		return fmt.Errorf("Unable to find user %v: %v", args.User, err)
+	}
+
+	uid, err := strconv.Atoi(userent.Uid)
+	if err != nil {
+		return fmt.Errorf("Invalid uid: %v", userent.Uid)
+	}
+	gid, err := strconv.Atoi(userent.Gid)
+	if err != nil {
+		return fmt.Errorf("Invalid gid: %v", userent.Gid)
+	}
+
+	if err := syscall.Setgid(gid); err != nil {
+		return fmt.Errorf("setgid failed: %v", err)
+	}
+	if err := syscall.Setuid(uid); err != nil {
+		return fmt.Errorf("setuid failed: %v", err)
+	}
+
+	return nil
+}
+
+func setupCapabilities(args *execdriver.InitArgs) error {
+
+	if args.Privileged {
+		return nil
+	}
+
+	drop := []capability.Cap{
+		capability.CAP_SETPCAP,
+		capability.CAP_SYS_MODULE,
+		capability.CAP_SYS_RAWIO,
+		capability.CAP_SYS_PACCT,
+		capability.CAP_SYS_ADMIN,
+		capability.CAP_SYS_NICE,
+		capability.CAP_SYS_RESOURCE,
+		capability.CAP_SYS_TIME,
+		capability.CAP_SYS_TTY_CONFIG,
+		capability.CAP_MKNOD,
+		capability.CAP_AUDIT_WRITE,
+		capability.CAP_AUDIT_CONTROL,
+		capability.CAP_MAC_OVERRIDE,
+		capability.CAP_MAC_ADMIN,
+	}
+
+	c, err := capability.NewPid(os.Getpid())
+	if err != nil {
+		return err
+	}
+
+	c.Unset(capability.CAPS|capability.BOUNDS, drop...)
+
+	if err := c.Apply(capability.CAPS | capability.BOUNDS); err != nil {
+		return err
+	}
+	return nil
+}
+
+func getEnv(args *execdriver.InitArgs, key string) string {
+	for _, kv := range args.Env {
+		parts := strings.SplitN(kv, "=", 2)
+		if parts[0] == key && len(parts) == 2 {
+			return parts[1]
+		}
+	}
+	return ""
+}

+ 1 - 1
sysinit/sysinit_darwin.go → execdriver/lxc/lxc_init_darwin.go

@@ -1,4 +1,4 @@
-package sysinit
+package lxc
 
 func setHostname(hostname string) error {
 	panic("Not supported on darwin")

+ 1 - 1
sysinit/sysinit_linux.go → execdriver/lxc/lxc_init_linux.go

@@ -1,4 +1,4 @@
-package sysinit
+package lxc
 
 import (
 	"syscall"

+ 26 - 33
lxc_template.go → execdriver/lxc/lxc_template.go

@@ -1,23 +1,24 @@
-package docker
+package lxc
 
 import (
+	"github.com/dotcloud/docker/cgroups"
 	"strings"
 	"text/template"
 )
 
 const LxcTemplate = `
-{{if .Config.NetworkDisabled}}
-# network is disabled (-n=false)
-lxc.network.type = empty
-{{else}}
+{{if .Network}}
 # network configuration
 lxc.network.type = veth
-lxc.network.link = {{.NetworkSettings.Bridge}}
+lxc.network.link = {{.Network.Bridge}}
 lxc.network.name = eth0
+{{else}}
+# network is disabled (-n=false)
+lxc.network.type = empty
 {{end}}
 
 # root filesystem
-{{$ROOTFS := .RootfsPath}}
+{{$ROOTFS := .Rootfs}}
 lxc.rootfs = {{$ROOTFS}}
 
 # use a dedicated pts for the container (and limit the number of pseudo terminal
@@ -30,8 +31,8 @@ lxc.console = none
 # no controlling tty at all
 lxc.tty = 1
 
-{{if (getHostConfig .).Privileged}}
-lxc.cgroup.devices.allow = a 
+{{if .Privileged}}
+lxc.cgroup.devices.allow = a
 {{else}}
 # no implicit access to devices
 lxc.cgroup.devices.deny = a
@@ -81,8 +82,8 @@ lxc.mount.entry = sysfs {{escapeFstabSpaces $ROOTFS}}/sys sysfs nosuid,nodev,noe
 lxc.mount.entry = devpts {{escapeFstabSpaces $ROOTFS}}/dev/pts devpts newinstance,ptmxmode=0666,nosuid,noexec 0 0
 lxc.mount.entry = shm {{escapeFstabSpaces $ROOTFS}}/dev/shm tmpfs size=65536k,nosuid,nodev,noexec 0 0
 
-{{if (getHostConfig .).Privileged}}
-{{if (getCapabilities .).AppArmor}}
+{{if .Privileged}}
+{{if .AppArmor}}
 lxc.aa_profile = unconfined
 {{else}}
 #lxc.aa_profile = unconfined
@@ -90,20 +91,22 @@ lxc.aa_profile = unconfined
 {{end}}
 
 # limits
-{{if .Config.Memory}}
-lxc.cgroup.memory.limit_in_bytes = {{.Config.Memory}}
-lxc.cgroup.memory.soft_limit_in_bytes = {{.Config.Memory}}
-{{with $memSwap := getMemorySwap .Config}}
+{{if .Cgroups}}
+{{if .Cgroups.Memory}}
+lxc.cgroup.memory.limit_in_bytes = {{.Cgroups.Memory}}
+lxc.cgroup.memory.soft_limit_in_bytes = {{.Cgroups.Memory}}
+{{with $memSwap := getMemorySwap .Cgroups}}
 lxc.cgroup.memory.memsw.limit_in_bytes = {{$memSwap}}
 {{end}}
 {{end}}
-{{if .Config.CpuShares}}
-lxc.cgroup.cpu.shares = {{.Config.CpuShares}}
+{{if .Cgroups.CpuShares}}
+lxc.cgroup.cpu.shares = {{.Cgroups.CpuShares}}
+{{end}}
 {{end}}
 
-{{if (getHostConfig .).LxcConf}}
-{{range $pair := (getHostConfig .).LxcConf}}
-{{$pair.Key}} = {{$pair.Value}}
+{{if .Config}}
+{{range $value := .Config}}
+{{$value}}
 {{end}}
 {{end}}
 `
@@ -116,29 +119,19 @@ func escapeFstabSpaces(field string) string {
 	return strings.Replace(field, " ", "\\040", -1)
 }
 
-func getMemorySwap(config *Config) int64 {
+func getMemorySwap(v *cgroups.Values) int64 {
 	// By default, MemorySwap is set to twice the size of RAM.
 	// If you want to omit MemorySwap, set it to `-1'.
-	if config.MemorySwap < 0 {
+	if v.MemorySwap < 0 {
 		return 0
 	}
-	return config.Memory * 2
-}
-
-func getHostConfig(container *Container) *HostConfig {
-	return container.hostConfig
-}
-
-func getCapabilities(container *Container) *Capabilities {
-	return container.runtime.capabilities
+	return v.Memory * 2
 }
 
 func init() {
 	var err error
 	funcMap := template.FuncMap{
 		"getMemorySwap":     getMemorySwap,
-		"getHostConfig":     getHostConfig,
-		"getCapabilities":   getCapabilities,
 		"escapeFstabSpaces": escapeFstabSpaces,
 	}
 	LxcTemplateCompiled, err = template.New("lxc").Funcs(funcMap).Parse(LxcTemplate)

+ 49 - 41
lxc_template_unit_test.go → execdriver/lxc/lxc_template_unit_test.go

@@ -1,11 +1,14 @@
-package docker
+package lxc
 
 import (
 	"bufio"
 	"fmt"
+	"github.com/dotcloud/docker/cgroups"
+	"github.com/dotcloud/docker/execdriver"
 	"io/ioutil"
 	"math/rand"
 	"os"
+	"path"
 	"strings"
 	"testing"
 	"time"
@@ -17,32 +20,39 @@ func TestLXCConfig(t *testing.T) {
 		t.Fatal(err)
 	}
 	defer os.RemoveAll(root)
+
+	os.MkdirAll(path.Join(root, "containers", "1"), 0777)
+
 	// Memory is allocated randomly for testing
 	rand.Seed(time.Now().UTC().UnixNano())
-	memMin := 33554432
-	memMax := 536870912
-	mem := memMin + rand.Intn(memMax-memMin)
-	// CPU shares as well
-	cpuMin := 100
-	cpuMax := 10000
-	cpu := cpuMin + rand.Intn(cpuMax-cpuMin)
-	container := &Container{
-		root: root,
-		Config: &Config{
-			Memory:          int64(mem),
-			CpuShares:       int64(cpu),
-			NetworkDisabled: true,
-		},
-		hostConfig: &HostConfig{
-			Privileged: false,
+	var (
+		memMin = 33554432
+		memMax = 536870912
+		mem    = memMin + rand.Intn(memMax-memMin)
+		cpuMin = 100
+		cpuMax = 10000
+		cpu    = cpuMin + rand.Intn(cpuMax-cpuMin)
+	)
+
+	driver, err := NewDriver(root, false)
+	if err != nil {
+		t.Fatal(err)
+	}
+	process := &execdriver.Process{
+		ID: "1",
+		Cgroups: &cgroups.Values{
+			Memory:    int64(mem),
+			CpuShares: int64(cpu),
 		},
 	}
-	if err := container.generateLXCConfig(); err != nil {
+	p, err := driver.generateLXCConfig(process)
+	if err != nil {
 		t.Fatal(err)
 	}
-	grepFile(t, container.lxcConfigPath(),
+	grepFile(t, p,
 		fmt.Sprintf("lxc.cgroup.memory.limit_in_bytes = %d", mem))
-	grepFile(t, container.lxcConfigPath(),
+
+	grepFile(t, p,
 		fmt.Sprintf("lxc.cgroup.memory.memsw.limit_in_bytes = %d", mem*2))
 }
 
@@ -52,31 +62,29 @@ func TestCustomLxcConfig(t *testing.T) {
 		t.Fatal(err)
 	}
 	defer os.RemoveAll(root)
-	container := &Container{
-		root: root,
-		Config: &Config{
-			Hostname:        "foobar",
-			NetworkDisabled: true,
-		},
-		hostConfig: &HostConfig{
-			Privileged: false,
-			LxcConf: []KeyValuePair{
-				{
-					Key:   "lxc.utsname",
-					Value: "docker",
-				},
-				{
-					Key:   "lxc.cgroup.cpuset.cpus",
-					Value: "0,1",
-				},
-			},
+
+	os.MkdirAll(path.Join(root, "containers", "1"), 0777)
+
+	driver, err := NewDriver(root, false)
+	if err != nil {
+		t.Fatal(err)
+	}
+	process := &execdriver.Process{
+		ID:         "1",
+		Privileged: false,
+		Config: []string{
+			"lxc.utsname = docker",
+			"lxc.cgroup.cpuset.cpus = 0,1",
 		},
 	}
-	if err := container.generateLXCConfig(); err != nil {
+
+	p, err := driver.generateLXCConfig(process)
+	if err != nil {
 		t.Fatal(err)
 	}
-	grepFile(t, container.lxcConfigPath(), "lxc.utsname = docker")
-	grepFile(t, container.lxcConfigPath(), "lxc.cgroup.cpuset.cpus = 0,1")
+
+	grepFile(t, p, "lxc.utsname = docker")
+	grepFile(t, p, "lxc.cgroup.cpuset.cpus = 0,1")
 }
 
 func grepFile(t *testing.T, path string, pattern string) {

+ 0 - 1
integration/utils_test.go

@@ -38,7 +38,6 @@ func mkRuntime(f utils.Fataler) *docker.Runtime {
 	if err != nil {
 		f.Fatal(err)
 	}
-	r.UpdateCapabilities(true)
 	return r
 }
 

+ 13 - 3
mount/mount.go

@@ -25,27 +25,37 @@ func Mounted(mountpoint string) (bool, error) {
 	return false, nil
 }
 
-// Mount the specified options at the target path
+// Mount the specified options at the target path only if
+// the target is not mounted
 // Options must be specified as fstab style
 func Mount(device, target, mType, options string) error {
 	if mounted, err := Mounted(target); err != nil || mounted {
 		return err
 	}
+	return ForceMount(device, target, mType, options)
+}
 
+// Mount the specified options at the target path
+// reguardless if the target is mounted or not
+// Options must be specified as fstab style
+func ForceMount(device, target, mType, options string) error {
 	flag, data := parseOptions(options)
 	if err := mount(device, target, mType, uintptr(flag), data); err != nil {
 		return err
 	}
 	return nil
-
 }
 
 // Unmount the target only if it is mounted
-func Unmount(target string) (err error) {
+func Unmount(target string) error {
 	if mounted, err := Mounted(target); err != nil || !mounted {
 		return err
 	}
+	return ForceUnmount(target)
+}
 
+// Unmount the target reguardless if it is mounted or not
+func ForceUnmount(target string) (err error) {
 	// Simple retry logic for unmount
 	for i := 0; i < 10; i++ {
 		if err = unmount(target, 0); err == nil {

+ 55 - 0
pkg/sysinfo/sysinfo.go

@@ -0,0 +1,55 @@
+package sysinfo
+
+import (
+	"github.com/dotcloud/docker/cgroups"
+	"github.com/dotcloud/docker/utils"
+	"io/ioutil"
+	"log"
+	"os"
+	"path"
+)
+
+type SysInfo struct {
+	MemoryLimit            bool
+	SwapLimit              bool
+	IPv4ForwardingDisabled bool
+	AppArmor               bool
+}
+
+func New(quiet bool) *SysInfo {
+	sysInfo := &SysInfo{}
+	if cgroupMemoryMountpoint, err := cgroups.FindCgroupMountpoint("memory"); err != nil {
+		if !quiet {
+			log.Printf("WARNING: %s\n", err)
+		}
+	} else {
+		_, err1 := ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.limit_in_bytes"))
+		_, err2 := ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.soft_limit_in_bytes"))
+		sysInfo.MemoryLimit = err1 == nil && err2 == nil
+		if !sysInfo.MemoryLimit && !quiet {
+			log.Printf("WARNING: Your kernel does not support cgroup memory limit.")
+		}
+
+		_, err = ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.memsw.limit_in_bytes"))
+		sysInfo.SwapLimit = err == nil
+		if !sysInfo.SwapLimit && !quiet {
+			log.Printf("WARNING: Your kernel does not support cgroup swap limit.")
+		}
+	}
+
+	content, err3 := ioutil.ReadFile("/proc/sys/net/ipv4/ip_forward")
+	sysInfo.IPv4ForwardingDisabled = err3 != nil || len(content) == 0 || content[0] != '1'
+	if sysInfo.IPv4ForwardingDisabled && !quiet {
+		log.Printf("WARNING: IPv4 forwarding is disabled.")
+	}
+
+	// Check if AppArmor seems to be enabled on this system.
+	if _, err := os.Stat("/sys/kernel/security/apparmor"); os.IsNotExist(err) {
+		utils.Debugf("/sys/kernel/security/apparmor not found; assuming AppArmor is not enabled.")
+		sysInfo.AppArmor = false
+	} else {
+		utils.Debugf("/sys/kernel/security/apparmor found; assuming AppArmor is enabled.")
+		sysInfo.AppArmor = true
+	}
+	return sysInfo
+}

+ 44 - 77
runtime.go

@@ -4,18 +4,19 @@ import (
 	"container/list"
 	"fmt"
 	"github.com/dotcloud/docker/archive"
-	"github.com/dotcloud/docker/cgroups"
+	"github.com/dotcloud/docker/execdriver"
+	"github.com/dotcloud/docker/execdriver/chroot"
+	"github.com/dotcloud/docker/execdriver/lxc"
 	"github.com/dotcloud/docker/graphdriver"
 	"github.com/dotcloud/docker/graphdriver/aufs"
 	_ "github.com/dotcloud/docker/graphdriver/devmapper"
 	_ "github.com/dotcloud/docker/graphdriver/vfs"
 	"github.com/dotcloud/docker/pkg/graphdb"
+	"github.com/dotcloud/docker/pkg/sysinfo"
 	"github.com/dotcloud/docker/utils"
 	"io"
 	"io/ioutil"
-	"log"
 	"os"
-	"os/exec"
 	"path"
 	"regexp"
 	"sort"
@@ -35,13 +36,6 @@ var (
 	validContainerNamePattern = regexp.MustCompile(`^/?` + validContainerNameChars + `+$`)
 )
 
-type Capabilities struct {
-	MemoryLimit            bool
-	SwapLimit              bool
-	IPv4ForwardingDisabled bool
-	AppArmor               bool
-}
-
 type Runtime struct {
 	repository     string
 	sysInitPath    string
@@ -50,12 +44,13 @@ type Runtime struct {
 	graph          *Graph
 	repositories   *TagStore
 	idIndex        *utils.TruncIndex
-	capabilities   *Capabilities
+	sysInfo        *sysinfo.SysInfo
 	volumes        *Graph
 	srv            *Server
 	config         *DaemonConfig
 	containerGraph *graphdb.Database
 	driver         graphdriver.Driver
+	execDriver     execdriver.Driver
 }
 
 // List returns an array of all containers registered in the runtime.
@@ -160,11 +155,9 @@ func (runtime *Runtime) Register(container *Container) error {
 	//        if so, then we need to restart monitor and init a new lock
 	// If the container is supposed to be running, make sure of it
 	if container.State.IsRunning() {
-		output, err := exec.Command("lxc-info", "-n", container.ID).CombinedOutput()
-		if err != nil {
-			return err
-		}
-		if !strings.Contains(string(output), "RUNNING") {
+		info := runtime.execDriver.Info(container.ID)
+
+		if !info.IsRunning() {
 			utils.Debugf("Container %s was supposed to be running but is not.", container.ID)
 			if runtime.config.AutoRestart {
 				utils.Debugf("Restarting")
@@ -188,7 +181,7 @@ func (runtime *Runtime) Register(container *Container) error {
 			}
 
 			container.waitLock = make(chan struct{})
-			go container.monitor()
+			go container.monitor(nil)
 		}
 	}
 	return nil
@@ -331,43 +324,6 @@ func (runtime *Runtime) restore() error {
 	return nil
 }
 
-// FIXME: comment please!
-func (runtime *Runtime) UpdateCapabilities(quiet bool) {
-	if cgroupMemoryMountpoint, err := cgroups.FindCgroupMountpoint("memory"); err != nil {
-		if !quiet {
-			log.Printf("WARNING: %s\n", err)
-		}
-	} else {
-		_, err1 := ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.limit_in_bytes"))
-		_, err2 := ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.soft_limit_in_bytes"))
-		runtime.capabilities.MemoryLimit = err1 == nil && err2 == nil
-		if !runtime.capabilities.MemoryLimit && !quiet {
-			log.Printf("WARNING: Your kernel does not support cgroup memory limit.")
-		}
-
-		_, err = ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.memsw.limit_in_bytes"))
-		runtime.capabilities.SwapLimit = err == nil
-		if !runtime.capabilities.SwapLimit && !quiet {
-			log.Printf("WARNING: Your kernel does not support cgroup swap limit.")
-		}
-	}
-
-	content, err3 := ioutil.ReadFile("/proc/sys/net/ipv4/ip_forward")
-	runtime.capabilities.IPv4ForwardingDisabled = err3 != nil || len(content) == 0 || content[0] != '1'
-	if runtime.capabilities.IPv4ForwardingDisabled && !quiet {
-		log.Printf("WARNING: IPv4 forwarding is disabled.")
-	}
-
-	// Check if AppArmor seems to be enabled on this system.
-	if _, err := os.Stat("/sys/kernel/security/apparmor"); os.IsNotExist(err) {
-		utils.Debugf("/sys/kernel/security/apparmor not found; assuming AppArmor is not enabled.")
-		runtime.capabilities.AppArmor = false
-	} else {
-		utils.Debugf("/sys/kernel/security/apparmor found; assuming AppArmor is enabled.")
-		runtime.capabilities.AppArmor = true
-	}
-}
-
 // Create creates a new container from the given configuration with a given name.
 func (runtime *Runtime) Create(config *Config, name string) (*Container, []string, error) {
 	// Lookup image
@@ -646,7 +602,6 @@ func NewRuntime(config *DaemonConfig) (*Runtime, error) {
 	if err != nil {
 		return nil, err
 	}
-	runtime.UpdateCapabilities(false)
 	return runtime, nil
 }
 
@@ -675,10 +630,6 @@ func NewRuntimeFromDirectory(config *DaemonConfig) (*Runtime, error) {
 		}
 	}
 
-	utils.Debugf("Escaping AppArmor confinement")
-	if err := linkLxcStart(config.Root); err != nil {
-		return nil, err
-	}
 	utils.Debugf("Creating images graph")
 	g, err := NewGraph(path.Join(config.Root, "graph"), driver)
 	if err != nil {
@@ -735,6 +686,26 @@ func NewRuntimeFromDirectory(config *DaemonConfig) (*Runtime, error) {
 		sysInitPath = localCopy
 	}
 
+	sysInfo := sysinfo.New(false)
+
+	/*
+		temporarilly disabled.
+	*/
+	if false {
+		var ed execdriver.Driver
+		if driver := os.Getenv("EXEC_DRIVER"); driver == "lxc" {
+			ed, err = lxc.NewDriver(config.Root, sysInfo.AppArmor)
+		} else {
+			ed, err = chroot.NewDriver()
+		}
+		if ed != nil {
+		}
+	}
+	ed, err := lxc.NewDriver(config.Root, sysInfo.AppArmor)
+	if err != nil {
+		return nil, err
+	}
+
 	runtime := &Runtime{
 		repository:     runtimeRepo,
 		containers:     list.New(),
@@ -742,12 +713,13 @@ func NewRuntimeFromDirectory(config *DaemonConfig) (*Runtime, error) {
 		graph:          g,
 		repositories:   repositories,
 		idIndex:        utils.NewTruncIndex(),
-		capabilities:   &Capabilities{},
+		sysInfo:        sysInfo,
 		volumes:        volumes,
 		config:         config,
 		containerGraph: graph,
 		driver:         driver,
 		sysInitPath:    sysInitPath,
+		execDriver:     ed,
 	}
 
 	if err := runtime.restore(); err != nil {
@@ -829,6 +801,18 @@ func (runtime *Runtime) Diff(container *Container) (archive.Archive, error) {
 	return archive.ExportChanges(cDir, changes)
 }
 
+func (runtime *Runtime) Run(c *Container, startCallback execdriver.StartCallback) (int, error) {
+	return runtime.execDriver.Run(c.process, startCallback)
+}
+
+func (runtime *Runtime) Kill(c *Container, sig int) error {
+	return runtime.execDriver.Kill(c.process, sig)
+}
+
+func (runtime *Runtime) WaitGhost(c *Container) error {
+	return runtime.execDriver.Wait(c.ID)
+}
+
 // Nuke kills all containers then removes all content
 // from the content root, including images, volumes and
 // container filesystems.
@@ -848,23 +832,6 @@ func (runtime *Runtime) Nuke() error {
 	return os.RemoveAll(runtime.config.Root)
 }
 
-func linkLxcStart(root string) error {
-	sourcePath, err := exec.LookPath("lxc-start")
-	if err != nil {
-		return err
-	}
-	targetPath := path.Join(root, "lxc-start-unconfined")
-
-	if _, err := os.Lstat(targetPath); err != nil && !os.IsNotExist(err) {
-		return err
-	} else if err == nil {
-		if err := os.Remove(targetPath); err != nil {
-			return err
-		}
-	}
-	return os.Symlink(sourcePath, targetPath)
-}
-
 // FIXME: this is a convenience function for integration tests
 // which need direct access to runtime.graph.
 // Once the tests switch to using engine and jobs, this method

+ 7 - 14
server.go

@@ -516,7 +516,7 @@ func (srv *Server) ImageInsert(job *engine.Job) engine.Status {
 	}
 	defer file.Body.Close()
 
-	config, _, _, err := ParseRun([]string{img.ID, "echo", "insert", url, path}, srv.runtime.capabilities)
+	config, _, _, err := ParseRun([]string{img.ID, "echo", "insert", url, path}, srv.runtime.sysInfo)
 	if err != nil {
 		job.Error(err)
 		return engine.StatusErr
@@ -661,13 +661,6 @@ func (srv *Server) DockerInfo(job *engine.Job) engine.Status {
 	} else {
 		imgcount = len(images)
 	}
-	lxcVersion := ""
-	if output, err := exec.Command("lxc-version").CombinedOutput(); err == nil {
-		outputStr := string(output)
-		if len(strings.SplitN(outputStr, ":", 2)) == 2 {
-			lxcVersion = strings.TrimSpace(strings.SplitN(string(output), ":", 2)[1])
-		}
-	}
 	kernelVersion := "<unknown>"
 	if kv, err := utils.GetKernelVersion(); err == nil {
 		kernelVersion = kv.String()
@@ -685,13 +678,13 @@ func (srv *Server) DockerInfo(job *engine.Job) engine.Status {
 	v.SetInt("Images", imgcount)
 	v.Set("Driver", srv.runtime.driver.String())
 	v.SetJson("DriverStatus", srv.runtime.driver.Status())
-	v.SetBool("MemoryLimit", srv.runtime.capabilities.MemoryLimit)
-	v.SetBool("SwapLimit", srv.runtime.capabilities.SwapLimit)
-	v.SetBool("IPv4Forwarding", !srv.runtime.capabilities.IPv4ForwardingDisabled)
+	v.SetBool("MemoryLimit", srv.runtime.sysInfo.MemoryLimit)
+	v.SetBool("SwapLimit", srv.runtime.sysInfo.SwapLimit)
+	v.SetBool("IPv4Forwarding", !srv.runtime.sysInfo.IPv4ForwardingDisabled)
 	v.SetBool("Debug", os.Getenv("DEBUG") != "")
 	v.SetInt("NFd", utils.GetTotalUsedFds())
 	v.SetInt("NGoroutines", runtime.NumGoroutine())
-	v.Set("LXCVersion", lxcVersion)
+	v.Set("ExecutionDriver", srv.runtime.execDriver.Name())
 	v.SetInt("NEventsListener", len(srv.events))
 	v.Set("KernelVersion", kernelVersion)
 	v.Set("IndexServerAddress", auth.IndexServerAddress())
@@ -1477,10 +1470,10 @@ func (srv *Server) ContainerCreate(job *engine.Job) engine.Status {
 		job.Errorf("Minimum memory limit allowed is 512k")
 		return engine.StatusErr
 	}
-	if config.Memory > 0 && !srv.runtime.capabilities.MemoryLimit {
+	if config.Memory > 0 && !srv.runtime.sysInfo.MemoryLimit {
 		config.Memory = 0
 	}
-	if config.Memory > 0 && !srv.runtime.capabilities.SwapLimit {
+	if config.Memory > 0 && !srv.runtime.sysInfo.SwapLimit {
 		config.MemorySwap = -1
 	}
 	container, buildWarnings, err := srv.runtime.Create(&config, name)

+ 23 - 196
sysinit/sysinit.go

@@ -4,163 +4,19 @@ import (
 	"encoding/json"
 	"flag"
 	"fmt"
-	"github.com/dotcloud/docker/pkg/netlink"
-	"github.com/dotcloud/docker/utils"
-	"github.com/syndtr/gocapability/capability"
+	"github.com/dotcloud/docker/execdriver"
+	_ "github.com/dotcloud/docker/execdriver/chroot"
+	_ "github.com/dotcloud/docker/execdriver/lxc"
 	"io/ioutil"
 	"log"
-	"net"
 	"os"
-	"os/exec"
-	"strconv"
 	"strings"
-	"syscall"
 )
 
-type DockerInitArgs struct {
-	user       string
-	gateway    string
-	ip         string
-	workDir    string
-	privileged bool
-	env        []string
-	args       []string
-	mtu        int
-}
-
-func setupHostname(args *DockerInitArgs) error {
-	hostname := getEnv(args, "HOSTNAME")
-	if hostname == "" {
-		return nil
-	}
-	return setHostname(hostname)
-}
-
-// Setup networking
-func setupNetworking(args *DockerInitArgs) error {
-	if args.ip != "" {
-		// eth0
-		iface, err := net.InterfaceByName("eth0")
-		if err != nil {
-			return fmt.Errorf("Unable to set up networking: %v", err)
-		}
-		ip, ipNet, err := net.ParseCIDR(args.ip)
-		if err != nil {
-			return fmt.Errorf("Unable to set up networking: %v", err)
-		}
-		if err := netlink.NetworkLinkAddIp(iface, ip, ipNet); err != nil {
-			return fmt.Errorf("Unable to set up networking: %v", err)
-		}
-		if err := netlink.NetworkSetMTU(iface, args.mtu); err != nil {
-			return fmt.Errorf("Unable to set MTU: %v", err)
-		}
-		if err := netlink.NetworkLinkUp(iface); err != nil {
-			return fmt.Errorf("Unable to set up networking: %v", err)
-		}
-
-		// loopback
-		iface, err = net.InterfaceByName("lo")
-		if err != nil {
-			return fmt.Errorf("Unable to set up networking: %v", err)
-		}
-		if err := netlink.NetworkLinkUp(iface); err != nil {
-			return fmt.Errorf("Unable to set up networking: %v", err)
-		}
-	}
-	if args.gateway != "" {
-		gw := net.ParseIP(args.gateway)
-		if gw == nil {
-			return fmt.Errorf("Unable to set up networking, %s is not a valid gateway IP", args.gateway)
-		}
-
-		if err := netlink.AddDefaultGw(gw); err != nil {
-			return fmt.Errorf("Unable to set up networking: %v", err)
-		}
-	}
-
-	return nil
-}
-
-// Setup working directory
-func setupWorkingDirectory(args *DockerInitArgs) error {
-	if args.workDir == "" {
-		return nil
-	}
-	if err := syscall.Chdir(args.workDir); err != nil {
-		return fmt.Errorf("Unable to change dir to %v: %v", args.workDir, err)
-	}
-	return nil
-}
-
-// Takes care of dropping privileges to the desired user
-func changeUser(args *DockerInitArgs) error {
-	if args.user == "" {
-		return nil
-	}
-	userent, err := utils.UserLookup(args.user)
-	if err != nil {
-		return fmt.Errorf("Unable to find user %v: %v", args.user, err)
-	}
-
-	uid, err := strconv.Atoi(userent.Uid)
-	if err != nil {
-		return fmt.Errorf("Invalid uid: %v", userent.Uid)
-	}
-	gid, err := strconv.Atoi(userent.Gid)
-	if err != nil {
-		return fmt.Errorf("Invalid gid: %v", userent.Gid)
-	}
-
-	if err := syscall.Setgid(gid); err != nil {
-		return fmt.Errorf("setgid failed: %v", err)
-	}
-	if err := syscall.Setuid(uid); err != nil {
-		return fmt.Errorf("setuid failed: %v", err)
-	}
-
-	return nil
-}
-
-func setupCapabilities(args *DockerInitArgs) error {
-
-	if args.privileged {
-		return nil
-	}
-
-	drop := []capability.Cap{
-		capability.CAP_SETPCAP,
-		capability.CAP_SYS_MODULE,
-		capability.CAP_SYS_RAWIO,
-		capability.CAP_SYS_PACCT,
-		capability.CAP_SYS_ADMIN,
-		capability.CAP_SYS_NICE,
-		capability.CAP_SYS_RESOURCE,
-		capability.CAP_SYS_TIME,
-		capability.CAP_SYS_TTY_CONFIG,
-		capability.CAP_MKNOD,
-		capability.CAP_AUDIT_WRITE,
-		capability.CAP_AUDIT_CONTROL,
-		capability.CAP_MAC_OVERRIDE,
-		capability.CAP_MAC_ADMIN,
-	}
-
-	c, err := capability.NewPid(os.Getpid())
-	if err != nil {
-		return err
-	}
-
-	c.Unset(capability.CAPS|capability.BOUNDS, drop...)
-
-	if err := c.Apply(capability.CAPS | capability.BOUNDS); err != nil {
-		return err
-	}
-	return nil
-}
-
 // Clear environment pollution introduced by lxc-start
-func setupEnv(args *DockerInitArgs) {
+func setupEnv(args *execdriver.InitArgs) {
 	os.Clearenv()
-	for _, kv := range args.env {
+	for _, kv := range args.Env {
 		parts := strings.SplitN(kv, "=", 2)
 		if len(parts) == 1 {
 			parts = append(parts, "")
@@ -169,50 +25,19 @@ func setupEnv(args *DockerInitArgs) {
 	}
 }
 
-func getEnv(args *DockerInitArgs, key string) string {
-	for _, kv := range args.env {
-		parts := strings.SplitN(kv, "=", 2)
-		if parts[0] == key && len(parts) == 2 {
-			return parts[1]
-		}
-	}
-	return ""
-}
-
-func executeProgram(args *DockerInitArgs) error {
+func executeProgram(args *execdriver.InitArgs) error {
 	setupEnv(args)
-
-	if err := setupHostname(args); err != nil {
-		return err
-	}
-
-	if err := setupNetworking(args); err != nil {
-		return err
-	}
-
-	if err := setupCapabilities(args); err != nil {
-		return err
-	}
-
-	if err := setupWorkingDirectory(args); err != nil {
-		return err
-	}
-
-	if err := changeUser(args); err != nil {
-		return err
-	}
-
-	path, err := exec.LookPath(args.args[0])
+	dockerInitFct, err := execdriver.GetInitFunc(args.Driver)
 	if err != nil {
-		log.Printf("Unable to locate %v", args.args[0])
-		os.Exit(127)
+		panic(err)
 	}
+	return dockerInitFct(args)
 
-	if err := syscall.Exec(path, args.args, os.Environ()); err != nil {
-		return fmt.Errorf("dockerinit unable to execute %s - %s", path, err)
+	if args.Driver == "lxc" {
+		// Will never reach
+	} else if args.Driver == "chroot" {
 	}
 
-	// Will never reach here
 	return nil
 }
 
@@ -232,6 +57,7 @@ func SysInit() {
 	workDir := flag.String("w", "", "workdir")
 	privileged := flag.Bool("privileged", false, "privileged mode")
 	mtu := flag.Int("mtu", 1500, "interface mtu")
+	driver := flag.String("driver", "", "exec driver")
 	flag.Parse()
 
 	// Get env
@@ -247,15 +73,16 @@ func SysInit() {
 	// Propagate the plugin-specific container env variable
 	env = append(env, "container="+os.Getenv("container"))
 
-	args := &DockerInitArgs{
-		user:       *user,
-		gateway:    *gateway,
-		ip:         *ip,
-		workDir:    *workDir,
-		privileged: *privileged,
-		env:        env,
-		args:       flag.Args(),
-		mtu:        *mtu,
+	args := &execdriver.InitArgs{
+		User:       *user,
+		Gateway:    *gateway,
+		Ip:         *ip,
+		WorkDir:    *workDir,
+		Privileged: *privileged,
+		Env:        env,
+		Args:       flag.Args(),
+		Mtu:        *mtu,
+		Driver:     *driver,
 	}
 
 	if err := executeProgram(args); err != nil {

+ 0 - 15
utils.go

@@ -5,7 +5,6 @@ import (
 	"github.com/dotcloud/docker/archive"
 	"github.com/dotcloud/docker/pkg/namesgenerator"
 	"github.com/dotcloud/docker/utils"
-	"io/ioutil"
 	"strconv"
 	"strings"
 )
@@ -328,20 +327,6 @@ func parseLink(rawLink string) (map[string]string, error) {
 	return utils.PartParser("name:alias", rawLink)
 }
 
-func RootIsShared() bool {
-	if data, err := ioutil.ReadFile("/proc/self/mountinfo"); err == nil {
-		for _, line := range strings.Split(string(data), "\n") {
-			cols := strings.Split(line, " ")
-			if len(cols) >= 6 && cols[4] == "/" {
-				return strings.HasPrefix(cols[6], "shared")
-			}
-		}
-	}
-
-	// No idea, probably safe to assume so
-	return true
-}
-
 type checker struct {
 	runtime *Runtime
 }