Pārlūkot izejas kodu

Make separate nsinit pkg for a dockerinit like init
Docker-DCO-1.1-Signed-off-by: Michael Crosby <michael@crosbymichael.com> (github: crosbymichael)

Michael Crosby 11 gadi atpakaļ
vecāks
revīzija
68b049aed4

+ 2 - 0
pkg/libcontainer/container.go

@@ -11,6 +11,8 @@ type Container struct {
 	WorkingDir   string       `json:"working_dir,omitempty"`
 	Namespaces   Namespaces   `json:"namespaces,omitempty"`
 	Capabilities Capabilities `json:"capabilities,omitempty"`
+	Master       uintptr      `json:"master"`
+	Console      string       `json:"console"`
 }
 
 type Command struct {

+ 35 - 45
pkg/libcontainer/namespaces/calls_linux.go

@@ -12,19 +12,19 @@ const (
 	TIOCSPTLCK = 0x40045431
 )
 
-func chroot(dir string) error {
+func Chroot(dir string) error {
 	return syscall.Chroot(dir)
 }
 
-func chdir(dir string) error {
+func Chdir(dir string) error {
 	return syscall.Chdir(dir)
 }
 
-func exec(cmd string, args []string, env []string) error {
+func Exec(cmd string, args []string, env []string) error {
 	return syscall.Exec(cmd, args, env)
 }
 
-func fork() (int, error) {
+func Fork() (int, error) {
 	syscall.ForkLock.Lock()
 	pid, _, err := syscall.Syscall(syscall.SYS_FORK, 0, 0, 0)
 	syscall.ForkLock.Unlock()
@@ -34,33 +34,23 @@ func fork() (int, error) {
 	return int(pid), nil
 }
 
-func vfork() (int, error) {
-	syscall.ForkLock.Lock()
-	pid, _, err := syscall.Syscall(syscall.SYS_VFORK, 0, 0, 0)
-	syscall.ForkLock.Unlock()
-	if err != 0 {
-		return -1, err
-	}
-	return int(pid), nil
-}
-
-func mount(source, target, fstype string, flags uintptr, data string) error {
+func Mount(source, target, fstype string, flags uintptr, data string) error {
 	return syscall.Mount(source, target, fstype, flags, data)
 }
 
-func unmount(target string, flags int) error {
+func Unmount(target string, flags int) error {
 	return syscall.Unmount(target, flags)
 }
 
-func pivotroot(newroot, putold string) error {
+func Pivotroot(newroot, putold string) error {
 	return syscall.PivotRoot(newroot, putold)
 }
 
-func unshare(flags int) error {
+func Unshare(flags int) error {
 	return syscall.Unshare(flags)
 }
 
-func clone(flags uintptr) (int, error) {
+func Clone(flags uintptr) (int, error) {
 	syscall.ForkLock.Lock()
 	pid, _, err := syscall.RawSyscall(syscall.SYS_CLONE, flags, 0, 0)
 	syscall.ForkLock.Unlock()
@@ -70,7 +60,7 @@ func clone(flags uintptr) (int, error) {
 	return int(pid), nil
 }
 
-func setns(fd uintptr, flags uintptr) error {
+func Setns(fd uintptr, flags uintptr) error {
 	_, _, err := syscall.RawSyscall(SYS_SETNS, fd, flags, 0)
 	if err != 0 {
 		return err
@@ -78,87 +68,87 @@ func setns(fd uintptr, flags uintptr) error {
 	return nil
 }
 
-func usetCloseOnExec(fd uintptr) error {
+func UsetCloseOnExec(fd uintptr) error {
 	if _, _, err := syscall.Syscall(syscall.SYS_FCNTL, fd, syscall.F_SETFD, 0); err != 0 {
 		return err
 	}
 	return nil
 }
 
-func setgroups(gids []int) error {
+func Setgroups(gids []int) error {
 	return syscall.Setgroups(gids)
 }
 
-func setresgid(rgid, egid, sgid int) error {
+func Setresgid(rgid, egid, sgid int) error {
 	return syscall.Setresgid(rgid, egid, sgid)
 }
 
-func setresuid(ruid, euid, suid int) error {
+func Setresuid(ruid, euid, suid int) error {
 	return syscall.Setresuid(ruid, euid, suid)
 }
 
-func sethostname(name string) error {
+func Sethostname(name string) error {
 	return syscall.Sethostname([]byte(name))
 }
 
-func setsid() (int, error) {
+func Setsid() (int, error) {
 	return syscall.Setsid()
 }
 
-func ioctl(fd uintptr, flag, data uintptr) error {
+func Unlockpt(f *os.File) error {
+	var u int
+	return Ioctl(f.Fd(), TIOCSPTLCK, uintptr(unsafe.Pointer(&u)))
+}
+
+func Ioctl(fd uintptr, flag, data uintptr) error {
 	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data); err != 0 {
 		return err
 	}
 	return nil
 }
 
-func openpmtx() (*os.File, error) {
-	return os.OpenFile("/dev/ptmx", syscall.O_RDONLY|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
-}
-
-func unlockpt(f *os.File) error {
-	var u int
-	return ioctl(f.Fd(), TIOCSPTLCK, uintptr(unsafe.Pointer(&u)))
-}
-
-func ptsname(f *os.File) (string, error) {
+func Ptsname(f *os.File) (string, error) {
 	var n int
-	if err := ioctl(f.Fd(), TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil {
+	if err := Ioctl(f.Fd(), TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil {
 		return "", err
 	}
 	return fmt.Sprintf("/dev/pts/%d", n), nil
 }
 
-func closefd(fd uintptr) error {
+func Openpmtx() (*os.File, error) {
+	return os.OpenFile("/dev/ptmx", syscall.O_RDONLY|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
+}
+
+func Closefd(fd uintptr) error {
 	return syscall.Close(int(fd))
 }
 
-func dup2(fd1, fd2 uintptr) error {
+func Dup2(fd1, fd2 uintptr) error {
 	return syscall.Dup2(int(fd1), int(fd2))
 }
 
-func mknod(path string, mode uint32, dev int) error {
+func Mknod(path string, mode uint32, dev int) error {
 	return syscall.Mknod(path, mode, dev)
 }
 
-func parentDeathSignal() error {
+func ParentDeathSignal() error {
 	if _, _, err := syscall.RawSyscall6(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, uintptr(syscall.SIGKILL), 0, 0, 0, 0); err != 0 {
 		return err
 	}
 	return nil
 }
 
-func setctty() error {
+func Setctty() error {
 	if _, _, err := syscall.RawSyscall(syscall.SYS_IOCTL, 0, uintptr(syscall.TIOCSCTTY), 0); err != 0 {
 		return err
 	}
 	return nil
 }
 
-func mkfifo(name string, mode uint32) error {
+func Mkfifo(name string, mode uint32) error {
 	return syscall.Mkfifo(name, mode)
 }
 
-func umask(mask int) int {
+func Umask(mask int) int {
 	return syscall.Umask(mask)
 }

+ 79 - 175
pkg/libcontainer/namespaces/exec.go

@@ -8,12 +8,10 @@ import (
 	"errors"
 	"fmt"
 	"github.com/dotcloud/docker/pkg/libcontainer"
-	"github.com/dotcloud/docker/pkg/libcontainer/capabilities"
-	"github.com/dotcloud/docker/pkg/libcontainer/utils"
 	"io"
 	"log"
 	"os"
-	"path/filepath"
+	"os/exec"
 	"syscall"
 )
 
@@ -29,89 +27,31 @@ var (
 // the container will be spawned with a new network namespace with no configuration.  Omitting an
 // existing network namespace and the CLONE_NEWNET option in the container configuration will allow
 // the container to use the host's networking options and configuration.
-func Exec(container *libcontainer.Container) (pid int, err error) {
+func ExecContainer(container *libcontainer.Container) (pid int, err error) {
 	// a user cannot pass CLONE_NEWNET and an existing net namespace fd to join
 	if container.NetNsFd > 0 && container.Namespaces.Contains(libcontainer.CLONE_NEWNET) {
 		return -1, ErrExistingNetworkNamespace
 	}
 
-	rootfs, err := resolveRootfs(container)
-	if err != nil {
-		return -1, err
-	}
-
 	master, console, err := createMasterAndConsole()
 	if err != nil {
 		return -1, err
 	}
-
-	logger, err := os.OpenFile("/root/logs", os.O_RDWR|os.O_CREATE|os.O_APPEND, 0755)
-	if err != nil {
-		return -1, err
-	}
-	log.SetOutput(logger)
+	container.Console = console
+	container.Master = master.Fd()
 
 	// we need CLONE_VFORK so we can wait on the child
-	flag := getNamespaceFlags(container.Namespaces) | CLONE_VFORK
-
-	if pid, err = clone(uintptr(flag | SIGCHLD)); err != nil {
-		return -1, fmt.Errorf("error cloning process: %s", err)
-	}
+	flag := uintptr(getNamespaceFlags(container.Namespaces) | CLONE_VFORK)
 
-	if pid == 0 {
-		// welcome to your new namespace ;)
-		//
-		// any errors encoutered inside the namespace we should write
-		// out to a log or a pipe to our parent and exit(1)
-		// because writing to stderr will not work after we close
-		if err := closeMasterAndStd(master); err != nil {
-			writeError("close master and std %s", err)
-		}
-		slave, err := openTerminal(console, syscall.O_RDWR)
-		if err != nil {
-			writeError("open terminal %s", err)
-		}
-		if err := dupSlave(slave); err != nil {
-			writeError("dup2 slave %s", err)
-		}
-
-		if container.NetNsFd > 0 {
-			if err := JoinExistingNamespace(container.NetNsFd, libcontainer.CLONE_NEWNET); err != nil {
-				writeError("join existing net namespace %s", err)
-			}
-		}
+	command := exec.Command("/.nsinit")
+	command.SysProcAttr = &syscall.SysProcAttr{}
+	command.SysProcAttr.Cloneflags = flag
+	command.SysProcAttr.Setctty = true
 
-		if _, err := setsid(); err != nil {
-			writeError("setsid %s", err)
-		}
-		if err := setctty(); err != nil {
-			writeError("setctty %s", err)
-		}
-		if err := parentDeathSignal(); err != nil {
-			writeError("parent deth signal %s", err)
-		}
-		if err := SetupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil {
-			writeError("setup mount namespace %s", err)
-		}
-		if err := sethostname(container.ID); err != nil {
-			writeError("sethostname %s", err)
-		}
-		if err := capabilities.DropCapabilities(container); err != nil {
-			writeError("drop capabilities %s", err)
-		}
-		if err := setupUser(container); err != nil {
-			writeError("setup user %s", err)
-		}
-		if container.WorkingDir != "" {
-			if err := chdir(container.WorkingDir); err != nil {
-				writeError("chdir to %s %s", container.WorkingDir, err)
-			}
-		}
-		if err := exec(container.Command.Args[0], container.Command.Args[0:], container.Command.Env); err != nil {
-			writeError("exec %s", err)
-		}
-		panic("unreachable")
+	if err := command.Start(); err != nil {
+		return -1, err
 	}
+	pid = command.Process.Pid
 
 	go func() {
 		if _, err := io.Copy(os.Stdout, master); err != nil {
@@ -130,91 +70,86 @@ func Exec(container *libcontainer.Container) (pid int, err error) {
 // pid and namespace configuration is needed along with the specific capabilities that should
 // be dropped once inside the namespace.
 func ExecIn(container *libcontainer.Container, cmd *libcontainer.Command) (int, error) {
-	if container.NsPid <= 0 {
-		return -1, libcontainer.ErrInvalidPid
-	}
-
-	fds, err := getNsFds(container)
-	if err != nil {
-		return -1, err
-	}
+	return -1, fmt.Errorf("not implemented")
+	/*
+		if container.NsPid <= 0 {
+			return -1, libcontainer.ErrInvalidPid
+		}
 
-	if container.NetNsFd > 0 {
-		fds = append(fds, container.NetNsFd)
-	}
+		fds, err := getNsFds(container)
+		if err != nil {
+			return -1, err
+		}
 
-	pid, err := fork()
-	if err != nil {
-		for _, fd := range fds {
-			syscall.Close(int(fd))
+		if container.NetNsFd > 0 {
+			fds = append(fds, container.NetNsFd)
 		}
-		return -1, err
-	}
 
-	if pid == 0 {
-		for _, fd := range fds {
-			if fd > 0 {
-				if err := JoinExistingNamespace(fd, ""); err != nil {
-					for _, fd := range fds {
-						syscall.Close(int(fd))
-					}
-					writeError("join existing namespace for %d %s", fd, err)
-				}
+		pid, err := fork()
+		if err != nil {
+			for _, fd := range fds {
+				syscall.Close(int(fd))
 			}
-			syscall.Close(int(fd))
+			return -1, err
 		}
 
-		if container.Namespaces.Contains(libcontainer.CLONE_NEWNS) &&
-			container.Namespaces.Contains(libcontainer.CLONE_NEWPID) {
-			// important:
-			//
-			// we need to fork and unshare so that re can remount proc and sys within
-			// the namespace so the CLONE_NEWPID namespace will take effect
-			// if we don't fork we would end up unmounting proc and sys for the entire
-			// namespace
-			child, err := fork()
-			if err != nil {
-				writeError("fork child %s", err)
+		if pid == 0 {
+			for _, fd := range fds {
+				if fd > 0 {
+					if err := JoinExistingNamespace(fd, ""); err != nil {
+						for _, fd := range fds {
+							syscall.Close(int(fd))
+						}
+						writeError("join existing namespace for %d %s", fd, err)
+					}
+				}
+				syscall.Close(int(fd))
 			}
 
-			if child == 0 {
-				if err := unshare(CLONE_NEWNS); err != nil {
-					writeError("unshare newns %s", err)
-				}
-				if err := remountProc(); err != nil {
-					writeError("remount proc %s", err)
+			if container.Namespaces.Contains(libcontainer.CLONE_NEWNS) &&
+				container.Namespaces.Contains(libcontainer.CLONE_NEWPID) {
+				// important:
+				//
+				// we need to fork and unshare so that we can remount proc and sys within
+				// the namespace so the CLONE_NEWPID namespace will take effect
+				// if we don't fork we would end up unmounting proc and sys for the entire
+				// namespace
+				child, err := fork()
+				if err != nil {
+					writeError("fork child %s", err)
 				}
-				if err := remountSys(); err != nil {
-					writeError("remount sys %s", err)
-				}
-				if err := capabilities.DropCapabilities(container); err != nil {
-					writeError("drop caps %s", err)
+
+				if child == 0 {
+					if err := unshare(CLONE_NEWNS); err != nil {
+						writeError("unshare newns %s", err)
+					}
+					if err := remountProc(); err != nil {
+						writeError("remount proc %s", err)
+					}
+					if err := remountSys(); err != nil {
+						writeError("remount sys %s", err)
+					}
+					if err := capabilities.DropCapabilities(container); err != nil {
+						writeError("drop caps %s", err)
+					}
+					if err := exec(cmd.Args[0], cmd.Args[0:], cmd.Env); err != nil {
+						writeError("exec %s", err)
+					}
+					panic("unreachable")
 				}
-				if err := exec(cmd.Args[0], cmd.Args[0:], cmd.Env); err != nil {
-					writeError("exec %s", err)
+				exit, err := utils.WaitOnPid(child)
+				if err != nil {
+					writeError("wait on child %s", err)
 				}
-				panic("unreachable")
+				os.Exit(exit)
 			}
-			exit, err := utils.WaitOnPid(child)
-			if err != nil {
-				writeError("wait on child %s", err)
+			if err := exec(cmd.Args[0], cmd.Args[0:], cmd.Env); err != nil {
+				writeError("exec %s", err)
 			}
-			os.Exit(exit)
-		}
-		if err := exec(cmd.Args[0], cmd.Args[0:], cmd.Env); err != nil {
-			writeError("exec %s", err)
+			panic("unreachable")
 		}
-		panic("unreachable")
-	}
-	return pid, err
-}
-
-func resolveRootfs(container *libcontainer.Container) (string, error) {
-	rootfs, err := filepath.Abs(container.RootFs)
-	if err != nil {
-		return "", err
-	}
-	return filepath.EvalSymlinks(rootfs)
+		return pid, err
+	*/
 }
 
 func createMasterAndConsole() (*os.File, string, error) {
@@ -223,44 +158,13 @@ func createMasterAndConsole() (*os.File, string, error) {
 		return nil, "", err
 	}
 
-	console, err := ptsname(master)
+	console, err := Ptsname(master)
 	if err != nil {
 		return nil, "", err
 	}
 
-	if err := unlockpt(master); err != nil {
+	if err := Unlockpt(master); err != nil {
 		return nil, "", err
 	}
 	return master, console, nil
 }
-
-func closeMasterAndStd(master *os.File) error {
-	closefd(master.Fd())
-	closefd(0)
-	closefd(1)
-	closefd(2)
-
-	return nil
-}
-
-func dupSlave(slave *os.File) error {
-	// we close Stdin,etc so our pty slave should have fd 0
-	if slave.Fd() != 0 {
-		return fmt.Errorf("slave fd not 0 %d", slave.Fd())
-	}
-	if err := dup2(slave.Fd(), 1); err != nil {
-		return err
-	}
-	if err := dup2(slave.Fd(), 2); err != nil {
-		return err
-	}
-	return nil
-}
-
-func openTerminal(name string, flag int) (*os.File, error) {
-	r, e := syscall.Open(name, flag, 0)
-	if e != nil {
-		return nil, &os.PathError{"open", name, e}
-	}
-	return os.NewFile(uintptr(r), name), nil
-}

+ 18 - 18
pkg/libcontainer/namespaces/mount.go

@@ -14,16 +14,16 @@ var (
 )
 
 func SetupNewMountNamespace(rootfs, console string, readonly bool) error {
-	if err := mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil {
+	if err := Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil {
 		return fmt.Errorf("mounting / as slave %s", err)
 	}
 
-	if err := mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
+	if err := Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
 		return fmt.Errorf("mouting %s as bind %s", rootfs, err)
 	}
 
 	if readonly {
-		if err := mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != nil {
+		if err := Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != nil {
 			return fmt.Errorf("mounting %s as readonly %s", rootfs, err)
 		}
 	}
@@ -52,29 +52,29 @@ func SetupNewMountNamespace(rootfs, console string, readonly bool) error {
 		return err
 	}
 
-	if err := chdir(rootfs); err != nil {
+	if err := Chdir(rootfs); err != nil {
 		return fmt.Errorf("chdir into %s %s", rootfs, err)
 	}
 
-	if err := mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil {
+	if err := Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil {
 		return fmt.Errorf("mount move %s into / %s", rootfs, err)
 	}
 
-	if err := chroot("."); err != nil {
+	if err := Chroot("."); err != nil {
 		return fmt.Errorf("chroot . %s", err)
 	}
 
-	if err := chdir("/"); err != nil {
+	if err := Chdir("/"); err != nil {
 		return fmt.Errorf("chdir / %s", err)
 	}
 
-	umask(0022)
+	Umask(0022)
 
 	return nil
 }
 
 func copyDevNodes(rootfs string) error {
-	umask(0000)
+	Umask(0000)
 
 	for _, node := range []string{
 		"null",
@@ -95,7 +95,7 @@ func copyDevNodes(rootfs string) error {
 		)
 
 		log.Printf("copy %s to %s %d\n", node, dest, st.Rdev)
-		if err := mknod(dest, st.Mode, int(st.Rdev)); err != nil && !os.IsExist(err) {
+		if err := Mknod(dest, st.Mode, int(st.Rdev)); err != nil && !os.IsExist(err) {
 			return fmt.Errorf("copy %s %s", node, err)
 		}
 	}
@@ -125,7 +125,7 @@ func setupDev(rootfs string) error {
 }
 
 func setupConsole(rootfs, console string) error {
-	umask(0000)
+	Umask(0000)
 
 	stat, err := os.Stat(console)
 	if err != nil {
@@ -145,11 +145,11 @@ func setupConsole(rootfs, console string) error {
 		return err
 	}
 
-	if err := mknod(dest, (st.Mode&^07777)|0600, int(st.Rdev)); err != nil {
+	if err := Mknod(dest, (st.Mode&^07777)|0600, int(st.Rdev)); err != nil {
 		return fmt.Errorf("mknod %s %s", dest, err)
 	}
 
-	if err := mount(console, dest, "bind", syscall.MS_BIND, ""); err != nil {
+	if err := Mount(console, dest, "bind", syscall.MS_BIND, ""); err != nil {
 		return fmt.Errorf("bind %s to %s %s", console, dest, err)
 	}
 	return nil
@@ -176,7 +176,7 @@ func mountSystem(rootfs string) error {
 		if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) {
 			return fmt.Errorf("mkdirall %s %s", m.path, err)
 		}
-		if err := mount(m.source, m.path, m.device, uintptr(m.flags), m.data); err != nil {
+		if err := Mount(m.source, m.path, m.device, uintptr(m.flags), m.data); err != nil {
 			return fmt.Errorf("mounting %s into %s %s", m.source, m.path, err)
 		}
 	}
@@ -184,22 +184,22 @@ func mountSystem(rootfs string) error {
 }
 
 func remountProc() error {
-	if err := unmount("/proc", syscall.MNT_DETACH); err != nil {
+	if err := Unmount("/proc", syscall.MNT_DETACH); err != nil {
 		return err
 	}
-	if err := mount("proc", "/proc", "proc", uintptr(defaults), ""); err != nil {
+	if err := Mount("proc", "/proc", "proc", uintptr(defaults), ""); err != nil {
 		return err
 	}
 	return nil
 }
 
 func remountSys() error {
-	if err := unmount("/sys", syscall.MNT_DETACH); err != nil {
+	if err := Unmount("/sys", syscall.MNT_DETACH); err != nil {
 		if err != syscall.EINVAL {
 			return err
 		}
 	} else {
-		if err := mount("sysfs", "/sys", "sysfs", uintptr(defaults), ""); err != nil {
+		if err := Mount("sysfs", "/sys", "sysfs", uintptr(defaults), ""); err != nil {
 			return err
 		}
 	}

+ 1 - 39
pkg/libcontainer/namespaces/namespaces.go

@@ -9,52 +9,14 @@
 package namespaces
 
 import (
-	"fmt"
 	"github.com/dotcloud/docker/pkg/libcontainer"
-	"github.com/dotcloud/docker/pkg/libcontainer/utils"
-	"os"
-	"path/filepath"
-	"syscall"
 )
 
-// CreateNewNamespace creates a new namespace and binds it's fd to the specified path
-func CreateNewNamespace(namespace libcontainer.Namespace, bindTo string) error {
-	var (
-		flag   = namespaceMap[namespace]
-		name   = namespaceFileMap[namespace]
-		nspath = filepath.Join("/proc/self/ns", name)
-	)
-	// TODO: perform validation on name and flag
-
-	pid, err := fork()
-	if err != nil {
-		return err
-	}
-
-	if pid == 0 {
-		if err := unshare(flag); err != nil {
-			writeError("unshare %s", err)
-		}
-		if err := mount(nspath, bindTo, "none", syscall.MS_BIND, ""); err != nil {
-			writeError("bind mount %s", err)
-		}
-		os.Exit(0)
-	}
-	exit, err := utils.WaitOnPid(pid)
-	if err != nil {
-		return err
-	}
-	if exit != 0 {
-		return fmt.Errorf("exit status %d", exit)
-	}
-	return err
-}
-
 // JoinExistingNamespace uses the fd of an existing linux namespace and
 // has the current process join that namespace or the spacespace specified by ns
 func JoinExistingNamespace(fd uintptr, ns libcontainer.Namespace) error {
 	flag := namespaceMap[ns]
-	if err := setns(fd, uintptr(flag)); err != nil {
+	if err := Setns(fd, uintptr(flag)); err != nil {
 		return err
 	}
 	return nil

+ 140 - 0
pkg/libcontainer/namespaces/nsinit/init.go

@@ -0,0 +1,140 @@
+package nsinit
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/pkg/libcontainer"
+	"github.com/dotcloud/docker/pkg/libcontainer/capabilities"
+	"github.com/dotcloud/docker/pkg/libcontainer/namespaces"
+	"log"
+	"os"
+	"path/filepath"
+	"syscall"
+)
+
+// InitNamespace should be run inside an existing namespace to setup
+// common mounts, drop capabilities, and setup network interfaces
+func InitNamespace(container *libcontainer.Container) error {
+	rootfs, err := resolveRootfs(container)
+	if err != nil {
+		return err
+	}
+
+	// any errors encountered inside the namespace we should write
+	// out to a log or a pipe to our parent and exit(1)
+	// because writing to stderr will not work after we close it
+	if err := closeMasterAndStd(container.Master); err != nil {
+		log.Fatalf("close master and std %s", err)
+		return err
+	}
+
+	slave, err := openTerminal(container.Console, syscall.O_RDWR)
+	if err != nil {
+		log.Fatalf("open terminal %s", err)
+		return err
+	}
+	if err := dupSlave(slave); err != nil {
+		log.Fatalf("dup2 slave %s", err)
+		return err
+	}
+
+	/*
+		if container.NetNsFd > 0 {
+			if err := joinExistingNamespace(container.NetNsFd, libcontainer.CLONE_NEWNET); err != nil {
+				log.Fatalf("join existing net namespace %s", err)
+			}
+		}
+	*/
+
+	if _, err := namespaces.Setsid(); err != nil {
+		log.Fatalf("setsid %s", err)
+		return err
+	}
+	if err := namespaces.Setctty(); err != nil {
+		log.Fatalf("setctty %s", err)
+		return err
+	}
+	if err := namespaces.ParentDeathSignal(); err != nil {
+		log.Fatalf("parent deth signal %s", err)
+		return err
+	}
+	if err := namespaces.SetupNewMountNamespace(rootfs, container.Console, container.ReadonlyFs); err != nil {
+		log.Fatalf("setup mount namespace %s", err)
+		return err
+	}
+	if err := namespaces.Sethostname(container.ID); err != nil {
+		log.Fatalf("sethostname %s", err)
+		return err
+	}
+	if err := capabilities.DropCapabilities(container); err != nil {
+		log.Fatalf("drop capabilities %s", err)
+		return err
+	}
+	if err := setupUser(container); err != nil {
+		log.Fatalf("setup user %s", err)
+		return err
+	}
+	if container.WorkingDir != "" {
+		if err := namespaces.Chdir(container.WorkingDir); err != nil {
+			log.Fatalf("chdir to %s %s", container.WorkingDir, err)
+			return err
+		}
+	}
+	if err := namespaces.Exec(container.Command.Args[0], container.Command.Args[0:], container.Command.Env); err != nil {
+		log.Fatalf("exec %s", err)
+		return err
+	}
+	panic("unreachable")
+}
+
+func resolveRootfs(container *libcontainer.Container) (string, error) {
+	rootfs, err := filepath.Abs(container.RootFs)
+	if err != nil {
+		return "", err
+	}
+	return filepath.EvalSymlinks(rootfs)
+}
+
+func closeMasterAndStd(master uintptr) error {
+	namespaces.Closefd(master)
+	namespaces.Closefd(0)
+	namespaces.Closefd(1)
+	namespaces.Closefd(2)
+
+	return nil
+}
+
+func setupUser(container *libcontainer.Container) error {
+	// TODO: honor user passed on container
+	if err := namespaces.Setgroups(nil); err != nil {
+		return err
+	}
+	if err := namespaces.Setresgid(0, 0, 0); err != nil {
+		return err
+	}
+	if err := namespaces.Setresuid(0, 0, 0); err != nil {
+		return err
+	}
+	return nil
+}
+
+func dupSlave(slave *os.File) error {
+	// we close Stdin,etc so our pty slave should have fd 0
+	if slave.Fd() != 0 {
+		return fmt.Errorf("slave fd not 0 %d", slave.Fd())
+	}
+	if err := namespaces.Dup2(slave.Fd(), 1); err != nil {
+		return err
+	}
+	if err := namespaces.Dup2(slave.Fd(), 2); err != nil {
+		return err
+	}
+	return nil
+}
+
+func openTerminal(name string, flag int) (*os.File, error) {
+	r, e := syscall.Open(name, flag, 0)
+	if e != nil {
+		return nil, &os.PathError{"open", name, e}
+	}
+	return os.NewFile(uintptr(r), name), nil
+}

+ 2 - 22
pkg/libcontainer/namespaces/utils.go

@@ -26,12 +26,6 @@ func addEnvIfNotSet(container *libcontainer.Container, key, value string) {
 	container.Command.Env = append(container.Command.Env, jv)
 }
 
-// print and error to stderr and exit(1)
-func writeError(format string, v ...interface{}) {
-	fmt.Fprintf(os.Stderr, format, v...)
-	os.Exit(1)
-}
-
 // getNsFds inspects the container's namespace configuration and opens the fds to
 // each of the namespaces.
 func getNsFds(container *libcontainer.Container) ([]uintptr, error) {
@@ -79,27 +73,13 @@ func setupEnvironment(container *libcontainer.Container) {
 	addEnvIfNotSet(container, "LOGNAME", "root")
 }
 
-func setupUser(container *libcontainer.Container) error {
-	// TODO: honor user passed on container
-	if err := setgroups(nil); err != nil {
-		return err
-	}
-	if err := setresgid(0, 0, 0); err != nil {
-		return err
-	}
-	if err := setresuid(0, 0, 0); err != nil {
-		return err
-	}
-	return nil
-}
-
 func getMasterAndConsole(container *libcontainer.Container) (string, *os.File, error) {
-	master, err := openpmtx()
+	master, err := Openpmtx()
 	if err != nil {
 		return "", nil, err
 	}
 
-	console, err := ptsname(master)
+	console, err := Ptsname(master)
 	if err != nil {
 		master.Close()
 		return "", nil, err