|
@@ -0,0 +1,266 @@
|
|
|
+/*
|
|
|
+ Higher level convenience functions for setting up a container
|
|
|
+*/
|
|
|
+
|
|
|
+package namespaces
|
|
|
+
|
|
|
+import (
|
|
|
+ "errors"
|
|
|
+ "fmt"
|
|
|
+ "github.com/dotcloud/docker/pkg/libcontainer"
|
|
|
+ "github.com/dotcloud/docker/pkg/libcontainer/capabilities"
|
|
|
+ "github.com/dotcloud/docker/pkg/libcontainer/utils"
|
|
|
+ "io"
|
|
|
+ "log"
|
|
|
+ "os"
|
|
|
+ "path/filepath"
|
|
|
+ "syscall"
|
|
|
+)
|
|
|
+
|
|
|
// ErrExistingNetworkNamespace is returned by Exec when the container
// configuration requests CLONE_NEWNET and also supplies an existing network
// namespace fd to join.
var ErrExistingNetworkNamespace = errors.New("specified both CLONE_NEWNET and an existing network namespace")
|
|
|
+
|
|
|
+// Exec will spawn new namespaces with the specified Container configuration
|
|
|
+// in the RootFs path and return the pid of the new containerized process.
|
|
|
+//
|
|
|
+// If an existing network namespace is specified the container
|
|
|
+// will join that namespace. If an existing network namespace is not specified but CLONE_NEWNET is,
|
|
|
+// the container will be spawned with a new network namespace with no configuration. Omiting an
|
|
|
+// existing network namespace and the CLONE_NEWNET option in the container configuration will allow
|
|
|
+// the container to the the host's networking options and configuration.
|
|
|
+func Exec(container *libcontainer.Container) (pid int, err error) {
|
|
|
+ // a user cannot pass CLONE_NEWNET and an existing net namespace fd to join
|
|
|
+ if container.NetNsFd > 0 && container.Namespaces.Contains(libcontainer.CLONE_NEWNET) {
|
|
|
+ return -1, ErrExistingNetworkNamespace
|
|
|
+ }
|
|
|
+
|
|
|
+ rootfs, err := resolveRootfs(container)
|
|
|
+ if err != nil {
|
|
|
+ return -1, err
|
|
|
+ }
|
|
|
+
|
|
|
+ master, console, err := createMasterAndConsole()
|
|
|
+ if err != nil {
|
|
|
+ return -1, err
|
|
|
+ }
|
|
|
+
|
|
|
+ logger, err := os.OpenFile("/root/logs", os.O_RDWR|os.O_CREATE|os.O_APPEND, 0755)
|
|
|
+ if err != nil {
|
|
|
+ return -1, err
|
|
|
+ }
|
|
|
+ log.SetOutput(logger)
|
|
|
+
|
|
|
+ // we need CLONE_VFORK so we can wait on the child
|
|
|
+ flag := getNamespaceFlags(container.Namespaces) | CLONE_VFORK
|
|
|
+
|
|
|
+ if pid, err = clone(uintptr(flag | SIGCHLD)); err != nil {
|
|
|
+ return -1, fmt.Errorf("error cloning process: %s", err)
|
|
|
+ }
|
|
|
+
|
|
|
+ if pid == 0 {
|
|
|
+ // welcome to your new namespace ;)
|
|
|
+ //
|
|
|
+ // any errors encoutered inside the namespace we should write
|
|
|
+ // out to a log or a pipe to our parent and exit(1)
|
|
|
+ // because writing to stderr will not work after we close
|
|
|
+ if err := closeMasterAndStd(master); err != nil {
|
|
|
+ writeError("close master and std %s", err)
|
|
|
+ }
|
|
|
+ slave, err := openTerminal(console, syscall.O_RDWR)
|
|
|
+ if err != nil {
|
|
|
+ writeError("open terminal %s", err)
|
|
|
+ }
|
|
|
+ if err := dupSlave(slave); err != nil {
|
|
|
+ writeError("dup2 slave %s", err)
|
|
|
+ }
|
|
|
+
|
|
|
+ if container.NetNsFd > 0 {
|
|
|
+ if err := JoinExistingNamespace(container.NetNsFd, libcontainer.CLONE_NEWNET); err != nil {
|
|
|
+ writeError("join existing net namespace %s", err)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if _, err := setsid(); err != nil {
|
|
|
+ writeError("setsid %s", err)
|
|
|
+ }
|
|
|
+ if err := setctty(); err != nil {
|
|
|
+ writeError("setctty %s", err)
|
|
|
+ }
|
|
|
+ if err := parentDeathSignal(); err != nil {
|
|
|
+ writeError("parent deth signal %s", err)
|
|
|
+ }
|
|
|
+ if err := SetupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil {
|
|
|
+ writeError("setup mount namespace %s", err)
|
|
|
+ }
|
|
|
+ if err := sethostname(container.ID); err != nil {
|
|
|
+ writeError("sethostname %s", err)
|
|
|
+ }
|
|
|
+ if err := capabilities.DropCapabilities(container); err != nil {
|
|
|
+ writeError("drop capabilities %s", err)
|
|
|
+ }
|
|
|
+ if err := setupUser(container); err != nil {
|
|
|
+ writeError("setup user %s", err)
|
|
|
+ }
|
|
|
+ if container.WorkingDir != "" {
|
|
|
+ if err := chdir(container.WorkingDir); err != nil {
|
|
|
+ writeError("chdir to %s %s", container.WorkingDir, err)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if err := exec(container.Command.Args[0], container.Command.Args[0:], container.Command.Env); err != nil {
|
|
|
+ writeError("exec %s", err)
|
|
|
+ }
|
|
|
+ panic("unreachable")
|
|
|
+ }
|
|
|
+
|
|
|
+ go func() {
|
|
|
+ if _, err := io.Copy(os.Stdout, master); err != nil {
|
|
|
+ log.Println(err)
|
|
|
+ }
|
|
|
+ }()
|
|
|
+ go func() {
|
|
|
+ if _, err := io.Copy(master, os.Stdin); err != nil {
|
|
|
+ log.Println(err)
|
|
|
+ }
|
|
|
+ }()
|
|
|
+ return pid, nil
|
|
|
+}
|
|
|
+
|
|
|
+// ExecIn will spawn a new command inside an existing container's namespaces. The existing container's
|
|
|
+// pid and namespace configuration is needed along with the specific capabilities that should
|
|
|
+// be dropped once inside the namespace.
|
|
|
+func ExecIn(container *libcontainer.Container, cmd *libcontainer.Command) (int, error) {
|
|
|
+ if container.NsPid <= 0 {
|
|
|
+ return -1, libcontainer.ErrInvalidPid
|
|
|
+ }
|
|
|
+
|
|
|
+ fds, err := getNsFds(container)
|
|
|
+ if err != nil {
|
|
|
+ return -1, err
|
|
|
+ }
|
|
|
+
|
|
|
+ if container.NetNsFd > 0 {
|
|
|
+ fds = append(fds, container.NetNsFd)
|
|
|
+ }
|
|
|
+
|
|
|
+ pid, err := fork()
|
|
|
+ if err != nil {
|
|
|
+ for _, fd := range fds {
|
|
|
+ syscall.Close(int(fd))
|
|
|
+ }
|
|
|
+ return -1, err
|
|
|
+ }
|
|
|
+
|
|
|
+ if pid == 0 {
|
|
|
+ for _, fd := range fds {
|
|
|
+ if fd > 0 {
|
|
|
+ if err := JoinExistingNamespace(fd, ""); err != nil {
|
|
|
+ for _, fd := range fds {
|
|
|
+ syscall.Close(int(fd))
|
|
|
+ }
|
|
|
+ writeError("join existing namespace for %d %s", fd, err)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ syscall.Close(int(fd))
|
|
|
+ }
|
|
|
+
|
|
|
+ if container.Namespaces.Contains(libcontainer.CLONE_NEWNS) &&
|
|
|
+ container.Namespaces.Contains(libcontainer.CLONE_NEWPID) {
|
|
|
+ // important:
|
|
|
+ //
|
|
|
+ // we need to fork and unshare so that re can remount proc and sys within
|
|
|
+ // the namespace so the CLONE_NEWPID namespace will take effect
|
|
|
+ // if we don't fork we would end up unmounting proc and sys for the entire
|
|
|
+ // namespace
|
|
|
+ child, err := fork()
|
|
|
+ if err != nil {
|
|
|
+ writeError("fork child %s", err)
|
|
|
+ }
|
|
|
+
|
|
|
+ if child == 0 {
|
|
|
+ if err := unshare(CLONE_NEWNS); err != nil {
|
|
|
+ writeError("unshare newns %s", err)
|
|
|
+ }
|
|
|
+ if err := remountProc(); err != nil {
|
|
|
+ writeError("remount proc %s", err)
|
|
|
+ }
|
|
|
+ if err := remountSys(); err != nil {
|
|
|
+ writeError("remount sys %s", err)
|
|
|
+ }
|
|
|
+ if err := capabilities.DropCapabilities(container); err != nil {
|
|
|
+ writeError("drop caps %s", err)
|
|
|
+ }
|
|
|
+ if err := exec(cmd.Args[0], cmd.Args[0:], cmd.Env); err != nil {
|
|
|
+ writeError("exec %s", err)
|
|
|
+ }
|
|
|
+ panic("unreachable")
|
|
|
+ }
|
|
|
+ exit, err := utils.WaitOnPid(child)
|
|
|
+ if err != nil {
|
|
|
+ writeError("wait on child %s", err)
|
|
|
+ }
|
|
|
+ os.Exit(exit)
|
|
|
+ }
|
|
|
+ if err := exec(cmd.Args[0], cmd.Args[0:], cmd.Env); err != nil {
|
|
|
+ writeError("exec %s", err)
|
|
|
+ }
|
|
|
+ panic("unreachable")
|
|
|
+ }
|
|
|
+ return pid, err
|
|
|
+}
|
|
|
+
|
|
|
+func resolveRootfs(container *libcontainer.Container) (string, error) {
|
|
|
+ rootfs, err := filepath.Abs(container.RootFs)
|
|
|
+ if err != nil {
|
|
|
+ return "", err
|
|
|
+ }
|
|
|
+ return filepath.EvalSymlinks(rootfs)
|
|
|
+}
|
|
|
+
|
|
|
+func createMasterAndConsole() (*os.File, string, error) {
|
|
|
+ master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
|
|
|
+ if err != nil {
|
|
|
+ return nil, "", err
|
|
|
+ }
|
|
|
+
|
|
|
+ console, err := ptsname(master)
|
|
|
+ if err != nil {
|
|
|
+ return nil, "", err
|
|
|
+ }
|
|
|
+
|
|
|
+ if err := unlockpt(master); err != nil {
|
|
|
+ return nil, "", err
|
|
|
+ }
|
|
|
+ return master, console, nil
|
|
|
+}
|
|
|
+
|
|
|
+func closeMasterAndStd(master *os.File) error {
|
|
|
+ closefd(master.Fd())
|
|
|
+ closefd(0)
|
|
|
+ closefd(1)
|
|
|
+ closefd(2)
|
|
|
+
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
+func dupSlave(slave *os.File) error {
|
|
|
+ // we close Stdin,etc so our pty slave should have fd 0
|
|
|
+ if slave.Fd() != 0 {
|
|
|
+ return fmt.Errorf("slave fd not 0 %d", slave.Fd())
|
|
|
+ }
|
|
|
+ if err := dup2(slave.Fd(), 1); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ if err := dup2(slave.Fd(), 2); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
// openTerminal opens the file called name with the given syscall open flags
// (permission bits 0) and wraps the resulting descriptor in an *os.File that
// carries the same name.
//
// A failed open is reported as an *os.PathError with Op "open" and Path set
// to name.
func openTerminal(name string, flag int) (*os.File, error) {
	fd, err := syscall.Open(name, flag, 0)
	if err != nil {
		// keyed fields keep the literal vet-clean and independent of the
		// field order of os.PathError
		return nil, &os.PathError{Op: "open", Path: name, Err: err}
	}
	return os.NewFile(uintptr(fd), name), nil
}
|