瀏覽代碼

Adding initial version of C-based nsenter for allowing execin in
libcontainer.

Docker-DCO-1.1-Signed-off-by: Victor Marmol <vmarmol@google.com> (github: vmarmol)

Victor Marmol 11 年之前
父節點
當前提交
0a725ea282
共有 3 個文件被更改,包括 223 次插入 和 96 次删除
  1. 25 89
      pkg/libcontainer/namespaces/execin.go
  2. 142 0
      pkg/libcontainer/namespaces/nsenter.go
  3. 56 7
      pkg/libcontainer/nsinit/main.go

+ 25 - 89
pkg/libcontainer/namespaces/execin.go

@@ -3,119 +3,55 @@
 package namespaces
 package namespaces
 
 
 import (
 import (
-	"fmt"
+	"encoding/json"
 	"os"
 	"os"
-	"path/filepath"
 	"strconv"
 	"strconv"
-	"syscall"
 
 
 	"github.com/dotcloud/docker/pkg/label"
 	"github.com/dotcloud/docker/pkg/label"
 	"github.com/dotcloud/docker/pkg/libcontainer"
 	"github.com/dotcloud/docker/pkg/libcontainer"
-	"github.com/dotcloud/docker/pkg/libcontainer/mount"
 	"github.com/dotcloud/docker/pkg/system"
 	"github.com/dotcloud/docker/pkg/system"
 )
 )
 
 
 // ExecIn uses an existing pid and joins the pid's namespaces with the new command.
 // ExecIn uses an existing pid and joins the pid's namespaces with the new command.
-func ExecIn(container *libcontainer.Container, nspid int, args []string) (int, error) {
+func ExecIn(container *libcontainer.Container, nspid int, args []string) error {
 	// clear the current processes env and replace it with the environment
 	// clear the current processes env and replace it with the environment
 	// defined on the container
 	// defined on the container
 	if err := LoadContainerEnvironment(container); err != nil {
 	if err := LoadContainerEnvironment(container); err != nil {
-		return -1, err
+		return err
 	}
 	}
 
 
-	for key, enabled := range container.Namespaces {
-		// skip the PID namespace on unshare because it it not supported
-		if enabled && key != "NEWPID" {
-			if ns := libcontainer.GetNamespace(key); ns != nil {
-				if err := system.Unshare(ns.Value); err != nil {
-					return -1, err
-				}
-			}
-		}
-	}
-	fds, err := getNsFds(nspid, container)
-	closeFds := func() {
-		for _, f := range fds {
-			system.Closefd(f)
-		}
-	}
+	// TODO(vmarmol): If this gets too long, send it over a pipe to the child.
+	// Marshal the container into JSON since it won't be available in the namespace.
+	containerJson, err := json.Marshal(container)
 	if err != nil {
 	if err != nil {
-		closeFds()
-		return -1, err
+		return err
 	}
 	}
+
+	// TODO(vmarmol): Move this to the container JSON.
 	processLabel, err := label.GetPidCon(nspid)
 	processLabel, err := label.GetPidCon(nspid)
 	if err != nil {
 	if err != nil {
-		closeFds()
-		return -1, err
-	}
-	// foreach namespace fd, use setns to join an existing container's namespaces
-	for _, fd := range fds {
-		if fd > 0 {
-			if err := system.Setns(fd, 0); err != nil {
-				closeFds()
-				return -1, fmt.Errorf("setns %s", err)
-			}
-		}
-		system.Closefd(fd)
+		return err
 	}
 	}
 
 
-	// if the container has a new pid and mount namespace we need to
-	// remount proc and sys to pick up the changes
-	if container.Namespaces["NEWNS"] && container.Namespaces["NEWPID"] {
-		pid, err := system.Fork()
-		if err != nil {
-			return -1, err
-		}
-		if pid == 0 {
-			// TODO: make all raw syscalls to be fork safe
-			if err := system.Unshare(syscall.CLONE_NEWNS); err != nil {
-				return -1, err
-			}
-			if err := mount.RemountProc(); err != nil {
-				return -1, fmt.Errorf("remount proc %s", err)
-			}
-			if err := mount.RemountSys(); err != nil {
-				return -1, fmt.Errorf("remount sys %s", err)
-			}
-			goto dropAndExec
-		}
-		proc, err := os.FindProcess(pid)
-		if err != nil {
-			return -1, err
-		}
-		state, err := proc.Wait()
-		if err != nil {
-			return -1, err
-		}
-		os.Exit(state.Sys().(syscall.WaitStatus).ExitStatus())
+	// Enter the namespace and then finish setup
+	finalArgs := []string{os.Args[0], "nsenter", strconv.Itoa(nspid), processLabel, string(containerJson)}
+	finalArgs = append(finalArgs, args...)
+	if err := system.Execv(finalArgs[0], finalArgs[0:], container.Env); err != nil {
+		return err
 	}
 	}
-dropAndExec:
+	panic("unreachable")
+}
+
+// NsEnter is run after entering the namespace.
+func NsEnter(container *libcontainer.Container, processLabel string, nspid int, args []string) error {
 	if err := FinalizeNamespace(container); err != nil {
 	if err := FinalizeNamespace(container); err != nil {
-		return -1, err
+		return err
 	}
 	}
-	err = label.SetProcessLabel(processLabel)
-	if err != nil {
-		return -1, err
+	if err := label.SetProcessLabel(processLabel); err != nil {
+		return err
 	}
 	}
-	if err := system.Execv(args[0], args[0:], container.Env); err != nil {
-		return -1, err
+	if err := system.Execv(args[0], args[0:], os.Environ()); err != nil {
+		return err
 	}
 	}
 	panic("unreachable")
 	panic("unreachable")
 }
 }
-
-func getNsFds(pid int, container *libcontainer.Container) ([]uintptr, error) {
-	fds := []uintptr{}
-
-	for key, enabled := range container.Namespaces {
-		if enabled {
-			if ns := libcontainer.GetNamespace(key); ns != nil {
-				f, err := os.OpenFile(filepath.Join("/proc/", strconv.Itoa(pid), "ns", ns.File), os.O_RDONLY, 0)
-				if err != nil {
-					return fds, err
-				}
-				fds = append(fds, f.Fd())
-			}
-		}
-	}
-	return fds, nil
-}

+ 142 - 0
pkg/libcontainer/namespaces/nsenter.go

@@ -0,0 +1,142 @@
+package namespaces
+
+/*
+#define _GNU_SOURCE
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/sched.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+// Size of the fixed path buffers and growth step of the cmdline buffer.
+static const int kBufSize = 256;
+
+// get_args rebuilds an argc/argv pair by reading /proc/self/cmdline.
+//
+// On success *argc holds the number of arguments and *argv points to a
+// NULL-terminated array of pointers into a heap buffer. Neither allocation is
+// freed here because the returned strings live inside that buffer.
+// If the command line cannot be read or memory cannot be allocated, *argc is
+// set to 0 and *argv to NULL so callers can treat the process as having no
+// arguments.
+void get_args(int *argc, char ***argv) {
+	*argc = 0;
+	*argv = NULL;
+
+	int fd = open("/proc/self/cmdline", O_RDONLY);
+	if (fd == -1) {
+		return;
+	}
+
+	// Read the whole commandline, growing the buffer one step per read.
+	ssize_t contents_size = 0;
+	ssize_t contents_offset = 0;
+	char *contents = NULL;
+	ssize_t bytes_read = 0;
+	do {
+		contents_size += kBufSize;
+		char *grown = (char *) realloc(contents, contents_size);
+		if (grown == NULL) {
+			free(contents);
+			close(fd);
+			return;
+		}
+		contents = grown;
+
+		// Keep one byte spare so the buffer can always be NUL-terminated.
+		bytes_read = read(fd, contents + contents_offset, contents_size - contents_offset - 1);
+		if (bytes_read > 0) {
+			contents_offset += bytes_read;
+		}
+	} while (bytes_read > 0);
+	close(fd);
+	if (bytes_read < 0) {
+		free(contents);
+		return;
+	}
+	contents[contents_offset] = '\0';
+
+	// Parse the commandline into an argv. /proc/self/cmdline has \0 delimited args.
+	ssize_t i;
+	int count = 0;
+	for (i = 0; i < contents_offset; i++) {
+		if (contents[i] == '\0') {
+			count++;
+		}
+	}
+	// A final argument that lacks its delimiter is still an argument.
+	if (contents_offset > 0 && contents[contents_offset - 1] != '\0') {
+		count++;
+	}
+
+	char **args = (char **) malloc(sizeof(char *) * (count + 1));
+	if (args == NULL) {
+		free(contents);
+		return;
+	}
+	char *cursor = contents;
+	int idx;
+	for (idx = 0; idx < count; idx++) {
+		args[idx] = cursor;
+		cursor += strlen(cursor) + 1;
+	}
+	args[count] = NULL;
+
+	*argc = count;
+	*argv = args;
+}
+
+// nsenter joins the namespaces of an existing process when the binary was
+// started as:
+//
+//   <binary> nsenter <PID> <process label> <container JSON> <argv>...
+//
+// For any other command line it returns without doing anything. When it does
+// apply, it calls setns on every entry of /proc/<PID>/ns and then forks: the
+// child returns to the caller, while the parent waits for the child and exits
+// with its exit code (or re-sends the signal that killed it). Any failure is
+// reported on stderr and ends the process with status 1.
+void nsenter() {
+	int argc;
+	char **argv;
+	get_args(&argc, &argv);
+
+	// Ignore if this is not for us.
+	if (argc < 2 || strcmp(argv[1], "nsenter") != 0) {
+		return;
+	}
+
+	// USAGE: <binary> nsenter <PID> <process label> <container JSON> <argv>...
+	if (argc < 6) {
+		fprintf(stderr, "nsenter: Incorrect usage, not enough arguments\n");
+		exit(1);
+	}
+
+	// strtol does not clear errno on success, so reset it first; otherwise a
+	// stale value from an earlier call would reject a valid PID. Also require
+	// that the whole argument was consumed and that it fits in a pid_t.
+	char *pid_end = NULL;
+	errno = 0;
+	long parsed_pid = strtol(argv[2], &pid_end, 10);
+	pid_t init_pid = (pid_t) parsed_pid;
+	if (errno != 0 || pid_end == argv[2] || *pid_end != '\0' || parsed_pid <= 0 || (long) init_pid != parsed_pid) {
+		fprintf(stderr, "nsenter: Failed to parse PID from \"%s\" with error: \"%s\"\n", argv[2], strerror(errno));
+		exit(1);
+	}
+
+	// Setns on all supported namespaces.
+	char ns_dir[kBufSize];
+	int ns_dir_len = snprintf(ns_dir, kBufSize, "/proc/%d/ns/", (int) init_pid);
+	if (ns_dir_len < 0 || ns_dir_len >= kBufSize) {
+		fprintf(stderr, "nsenter: Error getting ns dir path with error: \"%s\"\n", strerror(errno));
+		exit(1);
+	}
+	DIR *dir = opendir(ns_dir);
+	if (dir == NULL) {
+		fprintf(stderr, "nsenter: Failed to open directory \"%s\" with error: \"%s\"\n", ns_dir, strerror(errno));
+		exit(1);
+	}
+	struct dirent *dent;
+	while ((dent = readdir(dir)) != NULL) {
+		if (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0) {
+			continue;
+		}
+
+		// Full path, used only for diagnostics. snprintf bounds the whole
+		// buffer, so a long entry name is truncated instead of overflowing.
+		char buf[kBufSize];
+		snprintf(buf, kBufSize, "%s%s", ns_dir, dent->d_name);
+
+		// Open the namespace file relative to the directory that is already
+		// open, so the lookup does not depend on how an absolute /proc path
+		// resolves once the mount namespace has been joined.
+		int fd = openat(dirfd(dir), dent->d_name, O_RDONLY);
+		if (fd == -1) {
+			fprintf(stderr, "nsenter: Failed to open ns file \"%s\" for ns \"%s\" with error: \"%s\"\n", buf, dent->d_name, strerror(errno));
+			exit(1);
+		}
+
+		// Set the namespace.
+		if (setns(fd, 0) == -1) {
+			fprintf(stderr, "nsenter: Failed to setns for \"%s\" with error: \"%s\"\n", dent->d_name, strerror(errno));
+			exit(1);
+		}
+		close(fd);
+	}
+	closedir(dir);
+
+	// We must fork to actually enter the PID namespace.
+	pid_t child = fork();
+	if (child == -1) {
+		fprintf(stderr, "nsenter: Failed to fork with error: \"%s\"\n", strerror(errno));
+		exit(1);
+	}
+	if (child == 0) {
+		// Finish executing, let the Go runtime take over.
+		return;
+	}
+
+	// Parent, wait for the child.
+	int status = 0;
+	if (waitpid(child, &status, 0) == -1) {
+		fprintf(stderr, "nsenter: Failed to waitpid with error: \"%s\"\n", strerror(errno));
+		exit(1);
+	}
+
+	// Forward the child's exit code or re-send its death signal.
+	if (WIFEXITED(status)) {
+		exit(WEXITSTATUS(status));
+	}
+	if (WIFSIGNALED(status)) {
+		kill(getpid(), WTERMSIG(status));
+	}
+	exit(1);
+}
+
+// init is marked as a constructor and simply calls nsenter().
+__attribute__((constructor)) void init(void) {
+	nsenter();
+}
+*/
+import "C"

+ 56 - 7
pkg/libcontainer/nsinit/main.go

@@ -27,20 +27,20 @@ func main() {
 		log.Fatalf("invalid number of arguments %d", len(os.Args))
 		log.Fatalf("invalid number of arguments %d", len(os.Args))
 	}
 	}
 
 
-	container, err := loadContainer()
-	if err != nil {
-		log.Fatalf("unable to load container: %s", err)
-	}
-
 	switch os.Args[1] {
 	switch os.Args[1] {
 	case "exec": // this is executed outside of the namespace in the cwd
 	case "exec": // this is executed outside of the namespace in the cwd
+		container, err := loadContainer()
+		if err != nil {
+			log.Fatalf("unable to load container: %s", err)
+		}
+
 		var nspid, exitCode int
 		var nspid, exitCode int
 		if nspid, err = readPid(); err != nil && !os.IsNotExist(err) {
 		if nspid, err = readPid(); err != nil && !os.IsNotExist(err) {
 			log.Fatalf("unable to read pid: %s", err)
 			log.Fatalf("unable to read pid: %s", err)
 		}
 		}
 
 
 		if nspid > 0 {
 		if nspid > 0 {
-			exitCode, err = namespaces.ExecIn(container, nspid, os.Args[2:])
+			err = namespaces.ExecIn(container, nspid, os.Args[2:])
 		} else {
 		} else {
 			term := namespaces.NewTerminal(os.Stdin, os.Stdout, os.Stderr, container.Tty)
 			term := namespaces.NewTerminal(os.Stdin, os.Stdout, os.Stderr, container.Tty)
 			exitCode, err = startContainer(container, term, dataPath, os.Args[2:])
 			exitCode, err = startContainer(container, term, dataPath, os.Args[2:])
@@ -50,7 +50,36 @@ func main() {
 			log.Fatalf("failed to exec: %s", err)
 			log.Fatalf("failed to exec: %s", err)
 		}
 		}
 		os.Exit(exitCode)
 		os.Exit(exitCode)
+	case "nsenter": // this is executed inside the namespace.
+		// nsinit nsenter <pid> <process label> <container JSON> <cmd>...
+		if len(os.Args) < 6 {
+			log.Fatalf("incorrect usage: nsinit nsenter <pid> <process label> <container JSON> <cmd>...")
+		}
+
+		container, err := loadContainerFromJson(os.Args[4])
+		if err != nil {
+			log.Fatalf("unable to load container: %s", err)
+		}
+
+		nspid, err := strconv.Atoi(os.Args[2])
+		if err != nil {
+			log.Fatalf("unable to read pid: %s from %q", err, os.Args[2])
+		}
+
+		if nspid <= 0 {
+			log.Fatalf("cannot enter into namespaces without valid pid: %q", nspid)
+		}
+
+		err = namespaces.NsEnter(container, os.Args[3], nspid, os.Args[5:])
+		if err != nil {
+			log.Fatalf("failed to nsenter: %s", err)
+		}
 	case "init": // this is executed inside of the namespace to setup the container
 	case "init": // this is executed inside of the namespace to setup the container
+		container, err := loadContainer()
+		if err != nil {
+			log.Fatalf("unable to load container: %s", err)
+		}
+
 		// by default our current dir is always our rootfs
 		// by default our current dir is always our rootfs
 		rootfs, err := os.Getwd()
 		rootfs, err := os.Getwd()
 		if err != nil {
 		if err != nil {
@@ -70,6 +99,11 @@ func main() {
 			log.Fatalf("unable to initialize for container: %s", err)
 			log.Fatalf("unable to initialize for container: %s", err)
 		}
 		}
 	case "stats":
 	case "stats":
+		container, err := loadContainer()
+		if err != nil {
+			log.Fatalf("unable to load container: %s", err)
+		}
+
 		// returns the stats of the current container.
 		// returns the stats of the current container.
 		stats, err := getContainerStats(container)
 		stats, err := getContainerStats(container)
 		if err != nil {
 		if err != nil {
@@ -80,6 +114,11 @@ func main() {
 		os.Exit(0)
 		os.Exit(0)
 
 
 	case "spec":
 	case "spec":
+		container, err := loadContainer()
+		if err != nil {
+			log.Fatalf("unable to load container: %s", err)
+		}
+
 		// returns the spec of the current container.
 		// returns the spec of the current container.
 		spec, err := getContainerSpec(container)
 		spec, err := getContainerSpec(container)
 		if err != nil {
 		if err != nil {
@@ -90,13 +129,14 @@ func main() {
 		os.Exit(0)
 		os.Exit(0)
 
 
 	default:
 	default:
-		log.Fatalf("command not supported for nsinit %s", os.Args[0])
+		log.Fatalf("command not supported for nsinit %s", os.Args[1])
 	}
 	}
 }
 }
 
 
 func loadContainer() (*libcontainer.Container, error) {
 func loadContainer() (*libcontainer.Container, error) {
 	f, err := os.Open(filepath.Join(dataPath, "container.json"))
 	f, err := os.Open(filepath.Join(dataPath, "container.json"))
 	if err != nil {
 	if err != nil {
+		log.Printf("Path: %q", filepath.Join(dataPath, "container.json"))
 		return nil, err
 		return nil, err
 	}
 	}
 	defer f.Close()
 	defer f.Close()
@@ -108,6 +148,15 @@ func loadContainer() (*libcontainer.Container, error) {
 	return container, nil
 	return container, nil
 }
 }
 
 
+func loadContainerFromJson(rawData string) (*libcontainer.Container, error) {
+	container := &libcontainer.Container{}
+	err := json.Unmarshal([]byte(rawData), container)
+	if err != nil {
+		return nil, err
+	}
+	return container, nil
+}
+
 func readPid() (int, error) {
 func readPid() (int, error) {
 	data, err := ioutil.ReadFile(filepath.Join(dataPath, "pid"))
 	data, err := ioutil.ReadFile(filepath.Join(dataPath, "pid"))
 	if err != nil {
 	if err != nil {