From 0a725ea28259f8a0f9a1de5730fd99675b942dde Mon Sep 17 00:00:00 2001 From: Victor Marmol Date: Sat, 24 May 2014 01:06:14 +0000 Subject: [PATCH 1/3] Adding initial version of C-based nsenter for allowing execin in libcontainer. Docker-DCO-1.1-Signed-off-by: Victor Marmol (github: vmarmol) --- pkg/libcontainer/namespaces/execin.go | 116 +++++--------------- pkg/libcontainer/namespaces/nsenter.go | 142 +++++++++++++++++++++++++ pkg/libcontainer/nsinit/main.go | 63 +++++++++-- 3 files changed, 224 insertions(+), 97 deletions(-) create mode 100644 pkg/libcontainer/namespaces/nsenter.go diff --git a/pkg/libcontainer/namespaces/execin.go b/pkg/libcontainer/namespaces/execin.go index 09bf40582a..699c67dbc7 100644 --- a/pkg/libcontainer/namespaces/execin.go +++ b/pkg/libcontainer/namespaces/execin.go @@ -3,119 +3,55 @@ package namespaces import ( - "fmt" + "encoding/json" "os" - "path/filepath" "strconv" - "syscall" "github.com/dotcloud/docker/pkg/label" "github.com/dotcloud/docker/pkg/libcontainer" - "github.com/dotcloud/docker/pkg/libcontainer/mount" "github.com/dotcloud/docker/pkg/system" ) // ExecIn uses an existing pid and joins the pid's namespaces with the new command. -func ExecIn(container *libcontainer.Container, nspid int, args []string) (int, error) { +func ExecIn(container *libcontainer.Container, nspid int, args []string) error { // clear the current processes env and replace it with the environment // defined on the container if err := LoadContainerEnvironment(container); err != nil { - return -1, err + return err } - for key, enabled := range container.Namespaces { - // skip the PID namespace on unshare because it it not supported - if enabled && key != "NEWPID" { - if ns := libcontainer.GetNamespace(key); ns != nil { - if err := system.Unshare(ns.Value); err != nil { - return -1, err - } - } - } - } - fds, err := getNsFds(nspid, container) - closeFds := func() { - for _, f := range fds { - system.Closefd(f) - } - } + // TODO(vmarmol): If this gets too long, send it over a pipe to the child. + // Marshall the container into JSON since it won't be available in the namespace. + containerJson, err := json.Marshal(container) if err != nil { - closeFds() - return -1, err + return err } + + // TODO(vmarmol): Move this to the container JSON. processLabel, err := label.GetPidCon(nspid) if err != nil { - closeFds() - return -1, err - } - // foreach namespace fd, use setns to join an existing container's namespaces - for _, fd := range fds { - if fd > 0 { - if err := system.Setns(fd, 0); err != nil { - closeFds() - return -1, fmt.Errorf("setns %s", err) - } - } - system.Closefd(fd) + return err } - // if the container has a new pid and mount namespace we need to - // remount proc and sys to pick up the changes - if container.Namespaces["NEWNS"] && container.Namespaces["NEWPID"] { - pid, err := system.Fork() - if err != nil { - return -1, err - } - if pid == 0 { - // TODO: make all raw syscalls to be fork safe - if err := system.Unshare(syscall.CLONE_NEWNS); err != nil { - return -1, err - } - if err := mount.RemountProc(); err != nil { - return -1, fmt.Errorf("remount proc %s", err) - } - if err := mount.RemountSys(); err != nil { - return -1, fmt.Errorf("remount sys %s", err) - } - goto dropAndExec - } - proc, err := os.FindProcess(pid) - if err != nil { - return -1, err - } - state, err := proc.Wait() - if err != nil { - return -1, err - } - os.Exit(state.Sys().(syscall.WaitStatus).ExitStatus()) - } -dropAndExec: - if err := FinalizeNamespace(container); err != nil { - return -1, err - } - err = label.SetProcessLabel(processLabel) - if err != nil { - return -1, err - } - if err := system.Execv(args[0], args[0:], container.Env); err != nil { - return -1, err + // Enter the namespace and then finish setup + finalArgs := []string{os.Args[0], "nsenter", strconv.Itoa(nspid), processLabel, string(containerJson)} + finalArgs = append(finalArgs, args...) + if err := system.Execv(finalArgs[0], finalArgs[0:], container.Env); err != nil { + return err } panic("unreachable") } -func getNsFds(pid int, container *libcontainer.Container) ([]uintptr, error) { - fds := []uintptr{} - - for key, enabled := range container.Namespaces { - if enabled { - if ns := libcontainer.GetNamespace(key); ns != nil { - f, err := os.OpenFile(filepath.Join("/proc/", strconv.Itoa(pid), "ns", ns.File), os.O_RDONLY, 0) - if err != nil { - return fds, err - } - fds = append(fds, f.Fd()) - } - } +// NsEnter is run after entering the namespace. +func NsEnter(container *libcontainer.Container, processLabel string, nspid int, args []string) error { + if err := FinalizeNamespace(container); err != nil { + return err } - return fds, nil + if err := label.SetProcessLabel(processLabel); err != nil { + return err + } + if err := system.Execv(args[0], args[0:], os.Environ()); err != nil { + return err + } + panic("unreachable") } diff --git a/pkg/libcontainer/namespaces/nsenter.go b/pkg/libcontainer/namespaces/nsenter.go new file mode 100644 index 0000000000..c5dd2e7953 --- /dev/null +++ b/pkg/libcontainer/namespaces/nsenter.go @@ -0,0 +1,142 @@ +package namespaces + +/* +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const kBufSize = 256; + +void get_args(int *argc, char ***argv) { + // Read argv + int fd = open("/proc/self/cmdline", O_RDONLY); + + // Read the whole commandline. + ssize_t contents_size = 0; + ssize_t contents_offset = 0; + char *contents = NULL; + ssize_t bytes_read = 0; + do { + contents_size += kBufSize; + contents = (char *) realloc(contents, contents_size); + bytes_read = read(fd, contents + contents_offset, contents_size - contents_offset); + contents_offset += bytes_read; + } while (bytes_read > 0); + close(fd); + + // Parse the commandline into an argv. /proc/self/cmdline has \0 delimited args. + ssize_t i; + *argc = 0; + for (i = 0; i < contents_offset; i++) { + if (contents[i] == '\0') { + (*argc)++; + } + } + *argv = (char **) malloc(sizeof(char *) * ((*argc) + 1)); + int idx; + for (idx = 0; idx < (*argc); idx++) { + (*argv)[idx] = contents; + contents += strlen(contents) + 1; + } + (*argv)[*argc] = NULL; +} + +void nsenter() { + int argc; + char **argv; + get_args(&argc, &argv); + + // Ignore if this is not for us. + if (argc < 2 || strcmp(argv[1], "nsenter") != 0) { + return; + } + + // USAGE: nsenter ... + if (argc < 6) { + fprintf(stderr, "nsenter: Incorrect usage, not enough arguments\n"); + exit(1); + } + pid_t init_pid = strtol(argv[2], NULL, 10); + if (errno != 0 || init_pid <= 0) { + fprintf(stderr, "nsenter: Failed to parse PID from \"%s\" with error: \"%s\"\n", argv[2], strerror(errno)); + exit(1); + } + argc -= 3; + argv += 3; + + // Setns on all supported namespaces. + char ns_dir[kBufSize]; + memset(ns_dir, 0, kBufSize); + if (snprintf(ns_dir, kBufSize - 1, "/proc/%d/ns/", init_pid) < 0) { + fprintf(stderr, "nsenter: Error getting ns dir path with error: \"%s\"\n", strerror(errno)); + exit(1); + } + struct dirent *dent; + DIR *dir = opendir(ns_dir); + if (dir == NULL) { + fprintf(stderr, "nsenter: Failed to open directory \"%s\" with error: \"%s\"\n", ns_dir, strerror(errno)); + exit(1); + } + while((dent = readdir(dir)) != NULL) { + if(strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0) { + continue; + } + + // Get and open the namespace for the init we are joining.. + char buf[kBufSize]; + memset(buf, 0, kBufSize); + strncat(buf, ns_dir, kBufSize - 1); + strncat(buf, dent->d_name, kBufSize - 1); + int fd = open(buf, O_RDONLY); + if (fd == -1) { + fprintf(stderr, "nsenter: Failed to open ns file \"%s\" for ns \"%s\" with error: \"%s\"\n", buf, dent->d_name, strerror(errno)); + exit(1); + } + + // Set the namespace. + if (setns(fd, 0) == -1) { + fprintf(stderr, "nsenter: Failed to setns for \"%s\" with error: \"%s\"\n", dent->d_name, strerror(errno)); + exit(1); + } + close(fd); + } + closedir(dir); + + // We must fork to actually enter the PID namespace. + int child = fork(); + if (child == 0) { + // Finish executing, let the Go runtime take over. + return; + } else { + // Parent, wait for the child. + int status = 0; + if (waitpid(child, &status, 0) == -1) { + fprintf(stderr, "nsenter: Failed to waitpid with error: \"%s\"\n", strerror(errno)); + exit(1); + } + + // Forward the child's exit code or re-send its death signal. + if (WIFEXITED(status)) { + exit(WEXITSTATUS(status)); + } else if (WIFSIGNALED(status)) { + kill(getpid(), WTERMSIG(status)); + } + exit(1); + } + + return; +} + +__attribute__((constructor)) init() { + nsenter(); +} +*/ +import "C" diff --git a/pkg/libcontainer/nsinit/main.go b/pkg/libcontainer/nsinit/main.go index 6659a1310e..bddc1992fd 100644 --- a/pkg/libcontainer/nsinit/main.go +++ b/pkg/libcontainer/nsinit/main.go @@ -27,20 +27,20 @@ func main() { log.Fatalf("invalid number of arguments %d", len(os.Args)) } - container, err := loadContainer() - if err != nil { - log.Fatalf("unable to load container: %s", err) - } - switch os.Args[1] { case "exec": // this is executed outside of the namespace in the cwd + container, err := loadContainer() + if err != nil { + log.Fatalf("unable to load container: %s", err) + } + var nspid, exitCode int if nspid, err = readPid(); err != nil && !os.IsNotExist(err) { log.Fatalf("unable to read pid: %s", err) } if nspid > 0 { - exitCode, err = namespaces.ExecIn(container, nspid, os.Args[2:]) + err = namespaces.ExecIn(container, nspid, os.Args[2:]) } else { term := namespaces.NewTerminal(os.Stdin, os.Stdout, os.Stderr, container.Tty) exitCode, err = startContainer(container, term, dataPath, os.Args[2:]) @@ -50,7 +50,36 @@ func main() { log.Fatalf("failed to exec: %s", err) } os.Exit(exitCode) + case "nsenter": // this is executed inside the namespace. + // nsinit nsenter ... + if len(os.Args) < 6 { + log.Fatalf("incorrect usage: nsinit nsenter ...") + } + + container, err := loadContainerFromJson(os.Args[4]) + if err != nil { + log.Fatalf("unable to load container: %s", err) + } + + nspid, err := strconv.Atoi(os.Args[2]) + if err != nil { + log.Fatalf("unable to read pid: %s from %q", err, os.Args[2]) + } + + if nspid <= 0 { + log.Fatalf("cannot enter into namespaces without valid pid: %q", nspid) + } + + err = namespaces.NsEnter(container, os.Args[3], nspid, os.Args[5:]) + if err != nil { + log.Fatalf("failed to nsenter: %s", err) + } case "init": // this is executed inside of the namespace to setup the container + container, err := loadContainer() + if err != nil { + log.Fatalf("unable to load container: %s", err) + } + // by default our current dir is always our rootfs rootfs, err := os.Getwd() if err != nil { @@ -70,6 +99,11 @@ func main() { log.Fatalf("unable to initialize for container: %s", err) } case "stats": + container, err := loadContainer() + if err != nil { + log.Fatalf("unable to load container: %s", err) + } + // returns the stats of the current container. stats, err := getContainerStats(container) if err != nil { @@ -80,6 +114,11 @@ func main() { os.Exit(0) case "spec": + container, err := loadContainer() + if err != nil { + log.Fatalf("unable to load container: %s", err) + } + // returns the spec of the current container. spec, err := getContainerSpec(container) if err != nil { @@ -90,13 +129,14 @@ func main() { os.Exit(0) default: - log.Fatalf("command not supported for nsinit %s", os.Args[0]) + log.Fatalf("command not supported for nsinit %s", os.Args[1]) } } func loadContainer() (*libcontainer.Container, error) { f, err := os.Open(filepath.Join(dataPath, "container.json")) if err != nil { + log.Printf("Path: %q", filepath.Join(dataPath, "container.json")) return nil, err } defer f.Close() @@ -108,6 +148,15 @@ func loadContainer() (*libcontainer.Container, error) { return container, nil } +func loadContainerFromJson(rawData string) (*libcontainer.Container, error) { + container := &libcontainer.Container{} + err := json.Unmarshal([]byte(rawData), container) + if err != nil { + return nil, err + } + return container, nil +} + func readPid() (int, error) { data, err := ioutil.ReadFile(filepath.Join(dataPath, "pid")) if err != nil { From 8497d1274b046804999699ccb66b11a3249906a1 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 4 Jun 2014 17:54:00 -0700 Subject: [PATCH 2/3] Move env load to nsenter Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/namespaces/execin.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pkg/libcontainer/namespaces/execin.go b/pkg/libcontainer/namespaces/execin.go index 699c67dbc7..4d5671e778 100644 --- a/pkg/libcontainer/namespaces/execin.go +++ b/pkg/libcontainer/namespaces/execin.go @@ -14,12 +14,6 @@ import ( // ExecIn uses an existing pid and joins the pid's namespaces with the new command. func ExecIn(container *libcontainer.Container, nspid int, args []string) error { - // clear the current processes env and replace it with the environment - // defined on the container - if err := LoadContainerEnvironment(container); err != nil { - return err - } - // TODO(vmarmol): If this gets too long, send it over a pipe to the child. // Marshall the container into JSON since it won't be available in the namespace. containerJson, err := json.Marshal(container) @@ -36,7 +30,7 @@ func ExecIn(container *libcontainer.Container, nspid int, args []string) error { // Enter the namespace and then finish setup finalArgs := []string{os.Args[0], "nsenter", strconv.Itoa(nspid), processLabel, string(containerJson)} finalArgs = append(finalArgs, args...) - if err := system.Execv(finalArgs[0], finalArgs[0:], container.Env); err != nil { + if err := system.Execv(finalArgs[0], finalArgs[0:], os.Environ()); err != nil { return err } panic("unreachable") @@ -44,13 +38,18 @@ func ExecIn(container *libcontainer.Container, nspid int, args []string) error { // NsEnter is run after entering the namespace. func NsEnter(container *libcontainer.Container, processLabel string, nspid int, args []string) error { + // clear the current processes env and replace it with the environment + // defined on the container + if err := LoadContainerEnvironment(container); err != nil { + return err + } if err := FinalizeNamespace(container); err != nil { return err } if err := label.SetProcessLabel(processLabel); err != nil { return err } - if err := system.Execv(args[0], args[0:], os.Environ()); err != nil { + if err := system.Execv(args[0], args[0:], container.Env); err != nil { return err } panic("unreachable") From 7eb508633db93213404292bd4fd21b6855f45bea Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 4 Jun 2014 18:03:17 -0700 Subject: [PATCH 3/3] Exclude the user namespace for setns Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/namespaces/nsenter.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/libcontainer/namespaces/nsenter.go b/pkg/libcontainer/namespaces/nsenter.go index c5dd2e7953..d5c2e761b7 100644 --- a/pkg/libcontainer/namespaces/nsenter.go +++ b/pkg/libcontainer/namespaces/nsenter.go @@ -85,8 +85,9 @@ void nsenter() { fprintf(stderr, "nsenter: Failed to open directory \"%s\" with error: \"%s\"\n", ns_dir, strerror(errno)); exit(1); } + while((dent = readdir(dir)) != NULL) { - if(strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0) { + if(strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0 || strcmp(dent->d_name, "user") == 0) { continue; }