123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252 |
- // +build linux
- package namespaces
- import (
- "fmt"
- "os"
- "runtime"
- "strings"
- "syscall"
- "github.com/docker/libcontainer"
- "github.com/docker/libcontainer/apparmor"
- "github.com/docker/libcontainer/console"
- "github.com/docker/libcontainer/label"
- "github.com/docker/libcontainer/mount"
- "github.com/docker/libcontainer/netlink"
- "github.com/docker/libcontainer/network"
- "github.com/docker/libcontainer/security/capabilities"
- "github.com/docker/libcontainer/security/restrict"
- "github.com/docker/libcontainer/syncpipe"
- "github.com/docker/libcontainer/utils"
- "github.com/dotcloud/docker/pkg/system"
- "github.com/dotcloud/docker/pkg/user"
- )
- // TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work.
- // Move this to libcontainer package.
- // Init is the init process that first runs inside a new namespace to setup mounts, users, networking,
- // and other options required for the new container.
- func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syncPipe *syncpipe.SyncPipe, args []string) (err error) {
- defer func() {
- if err != nil {
- syncPipe.ReportChildError(err)
- }
- }()
- rootfs, err := utils.ResolveRootfs(uncleanRootfs)
- if err != nil {
- return err
- }
- // clear the current processes env and replace it with the environment
- // defined on the container
- if err := LoadContainerEnvironment(container); err != nil {
- return err
- }
- // We always read this as it is a way to sync with the parent as well
- networkState, err := syncPipe.ReadFromParent()
- if err != nil {
- return err
- }
- if consolePath != "" {
- if err := console.OpenAndDup(consolePath); err != nil {
- return err
- }
- }
- if _, err := system.Setsid(); err != nil {
- return fmt.Errorf("setsid %s", err)
- }
- if consolePath != "" {
- if err := system.Setctty(); err != nil {
- return fmt.Errorf("setctty %s", err)
- }
- }
- if err := setupNetwork(container, networkState); err != nil {
- return fmt.Errorf("setup networking %s", err)
- }
- if err := setupRoute(container); err != nil {
- return fmt.Errorf("setup route %s", err)
- }
- label.Init()
- if err := mount.InitializeMountNamespace(rootfs,
- consolePath,
- (*mount.MountConfig)(container.MountConfig)); err != nil {
- return fmt.Errorf("setup mount namespace %s", err)
- }
- if container.Hostname != "" {
- if err := system.Sethostname(container.Hostname); err != nil {
- return fmt.Errorf("sethostname %s", err)
- }
- }
- runtime.LockOSThread()
- if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil {
- return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err)
- }
- if err := label.SetProcessLabel(container.ProcessLabel); err != nil {
- return fmt.Errorf("set process label %s", err)
- }
- // TODO: (crosbymichael) make this configurable at the Config level
- if container.RestrictSys {
- if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus", "sys"); err != nil {
- return err
- }
- }
- pdeathSignal, err := system.GetParentDeathSignal()
- if err != nil {
- return fmt.Errorf("get parent death signal %s", err)
- }
- if err := FinalizeNamespace(container); err != nil {
- return fmt.Errorf("finalize namespace %s", err)
- }
- // FinalizeNamespace can change user/group which clears the parent death
- // signal, so we restore it here.
- if err := RestoreParentDeathSignal(pdeathSignal); err != nil {
- return fmt.Errorf("restore parent death signal %s", err)
- }
- return system.Execv(args[0], args[0:], container.Env)
- }
- // RestoreParentDeathSignal sets the parent death signal to old.
- func RestoreParentDeathSignal(old int) error {
- if old == 0 {
- return nil
- }
- current, err := system.GetParentDeathSignal()
- if err != nil {
- return fmt.Errorf("get parent death signal %s", err)
- }
- if old == current {
- return nil
- }
- if err := system.ParentDeathSignal(uintptr(old)); err != nil {
- return fmt.Errorf("set parent death signal %s", err)
- }
- // Signal self if parent is already dead. Does nothing if running in a new
- // PID namespace, as Getppid will always return 0.
- if syscall.Getppid() == 1 {
- return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
- }
- return nil
- }
- // SetupUser changes the groups, gid, and uid for the user inside the container
- func SetupUser(u string) error {
- uid, gid, suppGids, err := user.GetUserGroupSupplementary(u, syscall.Getuid(), syscall.Getgid())
- if err != nil {
- return fmt.Errorf("get supplementary groups %s", err)
- }
- if err := system.Setgroups(suppGids); err != nil {
- return fmt.Errorf("setgroups %s", err)
- }
- if err := system.Setgid(gid); err != nil {
- return fmt.Errorf("setgid %s", err)
- }
- if err := system.Setuid(uid); err != nil {
- return fmt.Errorf("setuid %s", err)
- }
- return nil
- }
- // setupVethNetwork uses the Network config if it is not nil to initialize
- // the new veth interface inside the container for use by changing the name to eth0
- // setting the MTU and IP address along with the default gateway
- func setupNetwork(container *libcontainer.Config, networkState *network.NetworkState) error {
- for _, config := range container.Networks {
- strategy, err := network.GetStrategy(config.Type)
- if err != nil {
- return err
- }
- err1 := strategy.Initialize((*network.Network)(config), networkState)
- if err1 != nil {
- return err1
- }
- }
- return nil
- }
- func setupRoute(container *libcontainer.Config) error {
- for _, config := range container.Routes {
- if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil {
- return err
- }
- }
- return nil
- }
- // FinalizeNamespace drops the caps, sets the correct user
- // and working dir, and closes any leaky file descriptors
- // before execing the command inside the namespace
- func FinalizeNamespace(container *libcontainer.Config) error {
- // Ensure that all non-standard fds we may have accidentally
- // inherited are marked close-on-exec so they stay out of the
- // container
- if err := utils.CloseExecFrom(3); err != nil {
- return fmt.Errorf("close open file descriptors %s", err)
- }
- // drop capabilities in bounding set before changing user
- if err := capabilities.DropBoundingSet(container.Capabilities); err != nil {
- return fmt.Errorf("drop bounding set %s", err)
- }
- // preserve existing capabilities while we change users
- if err := system.SetKeepCaps(); err != nil {
- return fmt.Errorf("set keep caps %s", err)
- }
- if err := SetupUser(container.User); err != nil {
- return fmt.Errorf("setup user %s", err)
- }
- if err := system.ClearKeepCaps(); err != nil {
- return fmt.Errorf("clear keep caps %s", err)
- }
- // drop all other capabilities
- if err := capabilities.DropCapabilities(container.Capabilities); err != nil {
- return fmt.Errorf("drop capabilities %s", err)
- }
- if container.WorkingDir != "" {
- if err := system.Chdir(container.WorkingDir); err != nil {
- return fmt.Errorf("chdir to %s %s", container.WorkingDir, err)
- }
- }
- return nil
- }
- func LoadContainerEnvironment(container *libcontainer.Config) error {
- os.Clearenv()
- for _, pair := range container.Env {
- p := strings.SplitN(pair, "=", 2)
- if len(p) < 2 {
- return fmt.Errorf("invalid environment '%v'", pair)
- }
- if err := os.Setenv(p[0], p[1]); err != nil {
- return err
- }
- }
- return nil
- }
|