init.go 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. // +build linux
  2. package namespaces
  3. import (
  4. "fmt"
  5. "os"
  6. "runtime"
  7. "strings"
  8. "syscall"
  9. "github.com/docker/libcontainer"
  10. "github.com/docker/libcontainer/apparmor"
  11. "github.com/docker/libcontainer/console"
  12. "github.com/docker/libcontainer/label"
  13. "github.com/docker/libcontainer/mount"
  14. "github.com/docker/libcontainer/netlink"
  15. "github.com/docker/libcontainer/network"
  16. "github.com/docker/libcontainer/security/capabilities"
  17. "github.com/docker/libcontainer/security/restrict"
  18. "github.com/docker/libcontainer/syncpipe"
  19. "github.com/docker/libcontainer/utils"
  20. "github.com/dotcloud/docker/pkg/system"
  21. "github.com/dotcloud/docker/pkg/user"
  22. )
// TODO(vishh): This is part of the libcontainer API and it does much more than just namespace-related work.
// Move this to the libcontainer package.
  25. // Init is the init process that first runs inside a new namespace to setup mounts, users, networking,
  26. // and other options required for the new container.
  27. func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syncPipe *syncpipe.SyncPipe, args []string) (err error) {
  28. defer func() {
  29. if err != nil {
  30. syncPipe.ReportChildError(err)
  31. }
  32. }()
  33. rootfs, err := utils.ResolveRootfs(uncleanRootfs)
  34. if err != nil {
  35. return err
  36. }
  37. // clear the current processes env and replace it with the environment
  38. // defined on the container
  39. if err := LoadContainerEnvironment(container); err != nil {
  40. return err
  41. }
  42. // We always read this as it is a way to sync with the parent as well
  43. networkState, err := syncPipe.ReadFromParent()
  44. if err != nil {
  45. return err
  46. }
  47. if consolePath != "" {
  48. if err := console.OpenAndDup(consolePath); err != nil {
  49. return err
  50. }
  51. }
  52. if _, err := system.Setsid(); err != nil {
  53. return fmt.Errorf("setsid %s", err)
  54. }
  55. if consolePath != "" {
  56. if err := system.Setctty(); err != nil {
  57. return fmt.Errorf("setctty %s", err)
  58. }
  59. }
  60. if err := setupNetwork(container, networkState); err != nil {
  61. return fmt.Errorf("setup networking %s", err)
  62. }
  63. if err := setupRoute(container); err != nil {
  64. return fmt.Errorf("setup route %s", err)
  65. }
  66. label.Init()
  67. if err := mount.InitializeMountNamespace(rootfs,
  68. consolePath,
  69. (*mount.MountConfig)(container.MountConfig)); err != nil {
  70. return fmt.Errorf("setup mount namespace %s", err)
  71. }
  72. if container.Hostname != "" {
  73. if err := system.Sethostname(container.Hostname); err != nil {
  74. return fmt.Errorf("sethostname %s", err)
  75. }
  76. }
  77. runtime.LockOSThread()
  78. if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil {
  79. return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err)
  80. }
  81. if err := label.SetProcessLabel(container.ProcessLabel); err != nil {
  82. return fmt.Errorf("set process label %s", err)
  83. }
  84. // TODO: (crosbymichael) make this configurable at the Config level
  85. if container.RestrictSys {
  86. if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus", "sys"); err != nil {
  87. return err
  88. }
  89. }
  90. pdeathSignal, err := system.GetParentDeathSignal()
  91. if err != nil {
  92. return fmt.Errorf("get parent death signal %s", err)
  93. }
  94. if err := FinalizeNamespace(container); err != nil {
  95. return fmt.Errorf("finalize namespace %s", err)
  96. }
  97. // FinalizeNamespace can change user/group which clears the parent death
  98. // signal, so we restore it here.
  99. if err := RestoreParentDeathSignal(pdeathSignal); err != nil {
  100. return fmt.Errorf("restore parent death signal %s", err)
  101. }
  102. return system.Execv(args[0], args[0:], container.Env)
  103. }
  104. // RestoreParentDeathSignal sets the parent death signal to old.
  105. func RestoreParentDeathSignal(old int) error {
  106. if old == 0 {
  107. return nil
  108. }
  109. current, err := system.GetParentDeathSignal()
  110. if err != nil {
  111. return fmt.Errorf("get parent death signal %s", err)
  112. }
  113. if old == current {
  114. return nil
  115. }
  116. if err := system.ParentDeathSignal(uintptr(old)); err != nil {
  117. return fmt.Errorf("set parent death signal %s", err)
  118. }
  119. // Signal self if parent is already dead. Does nothing if running in a new
  120. // PID namespace, as Getppid will always return 0.
  121. if syscall.Getppid() == 1 {
  122. return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
  123. }
  124. return nil
  125. }
  126. // SetupUser changes the groups, gid, and uid for the user inside the container
  127. func SetupUser(u string) error {
  128. uid, gid, suppGids, err := user.GetUserGroupSupplementary(u, syscall.Getuid(), syscall.Getgid())
  129. if err != nil {
  130. return fmt.Errorf("get supplementary groups %s", err)
  131. }
  132. if err := system.Setgroups(suppGids); err != nil {
  133. return fmt.Errorf("setgroups %s", err)
  134. }
  135. if err := system.Setgid(gid); err != nil {
  136. return fmt.Errorf("setgid %s", err)
  137. }
  138. if err := system.Setuid(uid); err != nil {
  139. return fmt.Errorf("setuid %s", err)
  140. }
  141. return nil
  142. }
  143. // setupVethNetwork uses the Network config if it is not nil to initialize
  144. // the new veth interface inside the container for use by changing the name to eth0
  145. // setting the MTU and IP address along with the default gateway
  146. func setupNetwork(container *libcontainer.Config, networkState *network.NetworkState) error {
  147. for _, config := range container.Networks {
  148. strategy, err := network.GetStrategy(config.Type)
  149. if err != nil {
  150. return err
  151. }
  152. err1 := strategy.Initialize((*network.Network)(config), networkState)
  153. if err1 != nil {
  154. return err1
  155. }
  156. }
  157. return nil
  158. }
  159. func setupRoute(container *libcontainer.Config) error {
  160. for _, config := range container.Routes {
  161. if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil {
  162. return err
  163. }
  164. }
  165. return nil
  166. }
  167. // FinalizeNamespace drops the caps, sets the correct user
  168. // and working dir, and closes any leaky file descriptors
  169. // before execing the command inside the namespace
  170. func FinalizeNamespace(container *libcontainer.Config) error {
  171. // Ensure that all non-standard fds we may have accidentally
  172. // inherited are marked close-on-exec so they stay out of the
  173. // container
  174. if err := utils.CloseExecFrom(3); err != nil {
  175. return fmt.Errorf("close open file descriptors %s", err)
  176. }
  177. // drop capabilities in bounding set before changing user
  178. if err := capabilities.DropBoundingSet(container.Capabilities); err != nil {
  179. return fmt.Errorf("drop bounding set %s", err)
  180. }
  181. // preserve existing capabilities while we change users
  182. if err := system.SetKeepCaps(); err != nil {
  183. return fmt.Errorf("set keep caps %s", err)
  184. }
  185. if err := SetupUser(container.User); err != nil {
  186. return fmt.Errorf("setup user %s", err)
  187. }
  188. if err := system.ClearKeepCaps(); err != nil {
  189. return fmt.Errorf("clear keep caps %s", err)
  190. }
  191. // drop all other capabilities
  192. if err := capabilities.DropCapabilities(container.Capabilities); err != nil {
  193. return fmt.Errorf("drop capabilities %s", err)
  194. }
  195. if container.WorkingDir != "" {
  196. if err := system.Chdir(container.WorkingDir); err != nil {
  197. return fmt.Errorf("chdir to %s %s", container.WorkingDir, err)
  198. }
  199. }
  200. return nil
  201. }
  202. func LoadContainerEnvironment(container *libcontainer.Config) error {
  203. os.Clearenv()
  204. for _, pair := range container.Env {
  205. p := strings.SplitN(pair, "=", 2)
  206. if len(p) < 2 {
  207. return fmt.Errorf("invalid environment '%v'", pair)
  208. }
  209. if err := os.Setenv(p[0], p[1]); err != nil {
  210. return err
  211. }
  212. }
  213. return nil
  214. }