init.go 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. // +build linux
  2. package namespaces
  3. import (
  4. "fmt"
  5. "os"
  6. "runtime"
  7. "strings"
  8. "syscall"
  9. "github.com/dotcloud/docker/pkg/apparmor"
  10. "github.com/dotcloud/docker/pkg/label"
  11. "github.com/dotcloud/docker/pkg/libcontainer"
  12. "github.com/dotcloud/docker/pkg/libcontainer/console"
  13. "github.com/dotcloud/docker/pkg/libcontainer/mount"
  14. "github.com/dotcloud/docker/pkg/libcontainer/network"
  15. "github.com/dotcloud/docker/pkg/libcontainer/security/capabilities"
  16. "github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
  17. "github.com/dotcloud/docker/pkg/libcontainer/utils"
  18. "github.com/dotcloud/docker/pkg/netlink"
  19. "github.com/dotcloud/docker/pkg/system"
  20. "github.com/dotcloud/docker/pkg/user"
  21. )
  22. // Init is the init process that first runs inside a new namespace to setup mounts, users, networking,
  23. // and other options required for the new container.
  24. func Init(container *libcontainer.Container, uncleanRootfs, consolePath string, syncPipe *SyncPipe, args []string) error {
  25. rootfs, err := utils.ResolveRootfs(uncleanRootfs)
  26. if err != nil {
  27. return err
  28. }
  29. // clear the current processes env and replace it with the environment
  30. // defined on the container
  31. if err := LoadContainerEnvironment(container); err != nil {
  32. return err
  33. }
  34. // We always read this as it is a way to sync with the parent as well
  35. context, err := syncPipe.ReadFromParent()
  36. if err != nil {
  37. syncPipe.Close()
  38. return err
  39. }
  40. syncPipe.Close()
  41. if consolePath != "" {
  42. if err := console.OpenAndDup(consolePath); err != nil {
  43. return err
  44. }
  45. }
  46. if _, err := system.Setsid(); err != nil {
  47. return fmt.Errorf("setsid %s", err)
  48. }
  49. if consolePath != "" {
  50. if err := system.Setctty(); err != nil {
  51. return fmt.Errorf("setctty %s", err)
  52. }
  53. }
  54. if err := setupNetwork(container, context); err != nil {
  55. return fmt.Errorf("setup networking %s", err)
  56. }
  57. if err := setupRoute(container); err != nil {
  58. return fmt.Errorf("setup route %s", err)
  59. }
  60. label.Init()
  61. if err := mount.InitializeMountNamespace(rootfs, consolePath, container); err != nil {
  62. return fmt.Errorf("setup mount namespace %s", err)
  63. }
  64. if container.Hostname != "" {
  65. if err := system.Sethostname(container.Hostname); err != nil {
  66. return fmt.Errorf("sethostname %s", err)
  67. }
  68. }
  69. runtime.LockOSThread()
  70. if err := apparmor.ApplyProfile(container.Context["apparmor_profile"]); err != nil {
  71. return fmt.Errorf("set apparmor profile %s: %s", container.Context["apparmor_profile"], err)
  72. }
  73. if err := label.SetProcessLabel(container.Context["process_label"]); err != nil {
  74. return fmt.Errorf("set process label %s", err)
  75. }
  76. if container.Context["restrictions"] != "" {
  77. if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus", "sys"); err != nil {
  78. return err
  79. }
  80. }
  81. pdeathSignal, err := system.GetParentDeathSignal()
  82. if err != nil {
  83. return fmt.Errorf("get parent death signal %s", err)
  84. }
  85. if err := FinalizeNamespace(container); err != nil {
  86. return fmt.Errorf("finalize namespace %s", err)
  87. }
  88. // FinalizeNamespace can change user/group which clears the parent death
  89. // signal, so we restore it here.
  90. if err := RestoreParentDeathSignal(pdeathSignal); err != nil {
  91. return fmt.Errorf("restore parent death signal %s", err)
  92. }
  93. return system.Execv(args[0], args[0:], container.Env)
  94. }
  95. // RestoreParentDeathSignal sets the parent death signal to old.
  96. func RestoreParentDeathSignal(old int) error {
  97. if old == 0 {
  98. return nil
  99. }
  100. current, err := system.GetParentDeathSignal()
  101. if err != nil {
  102. return fmt.Errorf("get parent death signal %s", err)
  103. }
  104. if old == current {
  105. return nil
  106. }
  107. if err := system.ParentDeathSignal(uintptr(old)); err != nil {
  108. return fmt.Errorf("set parent death signal %s", err)
  109. }
  110. // Signal self if parent is already dead. Does nothing if running in a new
  111. // PID namespace, as Getppid will always return 0.
  112. if syscall.Getppid() == 1 {
  113. return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
  114. }
  115. return nil
  116. }
  117. // SetupUser changes the groups, gid, and uid for the user inside the container
  118. func SetupUser(u string) error {
  119. uid, gid, suppGids, err := user.GetUserGroupSupplementary(u, syscall.Getuid(), syscall.Getgid())
  120. if err != nil {
  121. return fmt.Errorf("get supplementary groups %s", err)
  122. }
  123. if err := system.Setgroups(suppGids); err != nil {
  124. return fmt.Errorf("setgroups %s", err)
  125. }
  126. if err := system.Setgid(gid); err != nil {
  127. return fmt.Errorf("setgid %s", err)
  128. }
  129. if err := system.Setuid(uid); err != nil {
  130. return fmt.Errorf("setuid %s", err)
  131. }
  132. return nil
  133. }
  134. // setupVethNetwork uses the Network config if it is not nil to initialize
  135. // the new veth interface inside the container for use by changing the name to eth0
  136. // setting the MTU and IP address along with the default gateway
  137. func setupNetwork(container *libcontainer.Container, context libcontainer.Context) error {
  138. for _, config := range container.Networks {
  139. strategy, err := network.GetStrategy(config.Type)
  140. if err != nil {
  141. return err
  142. }
  143. err1 := strategy.Initialize(config, context)
  144. if err1 != nil {
  145. return err1
  146. }
  147. }
  148. return nil
  149. }
  150. func setupRoute(container *libcontainer.Container) error {
  151. for _, config := range container.Routes {
  152. if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil {
  153. return err
  154. }
  155. }
  156. return nil
  157. }
  158. // FinalizeNamespace drops the caps, sets the correct user
  159. // and working dir, and closes any leaky file descriptors
  160. // before execing the command inside the namespace
  161. func FinalizeNamespace(container *libcontainer.Container) error {
  162. if err := system.CloseFdsFrom(3); err != nil {
  163. return fmt.Errorf("close open file descriptors %s", err)
  164. }
  165. // drop capabilities in bounding set before changing user
  166. if err := capabilities.DropBoundingSet(container); err != nil {
  167. return fmt.Errorf("drop bounding set %s", err)
  168. }
  169. // preserve existing capabilities while we change users
  170. if err := system.SetKeepCaps(); err != nil {
  171. return fmt.Errorf("set keep caps %s", err)
  172. }
  173. if err := SetupUser(container.User); err != nil {
  174. return fmt.Errorf("setup user %s", err)
  175. }
  176. if err := system.ClearKeepCaps(); err != nil {
  177. return fmt.Errorf("clear keep caps %s", err)
  178. }
  179. // drop all other capabilities
  180. if err := capabilities.DropCapabilities(container); err != nil {
  181. return fmt.Errorf("drop capabilities %s", err)
  182. }
  183. if container.WorkingDir != "" {
  184. if err := system.Chdir(container.WorkingDir); err != nil {
  185. return fmt.Errorf("chdir to %s %s", container.WorkingDir, err)
  186. }
  187. }
  188. return nil
  189. }
  190. func LoadContainerEnvironment(container *libcontainer.Container) error {
  191. os.Clearenv()
  192. for _, pair := range container.Env {
  193. p := strings.SplitN(pair, "=", 2)
  194. if len(p) < 2 {
  195. return fmt.Errorf("invalid environment '%v'", pair)
  196. }
  197. if err := os.Setenv(p[0], p[1]); err != nil {
  198. return err
  199. }
  200. }
  201. return nil
  202. }