apply_systemd.go 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. // +build linux
  2. package systemd
  3. import (
  4. "fmt"
  5. "io/ioutil"
  6. "os"
  7. "path/filepath"
  8. "strconv"
  9. "strings"
  10. "sync"
  11. systemd1 "github.com/coreos/go-systemd/dbus"
  12. "github.com/dotcloud/docker/pkg/libcontainer/cgroups"
  13. "github.com/dotcloud/docker/pkg/systemd"
  14. "github.com/godbus/dbus"
  15. )
  16. type systemdCgroup struct {
  17. cleanupDirs []string
  18. }
  19. var (
  20. connLock sync.Mutex
  21. theConn *systemd1.Conn
  22. hasStartTransientUnit bool
  23. )
  24. func UseSystemd() bool {
  25. if !systemd.SdBooted() {
  26. return false
  27. }
  28. connLock.Lock()
  29. defer connLock.Unlock()
  30. if theConn == nil {
  31. var err error
  32. theConn, err = systemd1.New()
  33. if err != nil {
  34. return false
  35. }
  36. // Assume we have StartTransientUnit
  37. hasStartTransientUnit = true
  38. // But if we get UnknownMethod error we don't
  39. if _, err := theConn.StartTransientUnit("test.scope", "invalid"); err != nil {
  40. if dbusError, ok := err.(dbus.Error); ok {
  41. if dbusError.Name == "org.freedesktop.DBus.Error.UnknownMethod" {
  42. hasStartTransientUnit = false
  43. }
  44. }
  45. }
  46. }
  47. return hasStartTransientUnit
  48. }
  49. func getIfaceForUnit(unitName string) string {
  50. if strings.HasSuffix(unitName, ".scope") {
  51. return "Scope"
  52. }
  53. if strings.HasSuffix(unitName, ".service") {
  54. return "Service"
  55. }
  56. return "Unit"
  57. }
  58. type cgroupArg struct {
  59. File string
  60. Value string
  61. }
  62. func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
  63. var (
  64. unitName = getUnitName(c)
  65. slice = "system.slice"
  66. properties []systemd1.Property
  67. cpuArgs []cgroupArg
  68. cpusetArgs []cgroupArg
  69. memoryArgs []cgroupArg
  70. res systemdCgroup
  71. )
  72. // First set up things not supported by systemd
  73. // -1 disables memorySwap
  74. if c.MemorySwap >= 0 && (c.Memory != 0 || c.MemorySwap > 0) {
  75. memorySwap := c.MemorySwap
  76. if memorySwap == 0 {
  77. // By default, MemorySwap is set to twice the size of RAM.
  78. memorySwap = c.Memory * 2
  79. }
  80. memoryArgs = append(memoryArgs, cgroupArg{"memory.memsw.limit_in_bytes", strconv.FormatInt(memorySwap, 10)})
  81. }
  82. if c.CpusetCpus != "" {
  83. cpusetArgs = append(cpusetArgs, cgroupArg{"cpuset.cpus", c.CpusetCpus})
  84. }
  85. if c.Slice != "" {
  86. slice = c.Slice
  87. }
  88. properties = append(properties,
  89. systemd1.Property{"Slice", dbus.MakeVariant(slice)},
  90. systemd1.Property{"Description", dbus.MakeVariant("docker container " + c.Name)},
  91. systemd1.Property{"PIDs", dbus.MakeVariant([]uint32{uint32(pid)})},
  92. )
  93. if !c.AllowAllDevices {
  94. properties = append(properties,
  95. systemd1.Property{"DevicePolicy", dbus.MakeVariant("strict")})
  96. }
  97. // Always enable accounting, this gets us the same behaviour as the fs implementation,
  98. // plus the kernel has some problems with joining the memory cgroup at a later time.
  99. properties = append(properties,
  100. systemd1.Property{"MemoryAccounting", dbus.MakeVariant(true)},
  101. systemd1.Property{"CPUAccounting", dbus.MakeVariant(true)},
  102. systemd1.Property{"BlockIOAccounting", dbus.MakeVariant(true)})
  103. if c.Memory != 0 {
  104. properties = append(properties,
  105. systemd1.Property{"MemoryLimit", dbus.MakeVariant(uint64(c.Memory))})
  106. }
  107. // TODO: MemoryReservation and MemorySwap not available in systemd
  108. if c.CpuShares != 0 {
  109. properties = append(properties,
  110. systemd1.Property{"CPUShares", dbus.MakeVariant(uint64(c.CpuShares))})
  111. }
  112. if _, err := theConn.StartTransientUnit(unitName, "replace", properties...); err != nil {
  113. return nil, err
  114. }
  115. // To work around the lack of /dev/pts/* support above we need to manually add these
  116. // so, ask systemd for the cgroup used
  117. props, err := theConn.GetUnitTypeProperties(unitName, getIfaceForUnit(unitName))
  118. if err != nil {
  119. return nil, err
  120. }
  121. cgroup := props["ControlGroup"].(string)
  122. if !c.AllowAllDevices {
  123. mountpoint, err := cgroups.FindCgroupMountpoint("devices")
  124. if err != nil {
  125. return nil, err
  126. }
  127. dir := filepath.Join(mountpoint, cgroup)
  128. // We use the same method of allowing devices as in the fs backend. This needs to be changed to use DBUS as soon as possible. However, that change has to wait untill http://cgit.freedesktop.org/systemd/systemd/commit/?id=90060676c442604780634c0a993e3f9c3733f8e6 has been applied in most commonly used systemd versions.
  129. for _, dev := range c.AllowedDevices {
  130. if err := writeFile(dir, "devices.allow", dev.GetCgroupAllowString()); err != nil {
  131. return nil, err
  132. }
  133. }
  134. }
  135. if len(cpuArgs) != 0 {
  136. mountpoint, err := cgroups.FindCgroupMountpoint("cpu")
  137. if err != nil {
  138. return nil, err
  139. }
  140. path := filepath.Join(mountpoint, cgroup)
  141. for _, arg := range cpuArgs {
  142. if err := ioutil.WriteFile(filepath.Join(path, arg.File), []byte(arg.Value), 0700); err != nil {
  143. return nil, err
  144. }
  145. }
  146. }
  147. if len(memoryArgs) != 0 {
  148. mountpoint, err := cgroups.FindCgroupMountpoint("memory")
  149. if err != nil {
  150. return nil, err
  151. }
  152. path := filepath.Join(mountpoint, cgroup)
  153. for _, arg := range memoryArgs {
  154. if err := ioutil.WriteFile(filepath.Join(path, arg.File), []byte(arg.Value), 0700); err != nil {
  155. return nil, err
  156. }
  157. }
  158. }
  159. if len(cpusetArgs) != 0 {
  160. // systemd does not atm set up the cpuset controller, so we must manually
  161. // join it. Additionally that is a very finicky controller where each
  162. // level must have a full setup as the default for a new directory is "no cpus",
  163. // so we avoid using any hierarchies here, creating a toplevel directory.
  164. mountpoint, err := cgroups.FindCgroupMountpoint("cpuset")
  165. if err != nil {
  166. return nil, err
  167. }
  168. initPath, err := cgroups.GetInitCgroupDir("cpuset")
  169. if err != nil {
  170. return nil, err
  171. }
  172. rootPath := filepath.Join(mountpoint, initPath)
  173. path := filepath.Join(mountpoint, initPath, c.Parent+"-"+c.Name)
  174. res.cleanupDirs = append(res.cleanupDirs, path)
  175. if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
  176. return nil, err
  177. }
  178. foundCpus := false
  179. foundMems := false
  180. for _, arg := range cpusetArgs {
  181. if arg.File == "cpuset.cpus" {
  182. foundCpus = true
  183. }
  184. if arg.File == "cpuset.mems" {
  185. foundMems = true
  186. }
  187. if err := ioutil.WriteFile(filepath.Join(path, arg.File), []byte(arg.Value), 0700); err != nil {
  188. return nil, err
  189. }
  190. }
  191. // These are required, if not specified inherit from parent
  192. if !foundCpus {
  193. s, err := ioutil.ReadFile(filepath.Join(rootPath, "cpuset.cpus"))
  194. if err != nil {
  195. return nil, err
  196. }
  197. if err := ioutil.WriteFile(filepath.Join(path, "cpuset.cpus"), s, 0700); err != nil {
  198. return nil, err
  199. }
  200. }
  201. // These are required, if not specified inherit from parent
  202. if !foundMems {
  203. s, err := ioutil.ReadFile(filepath.Join(rootPath, "cpuset.mems"))
  204. if err != nil {
  205. return nil, err
  206. }
  207. if err := ioutil.WriteFile(filepath.Join(path, "cpuset.mems"), s, 0700); err != nil {
  208. return nil, err
  209. }
  210. }
  211. if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700); err != nil {
  212. return nil, err
  213. }
  214. }
  215. return &res, nil
  216. }
  217. func writeFile(dir, file, data string) error {
  218. return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
  219. }
  220. func (c *systemdCgroup) Cleanup() error {
  221. // systemd cleans up, we don't need to do much
  222. for _, path := range c.cleanupDirs {
  223. os.RemoveAll(path)
  224. }
  225. return nil
  226. }
  227. func GetPids(c *cgroups.Cgroup) ([]int, error) {
  228. unitName := getUnitName(c)
  229. mountpoint, err := cgroups.FindCgroupMountpoint("cpu")
  230. if err != nil {
  231. return nil, err
  232. }
  233. props, err := theConn.GetUnitTypeProperties(unitName, getIfaceForUnit(unitName))
  234. if err != nil {
  235. return nil, err
  236. }
  237. cgroup := props["ControlGroup"].(string)
  238. return cgroups.ReadProcsFile(filepath.Join(mountpoint, cgroup))
  239. }
  240. func getUnitName(c *cgroups.Cgroup) string {
  241. return fmt.Sprintf("%s-%s.scope", c.Parent, c.Name)
  242. }