driver_unix.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
  1. // +build !windows
  2. package execdriver
  3. import (
  4. "encoding/json"
  5. "io/ioutil"
  6. "os"
  7. "path/filepath"
  8. "strconv"
  9. "strings"
  10. "time"
  11. "github.com/docker/docker/daemon/execdriver/native/template"
  12. "github.com/docker/docker/pkg/idtools"
  13. "github.com/docker/docker/pkg/mount"
  14. "github.com/docker/go-units"
  15. "github.com/opencontainers/runc/libcontainer"
  16. "github.com/opencontainers/runc/libcontainer/cgroups/fs"
  17. "github.com/opencontainers/runc/libcontainer/configs"
  18. blkiodev "github.com/opencontainers/runc/libcontainer/configs"
  19. )
  20. // Mount contains information for a mount operation.
  21. type Mount struct {
  22. Source string `json:"source"`
  23. Destination string `json:"destination"`
  24. Writable bool `json:"writable"`
  25. Data string `json:"data"`
  26. Propagation string `json:"mountpropagation"`
  27. }
  28. // Resources contains all resource configs for a driver.
  29. // Currently these are all for cgroup configs.
  30. type Resources struct {
  31. CommonResources
  32. // Fields below here are platform specific
  33. BlkioWeightDevice []*blkiodev.WeightDevice `json:"blkio_weight_device"`
  34. BlkioThrottleReadBpsDevice []*blkiodev.ThrottleDevice `json:"blkio_throttle_read_bps_device"`
  35. BlkioThrottleWriteBpsDevice []*blkiodev.ThrottleDevice `json:"blkio_throttle_write_bps_device"`
  36. BlkioThrottleReadIOpsDevice []*blkiodev.ThrottleDevice `json:"blkio_throttle_read_iops_device"`
  37. BlkioThrottleWriteIOpsDevice []*blkiodev.ThrottleDevice `json:"blkio_throttle_write_iops_device"`
  38. MemorySwap int64 `json:"memory_swap"`
  39. KernelMemory int64 `json:"kernel_memory"`
  40. CPUQuota int64 `json:"cpu_quota"`
  41. CpusetCpus string `json:"cpuset_cpus"`
  42. CpusetMems string `json:"cpuset_mems"`
  43. CPUPeriod int64 `json:"cpu_period"`
  44. Rlimits []*units.Rlimit `json:"rlimits"`
  45. OomKillDisable bool `json:"oom_kill_disable"`
  46. PidsLimit int64 `json:"pids_limit"`
  47. MemorySwappiness int64 `json:"memory_swappiness"`
  48. }
  49. // ProcessConfig is the platform specific structure that describes a process
  50. // that will be run inside a container.
  51. type ProcessConfig struct {
  52. CommonProcessConfig
  53. // Fields below here are platform specific
  54. Privileged bool `json:"privileged"`
  55. User string `json:"user"`
  56. Console string `json:"-"` // dev/console path
  57. }
  58. // Ipc settings of the container
  59. // It is for IPC namespace setting. Usually different containers
  60. // have their own IPC namespace, however this specifies to use
  61. // an existing IPC namespace.
  62. // You can join the host's or a container's IPC namespace.
  63. type Ipc struct {
  64. ContainerID string `json:"container_id"` // id of the container to join ipc.
  65. HostIpc bool `json:"host_ipc"`
  66. }
  67. // Pid settings of the container
  68. // It is for PID namespace setting. Usually different containers
  69. // have their own PID namespace, however this specifies to use
  70. // an existing PID namespace.
  71. // Joining the host's PID namespace is currently the only supported
  72. // option.
  73. type Pid struct {
  74. HostPid bool `json:"host_pid"`
  75. }
  76. // UTS settings of the container
  77. // It is for UTS namespace setting. Usually different containers
  78. // have their own UTS namespace, however this specifies to use
  79. // an existing UTS namespace.
  80. // Joining the host's UTS namespace is currently the only supported
  81. // option.
  82. type UTS struct {
  83. HostUTS bool `json:"host_uts"`
  84. }
  85. // Network settings of the container
  86. type Network struct {
  87. Mtu int `json:"mtu"`
  88. ContainerID string `json:"container_id"` // id of the container to join network.
  89. NamespacePath string `json:"namespace_path"`
  90. HostNetworking bool `json:"host_networking"`
  91. }
  92. // Command wraps an os/exec.Cmd to add more metadata
  93. type Command struct {
  94. CommonCommand
  95. // Fields below here are platform specific
  96. AllowedDevices []*configs.Device `json:"allowed_devices"`
  97. AppArmorProfile string `json:"apparmor_profile"`
  98. AutoCreatedDevices []*configs.Device `json:"autocreated_devices"`
  99. CapAdd []string `json:"cap_add"`
  100. CapDrop []string `json:"cap_drop"`
  101. CgroupParent string `json:"cgroup_parent"` // The parent cgroup for this command.
  102. GIDMapping []idtools.IDMap `json:"gidmapping"`
  103. GroupAdd []string `json:"group_add"`
  104. Ipc *Ipc `json:"ipc"`
  105. OomScoreAdj int `json:"oom_score_adj"`
  106. Pid *Pid `json:"pid"`
  107. ReadonlyRootfs bool `json:"readonly_rootfs"`
  108. RemappedRoot *User `json:"remap_root"`
  109. SeccompProfile string `json:"seccomp_profile"`
  110. UIDMapping []idtools.IDMap `json:"uidmapping"`
  111. UTS *UTS `json:"uts"`
  112. NoNewPrivileges bool `json:"no_new_privileges"`
  113. }
  114. // SetRootPropagation sets the root mount propagation mode.
  115. func SetRootPropagation(config *configs.Config, propagation int) {
  116. config.RootPropagation = propagation
  117. }
  118. // InitContainer is the initialization of a container config.
  119. // It returns the initial configs for a container. It's mostly
  120. // defined by the default template.
  121. func InitContainer(c *Command) *configs.Config {
  122. container := template.New()
  123. container.Hostname = getEnv("HOSTNAME", c.ProcessConfig.Env)
  124. container.Cgroups.Name = c.ID
  125. container.Cgroups.Resources.AllowedDevices = c.AllowedDevices
  126. container.Devices = filterDevices(c.AutoCreatedDevices, (c.RemappedRoot.UID != 0))
  127. container.Rootfs = c.Rootfs
  128. container.Readonlyfs = c.ReadonlyRootfs
  129. // This can be overridden later by driver during mount setup based
  130. // on volume options
  131. SetRootPropagation(container, mount.RPRIVATE)
  132. container.Cgroups.Parent = c.CgroupParent
  133. // check to see if we are running in ramdisk to disable pivot root
  134. container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
  135. return container
  136. }
  137. func filterDevices(devices []*configs.Device, userNamespacesEnabled bool) []*configs.Device {
  138. if !userNamespacesEnabled {
  139. return devices
  140. }
  141. filtered := []*configs.Device{}
  142. // if we have user namespaces enabled, these devices will not be created
  143. // because of the mknod limitation in the kernel for an unprivileged process.
  144. // Rather, they will be bind-mounted, which will only work if they exist;
  145. // check for existence and remove non-existent entries from the list
  146. for _, device := range devices {
  147. if _, err := os.Stat(device.Path); err == nil {
  148. filtered = append(filtered, device)
  149. }
  150. }
  151. return filtered
  152. }
  153. func getEnv(key string, env []string) string {
  154. for _, pair := range env {
  155. parts := strings.SplitN(pair, "=", 2)
  156. if parts[0] == key {
  157. return parts[1]
  158. }
  159. }
  160. return ""
  161. }
  162. // SetupCgroups setups cgroup resources for a container.
  163. func SetupCgroups(container *configs.Config, c *Command) error {
  164. if c.Resources != nil {
  165. container.Cgroups.Resources.CpuShares = c.Resources.CPUShares
  166. container.Cgroups.Resources.Memory = c.Resources.Memory
  167. container.Cgroups.Resources.MemoryReservation = c.Resources.MemoryReservation
  168. container.Cgroups.Resources.MemorySwap = c.Resources.MemorySwap
  169. container.Cgroups.Resources.KernelMemory = c.Resources.KernelMemory
  170. container.Cgroups.Resources.CpusetCpus = c.Resources.CpusetCpus
  171. container.Cgroups.Resources.CpusetMems = c.Resources.CpusetMems
  172. container.Cgroups.Resources.CpuPeriod = c.Resources.CPUPeriod
  173. container.Cgroups.Resources.CpuQuota = c.Resources.CPUQuota
  174. container.Cgroups.Resources.BlkioWeight = c.Resources.BlkioWeight
  175. container.Cgroups.Resources.BlkioWeightDevice = c.Resources.BlkioWeightDevice
  176. container.Cgroups.Resources.BlkioThrottleReadBpsDevice = c.Resources.BlkioThrottleReadBpsDevice
  177. container.Cgroups.Resources.BlkioThrottleWriteBpsDevice = c.Resources.BlkioThrottleWriteBpsDevice
  178. container.Cgroups.Resources.BlkioThrottleReadIOPSDevice = c.Resources.BlkioThrottleReadIOpsDevice
  179. container.Cgroups.Resources.BlkioThrottleWriteIOPSDevice = c.Resources.BlkioThrottleWriteIOpsDevice
  180. container.Cgroups.Resources.OomKillDisable = c.Resources.OomKillDisable
  181. container.Cgroups.Resources.PidsLimit = c.Resources.PidsLimit
  182. container.Cgroups.Resources.MemorySwappiness = c.Resources.MemorySwappiness
  183. }
  184. return nil
  185. }
  186. // Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo.
  187. func getNetworkInterfaceStats(interfaceName string) (*libcontainer.NetworkInterface, error) {
  188. out := &libcontainer.NetworkInterface{Name: interfaceName}
  189. // This can happen if the network runtime information is missing - possible if the
  190. // container was created by an old version of libcontainer.
  191. if interfaceName == "" {
  192. return out, nil
  193. }
  194. type netStatsPair struct {
  195. // Where to write the output.
  196. Out *uint64
  197. // The network stats file to read.
  198. File string
  199. }
  200. // Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container.
  201. netStats := []netStatsPair{
  202. {Out: &out.RxBytes, File: "tx_bytes"},
  203. {Out: &out.RxPackets, File: "tx_packets"},
  204. {Out: &out.RxErrors, File: "tx_errors"},
  205. {Out: &out.RxDropped, File: "tx_dropped"},
  206. {Out: &out.TxBytes, File: "rx_bytes"},
  207. {Out: &out.TxPackets, File: "rx_packets"},
  208. {Out: &out.TxErrors, File: "rx_errors"},
  209. {Out: &out.TxDropped, File: "rx_dropped"},
  210. }
  211. for _, netStat := range netStats {
  212. data, err := readSysfsNetworkStats(interfaceName, netStat.File)
  213. if err != nil {
  214. return nil, err
  215. }
  216. *(netStat.Out) = data
  217. }
  218. return out, nil
  219. }
  220. // Reads the specified statistics available under /sys/class/net/<EthInterface>/statistics
  221. func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) {
  222. data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile))
  223. if err != nil {
  224. return 0, err
  225. }
  226. return strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
  227. }
  228. // Stats collects all the resource usage information from a container.
  229. func Stats(containerDir string, containerMemoryLimit int64, machineMemory int64) (*ResourceStats, error) {
  230. f, err := os.Open(filepath.Join(containerDir, "state.json"))
  231. if err != nil {
  232. return nil, err
  233. }
  234. defer f.Close()
  235. type network struct {
  236. Type string
  237. HostInterfaceName string
  238. }
  239. state := struct {
  240. CgroupPaths map[string]string `json:"cgroup_paths"`
  241. Networks []network
  242. }{}
  243. if err := json.NewDecoder(f).Decode(&state); err != nil {
  244. return nil, err
  245. }
  246. now := time.Now()
  247. mgr := fs.Manager{Paths: state.CgroupPaths}
  248. cstats, err := mgr.GetStats()
  249. if err != nil {
  250. return nil, err
  251. }
  252. stats := &libcontainer.Stats{CgroupStats: cstats}
  253. // if the container does not have any memory limit specified set the
  254. // limit to the machines memory
  255. memoryLimit := containerMemoryLimit
  256. if memoryLimit == 0 {
  257. memoryLimit = machineMemory
  258. }
  259. for _, iface := range state.Networks {
  260. switch iface.Type {
  261. case "veth":
  262. istats, err := getNetworkInterfaceStats(iface.HostInterfaceName)
  263. if err != nil {
  264. return nil, err
  265. }
  266. stats.Interfaces = append(stats.Interfaces, istats)
  267. }
  268. }
  269. return &ResourceStats{
  270. Stats: stats,
  271. Read: now,
  272. MemoryLimit: memoryLimit,
  273. }, nil
  274. }
  275. // User contains the uid and gid representing a Unix user
  276. type User struct {
  277. UID int `json:"root_uid"`
  278. GID int `json:"root_gid"`
  279. }
  280. // ExitStatus provides exit reasons for a container.
  281. type ExitStatus struct {
  282. // The exit code with which the container exited.
  283. ExitCode int
  284. // Whether the container encountered an OOM.
  285. OOMKilled bool
  286. }