daemon_unix.go 50 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382
  1. // +build linux freebsd
  2. package daemon
  3. import (
  4. "bufio"
  5. "bytes"
  6. "fmt"
  7. "io/ioutil"
  8. "net"
  9. "os"
  10. "path/filepath"
  11. "runtime"
  12. "runtime/debug"
  13. "strconv"
  14. "strings"
  15. "time"
  16. "github.com/docker/docker/api/types"
  17. "github.com/docker/docker/api/types/blkiodev"
  18. pblkiodev "github.com/docker/docker/api/types/blkiodev"
  19. containertypes "github.com/docker/docker/api/types/container"
  20. "github.com/docker/docker/container"
  21. "github.com/docker/docker/daemon/config"
  22. "github.com/docker/docker/image"
  23. "github.com/docker/docker/opts"
  24. "github.com/docker/docker/pkg/idtools"
  25. "github.com/docker/docker/pkg/parsers"
  26. "github.com/docker/docker/pkg/parsers/kernel"
  27. "github.com/docker/docker/pkg/sysinfo"
  28. "github.com/docker/docker/runconfig"
  29. "github.com/docker/docker/volume"
  30. "github.com/docker/libnetwork"
  31. nwconfig "github.com/docker/libnetwork/config"
  32. "github.com/docker/libnetwork/drivers/bridge"
  33. "github.com/docker/libnetwork/netlabel"
  34. "github.com/docker/libnetwork/netutils"
  35. "github.com/docker/libnetwork/options"
  36. lntypes "github.com/docker/libnetwork/types"
  37. "github.com/golang/protobuf/ptypes"
  38. "github.com/opencontainers/runc/libcontainer/cgroups"
  39. rsystem "github.com/opencontainers/runc/libcontainer/system"
  40. specs "github.com/opencontainers/runtime-spec/specs-go"
  41. "github.com/opencontainers/selinux/go-selinux/label"
  42. "github.com/pkg/errors"
  43. "github.com/sirupsen/logrus"
  44. "github.com/vishvananda/netlink"
  45. "golang.org/x/sys/unix"
  46. )
  47. const (
  48. // See https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/tree/kernel/sched/sched.h?id=8cd9234c64c584432f6992fe944ca9e46ca8ea76#n269
  49. linuxMinCPUShares = 2
  50. linuxMaxCPUShares = 262144
  51. platformSupported = true
  52. // It's not kernel limit, we want this 4M limit to supply a reasonable functional container
  53. linuxMinMemory = 4194304
  54. // constants for remapped root settings
  55. defaultIDSpecifier string = "default"
  56. defaultRemappedID string = "dockremap"
  57. // constant for cgroup drivers
  58. cgroupFsDriver = "cgroupfs"
  59. cgroupSystemdDriver = "systemd"
  60. )
  61. type containerGetter interface {
  62. GetContainer(string) (*container.Container, error)
  63. }
  64. func getMemoryResources(config containertypes.Resources) *specs.LinuxMemory {
  65. memory := specs.LinuxMemory{}
  66. if config.Memory > 0 {
  67. limit := uint64(config.Memory)
  68. memory.Limit = &limit
  69. }
  70. if config.MemoryReservation > 0 {
  71. reservation := uint64(config.MemoryReservation)
  72. memory.Reservation = &reservation
  73. }
  74. if config.MemorySwap > 0 {
  75. swap := uint64(config.MemorySwap)
  76. memory.Swap = &swap
  77. }
  78. if config.MemorySwappiness != nil {
  79. swappiness := uint64(*config.MemorySwappiness)
  80. memory.Swappiness = &swappiness
  81. }
  82. if config.KernelMemory != 0 {
  83. kernelMemory := uint64(config.KernelMemory)
  84. memory.Kernel = &kernelMemory
  85. }
  86. return &memory
  87. }
  88. func getCPUResources(config containertypes.Resources) (*specs.LinuxCPU, error) {
  89. cpu := specs.LinuxCPU{}
  90. if config.CPUShares < 0 {
  91. return nil, fmt.Errorf("shares: invalid argument")
  92. }
  93. if config.CPUShares >= 0 {
  94. shares := uint64(config.CPUShares)
  95. cpu.Shares = &shares
  96. }
  97. if config.CpusetCpus != "" {
  98. cpu.Cpus = config.CpusetCpus
  99. }
  100. if config.CpusetMems != "" {
  101. cpu.Mems = config.CpusetMems
  102. }
  103. if config.NanoCPUs > 0 {
  104. // https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
  105. period := uint64(100 * time.Millisecond / time.Microsecond)
  106. quota := config.NanoCPUs * int64(period) / 1e9
  107. cpu.Period = &period
  108. cpu.Quota = &quota
  109. }
  110. if config.CPUPeriod != 0 {
  111. period := uint64(config.CPUPeriod)
  112. cpu.Period = &period
  113. }
  114. if config.CPUQuota != 0 {
  115. q := config.CPUQuota
  116. cpu.Quota = &q
  117. }
  118. if config.CPURealtimePeriod != 0 {
  119. period := uint64(config.CPURealtimePeriod)
  120. cpu.RealtimePeriod = &period
  121. }
  122. if config.CPURealtimeRuntime != 0 {
  123. c := config.CPURealtimeRuntime
  124. cpu.RealtimeRuntime = &c
  125. }
  126. return &cpu, nil
  127. }
  128. func getBlkioWeightDevices(config containertypes.Resources) ([]specs.LinuxWeightDevice, error) {
  129. var stat unix.Stat_t
  130. var blkioWeightDevices []specs.LinuxWeightDevice
  131. for _, weightDevice := range config.BlkioWeightDevice {
  132. if err := unix.Stat(weightDevice.Path, &stat); err != nil {
  133. return nil, err
  134. }
  135. weight := weightDevice.Weight
  136. d := specs.LinuxWeightDevice{Weight: &weight}
  137. d.Major = int64(stat.Rdev / 256)
  138. d.Minor = int64(stat.Rdev % 256)
  139. blkioWeightDevices = append(blkioWeightDevices, d)
  140. }
  141. return blkioWeightDevices, nil
  142. }
  143. func (daemon *Daemon) parseSecurityOpt(container *container.Container, hostConfig *containertypes.HostConfig) error {
  144. container.NoNewPrivileges = daemon.configStore.NoNewPrivileges
  145. return parseSecurityOpt(container, hostConfig)
  146. }
  147. func parseSecurityOpt(container *container.Container, config *containertypes.HostConfig) error {
  148. var (
  149. labelOpts []string
  150. err error
  151. )
  152. for _, opt := range config.SecurityOpt {
  153. if opt == "no-new-privileges" {
  154. container.NoNewPrivileges = true
  155. continue
  156. }
  157. if opt == "disable" {
  158. labelOpts = append(labelOpts, "disable")
  159. continue
  160. }
  161. var con []string
  162. if strings.Contains(opt, "=") {
  163. con = strings.SplitN(opt, "=", 2)
  164. } else if strings.Contains(opt, ":") {
  165. con = strings.SplitN(opt, ":", 2)
  166. logrus.Warn("Security options with `:` as a separator are deprecated and will be completely unsupported in 17.04, use `=` instead.")
  167. }
  168. if len(con) != 2 {
  169. return fmt.Errorf("invalid --security-opt 1: %q", opt)
  170. }
  171. switch con[0] {
  172. case "label":
  173. labelOpts = append(labelOpts, con[1])
  174. case "apparmor":
  175. container.AppArmorProfile = con[1]
  176. case "seccomp":
  177. container.SeccompProfile = con[1]
  178. case "no-new-privileges":
  179. noNewPrivileges, err := strconv.ParseBool(con[1])
  180. if err != nil {
  181. return fmt.Errorf("invalid --security-opt 2: %q", opt)
  182. }
  183. container.NoNewPrivileges = noNewPrivileges
  184. default:
  185. return fmt.Errorf("invalid --security-opt 2: %q", opt)
  186. }
  187. }
  188. container.ProcessLabel, container.MountLabel, err = label.InitLabels(labelOpts)
  189. return err
  190. }
  191. func getBlkioThrottleDevices(devs []*blkiodev.ThrottleDevice) ([]specs.LinuxThrottleDevice, error) {
  192. var throttleDevices []specs.LinuxThrottleDevice
  193. var stat unix.Stat_t
  194. for _, d := range devs {
  195. if err := unix.Stat(d.Path, &stat); err != nil {
  196. return nil, err
  197. }
  198. d := specs.LinuxThrottleDevice{Rate: d.Rate}
  199. d.Major = int64(stat.Rdev / 256)
  200. d.Minor = int64(stat.Rdev % 256)
  201. throttleDevices = append(throttleDevices, d)
  202. }
  203. return throttleDevices, nil
  204. }
  205. func checkKernel() error {
  206. // Check for unsupported kernel versions
  207. // FIXME: it would be cleaner to not test for specific versions, but rather
  208. // test for specific functionalities.
  209. // Unfortunately we can't test for the feature "does not cause a kernel panic"
  210. // without actually causing a kernel panic, so we need this workaround until
  211. // the circumstances of pre-3.10 crashes are clearer.
  212. // For details see https://github.com/docker/docker/issues/407
  213. // Docker 1.11 and above doesn't actually run on kernels older than 3.4,
  214. // due to containerd-shim usage of PR_SET_CHILD_SUBREAPER (introduced in 3.4).
  215. if !kernel.CheckKernelVersion(3, 10, 0) {
  216. v, _ := kernel.GetKernelVersion()
  217. if os.Getenv("DOCKER_NOWARN_KERNEL_VERSION") == "" {
  218. logrus.Fatalf("Your Linux kernel version %s is not supported for running docker. Please upgrade your kernel to 3.10.0 or newer.", v.String())
  219. }
  220. }
  221. return nil
  222. }
  223. // adaptContainerSettings is called during container creation to modify any
  224. // settings necessary in the HostConfig structure.
  225. func (daemon *Daemon) adaptContainerSettings(hostConfig *containertypes.HostConfig, adjustCPUShares bool) error {
  226. if adjustCPUShares && hostConfig.CPUShares > 0 {
  227. // Handle unsupported CPUShares
  228. if hostConfig.CPUShares < linuxMinCPUShares {
  229. logrus.Warnf("Changing requested CPUShares of %d to minimum allowed of %d", hostConfig.CPUShares, linuxMinCPUShares)
  230. hostConfig.CPUShares = linuxMinCPUShares
  231. } else if hostConfig.CPUShares > linuxMaxCPUShares {
  232. logrus.Warnf("Changing requested CPUShares of %d to maximum allowed of %d", hostConfig.CPUShares, linuxMaxCPUShares)
  233. hostConfig.CPUShares = linuxMaxCPUShares
  234. }
  235. }
  236. if hostConfig.Memory > 0 && hostConfig.MemorySwap == 0 {
  237. // By default, MemorySwap is set to twice the size of Memory.
  238. hostConfig.MemorySwap = hostConfig.Memory * 2
  239. }
  240. if hostConfig.ShmSize == 0 {
  241. hostConfig.ShmSize = config.DefaultShmSize
  242. if daemon.configStore != nil {
  243. hostConfig.ShmSize = int64(daemon.configStore.ShmSize)
  244. }
  245. }
  246. // Set default IPC mode, if unset for container
  247. if hostConfig.IpcMode.IsEmpty() {
  248. m := config.DefaultIpcMode
  249. if daemon.configStore != nil {
  250. m = daemon.configStore.IpcMode
  251. }
  252. hostConfig.IpcMode = containertypes.IpcMode(m)
  253. }
  254. adaptSharedNamespaceContainer(daemon, hostConfig)
  255. var err error
  256. opts, err := daemon.generateSecurityOpt(hostConfig)
  257. if err != nil {
  258. return err
  259. }
  260. hostConfig.SecurityOpt = append(hostConfig.SecurityOpt, opts...)
  261. if hostConfig.OomKillDisable == nil {
  262. defaultOomKillDisable := false
  263. hostConfig.OomKillDisable = &defaultOomKillDisable
  264. }
  265. return nil
  266. }
  267. // adaptSharedNamespaceContainer replaces container name with its ID in hostConfig.
  268. // To be more precisely, it modifies `container:name` to `container:ID` of PidMode, IpcMode
  269. // and NetworkMode.
  270. //
  271. // When a container shares its namespace with another container, use ID can keep the namespace
  272. // sharing connection between the two containers even the another container is renamed.
  273. func adaptSharedNamespaceContainer(daemon containerGetter, hostConfig *containertypes.HostConfig) {
  274. containerPrefix := "container:"
  275. if hostConfig.PidMode.IsContainer() {
  276. pidContainer := hostConfig.PidMode.Container()
  277. // if there is any error returned here, we just ignore it and leave it to be
  278. // handled in the following logic
  279. if c, err := daemon.GetContainer(pidContainer); err == nil {
  280. hostConfig.PidMode = containertypes.PidMode(containerPrefix + c.ID)
  281. }
  282. }
  283. if hostConfig.IpcMode.IsContainer() {
  284. ipcContainer := hostConfig.IpcMode.Container()
  285. if c, err := daemon.GetContainer(ipcContainer); err == nil {
  286. hostConfig.IpcMode = containertypes.IpcMode(containerPrefix + c.ID)
  287. }
  288. }
  289. if hostConfig.NetworkMode.IsContainer() {
  290. netContainer := hostConfig.NetworkMode.ConnectedContainer()
  291. if c, err := daemon.GetContainer(netContainer); err == nil {
  292. hostConfig.NetworkMode = containertypes.NetworkMode(containerPrefix + c.ID)
  293. }
  294. }
  295. }
  296. func verifyContainerResources(resources *containertypes.Resources, sysInfo *sysinfo.SysInfo, update bool) ([]string, error) {
  297. warnings := []string{}
  298. fixMemorySwappiness(resources)
  299. // memory subsystem checks and adjustments
  300. if resources.Memory != 0 && resources.Memory < linuxMinMemory {
  301. return warnings, fmt.Errorf("Minimum memory limit allowed is 4MB")
  302. }
  303. if resources.Memory > 0 && !sysInfo.MemoryLimit {
  304. warnings = append(warnings, "Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
  305. logrus.Warn("Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
  306. resources.Memory = 0
  307. resources.MemorySwap = -1
  308. }
  309. if resources.Memory > 0 && resources.MemorySwap != -1 && !sysInfo.SwapLimit {
  310. warnings = append(warnings, "Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.")
  311. logrus.Warn("Your kernel does not support swap limit capabilities,or the cgroup is not mounted. Memory limited without swap.")
  312. resources.MemorySwap = -1
  313. }
  314. if resources.Memory > 0 && resources.MemorySwap > 0 && resources.MemorySwap < resources.Memory {
  315. return warnings, fmt.Errorf("Minimum memoryswap limit should be larger than memory limit, see usage")
  316. }
  317. if resources.Memory == 0 && resources.MemorySwap > 0 && !update {
  318. return warnings, fmt.Errorf("You should always set the Memory limit when using Memoryswap limit, see usage")
  319. }
  320. if resources.MemorySwappiness != nil && !sysInfo.MemorySwappiness {
  321. warnings = append(warnings, "Your kernel does not support memory swappiness capabilities or the cgroup is not mounted. Memory swappiness discarded.")
  322. logrus.Warn("Your kernel does not support memory swappiness capabilities, or the cgroup is not mounted. Memory swappiness discarded.")
  323. resources.MemorySwappiness = nil
  324. }
  325. if resources.MemorySwappiness != nil {
  326. swappiness := *resources.MemorySwappiness
  327. if swappiness < 0 || swappiness > 100 {
  328. return warnings, fmt.Errorf("Invalid value: %v, valid memory swappiness range is 0-100", swappiness)
  329. }
  330. }
  331. if resources.MemoryReservation > 0 && !sysInfo.MemoryReservation {
  332. warnings = append(warnings, "Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.")
  333. logrus.Warn("Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.")
  334. resources.MemoryReservation = 0
  335. }
  336. if resources.MemoryReservation > 0 && resources.MemoryReservation < linuxMinMemory {
  337. return warnings, fmt.Errorf("Minimum memory reservation allowed is 4MB")
  338. }
  339. if resources.Memory > 0 && resources.MemoryReservation > 0 && resources.Memory < resources.MemoryReservation {
  340. return warnings, fmt.Errorf("Minimum memory limit can not be less than memory reservation limit, see usage")
  341. }
  342. if resources.KernelMemory > 0 && !sysInfo.KernelMemory {
  343. warnings = append(warnings, "Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
  344. logrus.Warn("Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
  345. resources.KernelMemory = 0
  346. }
  347. if resources.KernelMemory > 0 && resources.KernelMemory < linuxMinMemory {
  348. return warnings, fmt.Errorf("Minimum kernel memory limit allowed is 4MB")
  349. }
  350. if resources.KernelMemory > 0 && !kernel.CheckKernelVersion(4, 0, 0) {
  351. warnings = append(warnings, "You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
  352. logrus.Warn("You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
  353. }
  354. if resources.OomKillDisable != nil && !sysInfo.OomKillDisable {
  355. // only produce warnings if the setting wasn't to *disable* the OOM Kill; no point
  356. // warning the caller if they already wanted the feature to be off
  357. if *resources.OomKillDisable {
  358. warnings = append(warnings, "Your kernel does not support OomKillDisable. OomKillDisable discarded.")
  359. logrus.Warn("Your kernel does not support OomKillDisable. OomKillDisable discarded.")
  360. }
  361. resources.OomKillDisable = nil
  362. }
  363. if resources.PidsLimit != 0 && !sysInfo.PidsLimit {
  364. warnings = append(warnings, "Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.")
  365. logrus.Warn("Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.")
  366. resources.PidsLimit = 0
  367. }
  368. // cpu subsystem checks and adjustments
  369. if resources.NanoCPUs > 0 && resources.CPUPeriod > 0 {
  370. return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Period cannot both be set")
  371. }
  372. if resources.NanoCPUs > 0 && resources.CPUQuota > 0 {
  373. return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Quota cannot both be set")
  374. }
  375. if resources.NanoCPUs > 0 && (!sysInfo.CPUCfsPeriod || !sysInfo.CPUCfsQuota) {
  376. return warnings, fmt.Errorf("NanoCPUs can not be set, as your kernel does not support CPU cfs period/quota or the cgroup is not mounted")
  377. }
  378. // The highest precision we could get on Linux is 0.001, by setting
  379. // cpu.cfs_period_us=1000ms
  380. // cpu.cfs_quota=1ms
  381. // See the following link for details:
  382. // https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
  383. // Here we don't set the lower limit and it is up to the underlying platform (e.g., Linux) to return an error.
  384. // The error message is 0.01 so that this is consistent with Windows
  385. if resources.NanoCPUs < 0 || resources.NanoCPUs > int64(sysinfo.NumCPU())*1e9 {
  386. return warnings, fmt.Errorf("Range of CPUs is from 0.01 to %d.00, as there are only %d CPUs available", sysinfo.NumCPU(), sysinfo.NumCPU())
  387. }
  388. if resources.CPUShares > 0 && !sysInfo.CPUShares {
  389. warnings = append(warnings, "Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.")
  390. logrus.Warn("Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.")
  391. resources.CPUShares = 0
  392. }
  393. if resources.CPUPeriod > 0 && !sysInfo.CPUCfsPeriod {
  394. warnings = append(warnings, "Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.")
  395. logrus.Warn("Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.")
  396. resources.CPUPeriod = 0
  397. }
  398. if resources.CPUPeriod != 0 && (resources.CPUPeriod < 1000 || resources.CPUPeriod > 1000000) {
  399. return warnings, fmt.Errorf("CPU cfs period can not be less than 1ms (i.e. 1000) or larger than 1s (i.e. 1000000)")
  400. }
  401. if resources.CPUQuota > 0 && !sysInfo.CPUCfsQuota {
  402. warnings = append(warnings, "Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.")
  403. logrus.Warn("Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.")
  404. resources.CPUQuota = 0
  405. }
  406. if resources.CPUQuota > 0 && resources.CPUQuota < 1000 {
  407. return warnings, fmt.Errorf("CPU cfs quota can not be less than 1ms (i.e. 1000)")
  408. }
  409. if resources.CPUPercent > 0 {
  410. warnings = append(warnings, fmt.Sprintf("%s does not support CPU percent. Percent discarded.", runtime.GOOS))
  411. logrus.Warnf("%s does not support CPU percent. Percent discarded.", runtime.GOOS)
  412. resources.CPUPercent = 0
  413. }
  414. // cpuset subsystem checks and adjustments
  415. if (resources.CpusetCpus != "" || resources.CpusetMems != "") && !sysInfo.Cpuset {
  416. warnings = append(warnings, "Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.")
  417. logrus.Warn("Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.")
  418. resources.CpusetCpus = ""
  419. resources.CpusetMems = ""
  420. }
  421. cpusAvailable, err := sysInfo.IsCpusetCpusAvailable(resources.CpusetCpus)
  422. if err != nil {
  423. return warnings, fmt.Errorf("Invalid value %s for cpuset cpus", resources.CpusetCpus)
  424. }
  425. if !cpusAvailable {
  426. return warnings, fmt.Errorf("Requested CPUs are not available - requested %s, available: %s", resources.CpusetCpus, sysInfo.Cpus)
  427. }
  428. memsAvailable, err := sysInfo.IsCpusetMemsAvailable(resources.CpusetMems)
  429. if err != nil {
  430. return warnings, fmt.Errorf("Invalid value %s for cpuset mems", resources.CpusetMems)
  431. }
  432. if !memsAvailable {
  433. return warnings, fmt.Errorf("Requested memory nodes are not available - requested %s, available: %s", resources.CpusetMems, sysInfo.Mems)
  434. }
  435. // blkio subsystem checks and adjustments
  436. if resources.BlkioWeight > 0 && !sysInfo.BlkioWeight {
  437. warnings = append(warnings, "Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.")
  438. logrus.Warn("Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.")
  439. resources.BlkioWeight = 0
  440. }
  441. if resources.BlkioWeight > 0 && (resources.BlkioWeight < 10 || resources.BlkioWeight > 1000) {
  442. return warnings, fmt.Errorf("Range of blkio weight is from 10 to 1000")
  443. }
  444. if resources.IOMaximumBandwidth != 0 || resources.IOMaximumIOps != 0 {
  445. return warnings, fmt.Errorf("Invalid QoS settings: %s does not support Maximum IO Bandwidth or Maximum IO IOps", runtime.GOOS)
  446. }
  447. if len(resources.BlkioWeightDevice) > 0 && !sysInfo.BlkioWeightDevice {
  448. warnings = append(warnings, "Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.")
  449. logrus.Warn("Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.")
  450. resources.BlkioWeightDevice = []*pblkiodev.WeightDevice{}
  451. }
  452. if len(resources.BlkioDeviceReadBps) > 0 && !sysInfo.BlkioReadBpsDevice {
  453. warnings = append(warnings, "Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded.")
  454. logrus.Warn("Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded")
  455. resources.BlkioDeviceReadBps = []*pblkiodev.ThrottleDevice{}
  456. }
  457. if len(resources.BlkioDeviceWriteBps) > 0 && !sysInfo.BlkioWriteBpsDevice {
  458. warnings = append(warnings, "Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.")
  459. logrus.Warn("Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.")
  460. resources.BlkioDeviceWriteBps = []*pblkiodev.ThrottleDevice{}
  461. }
  462. if len(resources.BlkioDeviceReadIOps) > 0 && !sysInfo.BlkioReadIOpsDevice {
  463. warnings = append(warnings, "Your kernel does not support IOPS Block read limit or the cgroup is not mounted. Block I/O IOPS read limit discarded.")
  464. logrus.Warn("Your kernel does not support IOPS Block I/O read limit in IO or the cgroup is not mounted. Block I/O IOPS read limit discarded.")
  465. resources.BlkioDeviceReadIOps = []*pblkiodev.ThrottleDevice{}
  466. }
  467. if len(resources.BlkioDeviceWriteIOps) > 0 && !sysInfo.BlkioWriteIOpsDevice {
  468. warnings = append(warnings, "Your kernel does not support IOPS Block write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.")
  469. logrus.Warn("Your kernel does not support IOPS Block I/O write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.")
  470. resources.BlkioDeviceWriteIOps = []*pblkiodev.ThrottleDevice{}
  471. }
  472. return warnings, nil
  473. }
  474. func (daemon *Daemon) getCgroupDriver() string {
  475. cgroupDriver := cgroupFsDriver
  476. if UsingSystemd(daemon.configStore) {
  477. cgroupDriver = cgroupSystemdDriver
  478. }
  479. return cgroupDriver
  480. }
  481. // getCD gets the raw value of the native.cgroupdriver option, if set.
  482. func getCD(config *config.Config) string {
  483. for _, option := range config.ExecOptions {
  484. key, val, err := parsers.ParseKeyValueOpt(option)
  485. if err != nil || !strings.EqualFold(key, "native.cgroupdriver") {
  486. continue
  487. }
  488. return val
  489. }
  490. return ""
  491. }
  492. // VerifyCgroupDriver validates native.cgroupdriver
  493. func VerifyCgroupDriver(config *config.Config) error {
  494. cd := getCD(config)
  495. if cd == "" || cd == cgroupFsDriver || cd == cgroupSystemdDriver {
  496. return nil
  497. }
  498. return fmt.Errorf("native.cgroupdriver option %s not supported", cd)
  499. }
  500. // UsingSystemd returns true if cli option includes native.cgroupdriver=systemd
  501. func UsingSystemd(config *config.Config) bool {
  502. return getCD(config) == cgroupSystemdDriver
  503. }
  504. // verifyPlatformContainerSettings performs platform-specific validation of the
  505. // hostconfig and config structures.
  506. func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.HostConfig, config *containertypes.Config, update bool) ([]string, error) {
  507. var warnings []string
  508. sysInfo := sysinfo.New(true)
  509. warnings, err := daemon.verifyExperimentalContainerSettings(hostConfig, config)
  510. if err != nil {
  511. return warnings, err
  512. }
  513. w, err := verifyContainerResources(&hostConfig.Resources, sysInfo, update)
  514. // no matter err is nil or not, w could have data in itself.
  515. warnings = append(warnings, w...)
  516. if err != nil {
  517. return warnings, err
  518. }
  519. if hostConfig.ShmSize < 0 {
  520. return warnings, fmt.Errorf("SHM size can not be less than 0")
  521. }
  522. if hostConfig.OomScoreAdj < -1000 || hostConfig.OomScoreAdj > 1000 {
  523. return warnings, fmt.Errorf("Invalid value %d, range for oom score adj is [-1000, 1000]", hostConfig.OomScoreAdj)
  524. }
  525. // ip-forwarding does not affect container with '--net=host' (or '--net=none')
  526. if sysInfo.IPv4ForwardingDisabled && !(hostConfig.NetworkMode.IsHost() || hostConfig.NetworkMode.IsNone()) {
  527. warnings = append(warnings, "IPv4 forwarding is disabled. Networking will not work.")
  528. logrus.Warn("IPv4 forwarding is disabled. Networking will not work")
  529. }
  530. // check for various conflicting options with user namespaces
  531. if daemon.configStore.RemappedRoot != "" && hostConfig.UsernsMode.IsPrivate() {
  532. if hostConfig.Privileged {
  533. return warnings, fmt.Errorf("Privileged mode is incompatible with user namespaces. You must run the container in the host namespace when running privileged mode.")
  534. }
  535. if hostConfig.NetworkMode.IsHost() && !hostConfig.UsernsMode.IsHost() {
  536. return warnings, fmt.Errorf("Cannot share the host's network namespace when user namespaces are enabled")
  537. }
  538. if hostConfig.PidMode.IsHost() && !hostConfig.UsernsMode.IsHost() {
  539. return warnings, fmt.Errorf("Cannot share the host PID namespace when user namespaces are enabled")
  540. }
  541. }
  542. if hostConfig.CgroupParent != "" && UsingSystemd(daemon.configStore) {
  543. // CgroupParent for systemd cgroup should be named as "xxx.slice"
  544. if len(hostConfig.CgroupParent) <= 6 || !strings.HasSuffix(hostConfig.CgroupParent, ".slice") {
  545. return warnings, fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"")
  546. }
  547. }
  548. if hostConfig.Runtime == "" {
  549. hostConfig.Runtime = daemon.configStore.GetDefaultRuntimeName()
  550. }
  551. if rt := daemon.configStore.GetRuntime(hostConfig.Runtime); rt == nil {
  552. return warnings, fmt.Errorf("Unknown runtime specified %s", hostConfig.Runtime)
  553. }
  554. for dest := range hostConfig.Tmpfs {
  555. if err := volume.ValidateTmpfsMountDestination(dest); err != nil {
  556. return warnings, err
  557. }
  558. }
  559. return warnings, nil
  560. }
  561. // reloadPlatform updates configuration with platform specific options
  562. // and updates the passed attributes
  563. func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string]string) error {
  564. if err := conf.ValidatePlatformConfig(); err != nil {
  565. return err
  566. }
  567. if conf.IsValueSet("runtimes") {
  568. daemon.configStore.Runtimes = conf.Runtimes
  569. // Always set the default one
  570. daemon.configStore.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
  571. }
  572. if conf.DefaultRuntime != "" {
  573. daemon.configStore.DefaultRuntime = conf.DefaultRuntime
  574. }
  575. if conf.IsValueSet("default-shm-size") {
  576. daemon.configStore.ShmSize = conf.ShmSize
  577. }
  578. if conf.IpcMode != "" {
  579. daemon.configStore.IpcMode = conf.IpcMode
  580. }
  581. // Update attributes
  582. var runtimeList bytes.Buffer
  583. for name, rt := range daemon.configStore.Runtimes {
  584. if runtimeList.Len() > 0 {
  585. runtimeList.WriteRune(' ')
  586. }
  587. runtimeList.WriteString(fmt.Sprintf("%s:%s", name, rt))
  588. }
  589. attributes["runtimes"] = runtimeList.String()
  590. attributes["default-runtime"] = daemon.configStore.DefaultRuntime
  591. attributes["default-shm-size"] = fmt.Sprintf("%d", daemon.configStore.ShmSize)
  592. attributes["default-ipc-mode"] = daemon.configStore.IpcMode
  593. return nil
  594. }
  595. // verifyDaemonSettings performs validation of daemon config struct
  596. func verifyDaemonSettings(conf *config.Config) error {
  597. // Check for mutually incompatible config options
  598. if conf.BridgeConfig.Iface != "" && conf.BridgeConfig.IP != "" {
  599. return fmt.Errorf("You specified -b & --bip, mutually exclusive options. Please specify only one")
  600. }
  601. if !conf.BridgeConfig.EnableIPTables && !conf.BridgeConfig.InterContainerCommunication {
  602. return fmt.Errorf("You specified --iptables=false with --icc=false. ICC=false uses iptables to function. Please set --icc or --iptables to true")
  603. }
  604. if !conf.BridgeConfig.EnableIPTables && conf.BridgeConfig.EnableIPMasq {
  605. conf.BridgeConfig.EnableIPMasq = false
  606. }
  607. if err := VerifyCgroupDriver(conf); err != nil {
  608. return err
  609. }
  610. if conf.CgroupParent != "" && UsingSystemd(conf) {
  611. if len(conf.CgroupParent) <= 6 || !strings.HasSuffix(conf.CgroupParent, ".slice") {
  612. return fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"")
  613. }
  614. }
  615. if conf.DefaultRuntime == "" {
  616. conf.DefaultRuntime = config.StockRuntimeName
  617. }
  618. if conf.Runtimes == nil {
  619. conf.Runtimes = make(map[string]types.Runtime)
  620. }
  621. conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
  622. return nil
  623. }
  624. // checkSystem validates platform-specific requirements
  625. func checkSystem() error {
  626. if os.Geteuid() != 0 {
  627. return fmt.Errorf("The Docker daemon needs to be run as root")
  628. }
  629. return checkKernel()
  630. }
  631. // configureMaxThreads sets the Go runtime max threads threshold
  632. // which is 90% of the kernel setting from /proc/sys/kernel/threads-max
  633. func configureMaxThreads(config *config.Config) error {
  634. mt, err := ioutil.ReadFile("/proc/sys/kernel/threads-max")
  635. if err != nil {
  636. return err
  637. }
  638. mtint, err := strconv.Atoi(strings.TrimSpace(string(mt)))
  639. if err != nil {
  640. return err
  641. }
  642. maxThreads := (mtint / 100) * 90
  643. debug.SetMaxThreads(maxThreads)
  644. logrus.Debugf("Golang's threads limit set to %d", maxThreads)
  645. return nil
  646. }
  // overlaySupportsSelinux reports whether /proc/kallsyms lists the symbol
  // security_inode_copy_up.
  //
  // A missing /proc/kallsyms yields (false, nil). Any other open failure, a line
  // that does not parse as three whitespace separated fields, or a read error
  // while scanning is returned as an error.
  func overlaySupportsSelinux() (bool, error) {
  	f, err := os.Open("/proc/kallsyms")
  	if err != nil {
  		if os.IsNotExist(err) {
  			return false, nil
  		}
  		return false, err
  	}
  	defer f.Close()

  	var symAddr, symType, symName string

  	s := bufio.NewScanner(f)
  	for s.Scan() {
  		text := s.Text()
  		if _, err := fmt.Sscanf(text, "%s %s %s", &symAddr, &symType, &symName); err != nil {
  			return false, fmt.Errorf("Scanning '%s' failed: %s", text, err)
  		}

  		// Check for presence of symbol security_inode_copy_up.
  		if symName == "security_inode_copy_up" {
  			return true, nil
  		}
  	}

  	// Scan returns false both at EOF and on a read failure; Err only tells the
  	// two apart once the loop is over, so it has to be consulted here rather
  	// than inside the loop.
  	if err := s.Err(); err != nil {
  		return false, err
  	}
  	return false, nil
  }
  673. // configureKernelSecuritySupport configures and validates security support for the kernel
  674. func configureKernelSecuritySupport(config *config.Config, driverNames []string) error {
  675. if config.EnableSelinuxSupport {
  676. if !selinuxEnabled() {
  677. logrus.Warn("Docker could not enable SELinux on the host system")
  678. return nil
  679. }
  680. overlayFound := false
  681. for _, d := range driverNames {
  682. if d == "overlay" || d == "overlay2" {
  683. overlayFound = true
  684. break
  685. }
  686. }
  687. if overlayFound {
  688. // If driver is overlay or overlay2, make sure kernel
  689. // supports selinux with overlay.
  690. supported, err := overlaySupportsSelinux()
  691. if err != nil {
  692. return err
  693. }
  694. if !supported {
  695. logrus.Warnf("SELinux is not supported with the %v graph driver on this kernel", driverNames)
  696. }
  697. }
  698. } else {
  699. selinuxSetDisabled()
  700. }
  701. return nil
  702. }
  703. func (daemon *Daemon) initNetworkController(config *config.Config, activeSandboxes map[string]interface{}) (libnetwork.NetworkController, error) {
  704. netOptions, err := daemon.networkOptions(config, daemon.PluginStore, activeSandboxes)
  705. if err != nil {
  706. return nil, err
  707. }
  708. controller, err := libnetwork.New(netOptions...)
  709. if err != nil {
  710. return nil, fmt.Errorf("error obtaining controller instance: %v", err)
  711. }
  712. if len(activeSandboxes) > 0 {
  713. logrus.Info("There are old running containers, the network config will not take affect")
  714. return controller, nil
  715. }
  716. // Initialize default network on "null"
  717. if n, _ := controller.NetworkByName("none"); n == nil {
  718. if _, err := controller.NewNetwork("null", "none", "", libnetwork.NetworkOptionPersist(true)); err != nil {
  719. return nil, fmt.Errorf("Error creating default \"null\" network: %v", err)
  720. }
  721. }
  722. // Initialize default network on "host"
  723. if n, _ := controller.NetworkByName("host"); n == nil {
  724. if _, err := controller.NewNetwork("host", "host", "", libnetwork.NetworkOptionPersist(true)); err != nil {
  725. return nil, fmt.Errorf("Error creating default \"host\" network: %v", err)
  726. }
  727. }
  728. // Clear stale bridge network
  729. if n, err := controller.NetworkByName("bridge"); err == nil {
  730. if err = n.Delete(); err != nil {
  731. return nil, fmt.Errorf("could not delete the default bridge network: %v", err)
  732. }
  733. }
  734. if !config.DisableBridge {
  735. // Initialize default driver "bridge"
  736. if err := initBridgeDriver(controller, config); err != nil {
  737. return nil, err
  738. }
  739. } else {
  740. removeDefaultBridgeInterface()
  741. }
  742. return controller, nil
  743. }
  744. func driverOptions(config *config.Config) []nwconfig.Option {
  745. bridgeConfig := options.Generic{
  746. "EnableIPForwarding": config.BridgeConfig.EnableIPForward,
  747. "EnableIPTables": config.BridgeConfig.EnableIPTables,
  748. "EnableUserlandProxy": config.BridgeConfig.EnableUserlandProxy,
  749. "UserlandProxyPath": config.BridgeConfig.UserlandProxyPath}
  750. bridgeOption := options.Generic{netlabel.GenericData: bridgeConfig}
  751. dOptions := []nwconfig.Option{}
  752. dOptions = append(dOptions, nwconfig.OptionDriverConfig("bridge", bridgeOption))
  753. return dOptions
  754. }
  755. func initBridgeDriver(controller libnetwork.NetworkController, config *config.Config) error {
  756. bridgeName := bridge.DefaultBridgeName
  757. if config.BridgeConfig.Iface != "" {
  758. bridgeName = config.BridgeConfig.Iface
  759. }
  760. netOption := map[string]string{
  761. bridge.BridgeName: bridgeName,
  762. bridge.DefaultBridge: strconv.FormatBool(true),
  763. netlabel.DriverMTU: strconv.Itoa(config.Mtu),
  764. bridge.EnableIPMasquerade: strconv.FormatBool(config.BridgeConfig.EnableIPMasq),
  765. bridge.EnableICC: strconv.FormatBool(config.BridgeConfig.InterContainerCommunication),
  766. }
  767. // --ip processing
  768. if config.BridgeConfig.DefaultIP != nil {
  769. netOption[bridge.DefaultBindingIP] = config.BridgeConfig.DefaultIP.String()
  770. }
  771. var (
  772. ipamV4Conf *libnetwork.IpamConf
  773. ipamV6Conf *libnetwork.IpamConf
  774. )
  775. ipamV4Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
  776. nwList, nw6List, err := netutils.ElectInterfaceAddresses(bridgeName)
  777. if err != nil {
  778. return errors.Wrap(err, "list bridge addresses failed")
  779. }
  780. nw := nwList[0]
  781. if len(nwList) > 1 && config.BridgeConfig.FixedCIDR != "" {
  782. _, fCIDR, err := net.ParseCIDR(config.BridgeConfig.FixedCIDR)
  783. if err != nil {
  784. return errors.Wrap(err, "parse CIDR failed")
  785. }
  786. // Iterate through in case there are multiple addresses for the bridge
  787. for _, entry := range nwList {
  788. if fCIDR.Contains(entry.IP) {
  789. nw = entry
  790. break
  791. }
  792. }
  793. }
  794. ipamV4Conf.PreferredPool = lntypes.GetIPNetCanonical(nw).String()
  795. hip, _ := lntypes.GetHostPartIP(nw.IP, nw.Mask)
  796. if hip.IsGlobalUnicast() {
  797. ipamV4Conf.Gateway = nw.IP.String()
  798. }
  799. if config.BridgeConfig.IP != "" {
  800. ipamV4Conf.PreferredPool = config.BridgeConfig.IP
  801. ip, _, err := net.ParseCIDR(config.BridgeConfig.IP)
  802. if err != nil {
  803. return err
  804. }
  805. ipamV4Conf.Gateway = ip.String()
  806. } else if bridgeName == bridge.DefaultBridgeName && ipamV4Conf.PreferredPool != "" {
  807. logrus.Infof("Default bridge (%s) is assigned with an IP address %s. Daemon option --bip can be used to set a preferred IP address", bridgeName, ipamV4Conf.PreferredPool)
  808. }
  809. if config.BridgeConfig.FixedCIDR != "" {
  810. _, fCIDR, err := net.ParseCIDR(config.BridgeConfig.FixedCIDR)
  811. if err != nil {
  812. return err
  813. }
  814. ipamV4Conf.SubPool = fCIDR.String()
  815. }
  816. if config.BridgeConfig.DefaultGatewayIPv4 != nil {
  817. ipamV4Conf.AuxAddresses["DefaultGatewayIPv4"] = config.BridgeConfig.DefaultGatewayIPv4.String()
  818. }
  819. var deferIPv6Alloc bool
  820. if config.BridgeConfig.FixedCIDRv6 != "" {
  821. _, fCIDRv6, err := net.ParseCIDR(config.BridgeConfig.FixedCIDRv6)
  822. if err != nil {
  823. return err
  824. }
  825. // In case user has specified the daemon flag --fixed-cidr-v6 and the passed network has
  826. // at least 48 host bits, we need to guarantee the current behavior where the containers'
  827. // IPv6 addresses will be constructed based on the containers' interface MAC address.
  828. // We do so by telling libnetwork to defer the IPv6 address allocation for the endpoints
  829. // on this network until after the driver has created the endpoint and returned the
  830. // constructed address. Libnetwork will then reserve this address with the ipam driver.
  831. ones, _ := fCIDRv6.Mask.Size()
  832. deferIPv6Alloc = ones <= 80
  833. if ipamV6Conf == nil {
  834. ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
  835. }
  836. ipamV6Conf.PreferredPool = fCIDRv6.String()
  837. // In case the --fixed-cidr-v6 is specified and the current docker0 bridge IPv6
  838. // address belongs to the same network, we need to inform libnetwork about it, so
  839. // that it can be reserved with IPAM and it will not be given away to somebody else
  840. for _, nw6 := range nw6List {
  841. if fCIDRv6.Contains(nw6.IP) {
  842. ipamV6Conf.Gateway = nw6.IP.String()
  843. break
  844. }
  845. }
  846. }
  847. if config.BridgeConfig.DefaultGatewayIPv6 != nil {
  848. if ipamV6Conf == nil {
  849. ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
  850. }
  851. ipamV6Conf.AuxAddresses["DefaultGatewayIPv6"] = config.BridgeConfig.DefaultGatewayIPv6.String()
  852. }
  853. v4Conf := []*libnetwork.IpamConf{ipamV4Conf}
  854. v6Conf := []*libnetwork.IpamConf{}
  855. if ipamV6Conf != nil {
  856. v6Conf = append(v6Conf, ipamV6Conf)
  857. }
  858. // Initialize default network on "bridge" with the same name
  859. _, err = controller.NewNetwork("bridge", "bridge", "",
  860. libnetwork.NetworkOptionEnableIPv6(config.BridgeConfig.EnableIPv6),
  861. libnetwork.NetworkOptionDriverOpts(netOption),
  862. libnetwork.NetworkOptionIpam("default", "", v4Conf, v6Conf, nil),
  863. libnetwork.NetworkOptionDeferIPv6Alloc(deferIPv6Alloc))
  864. if err != nil {
  865. return fmt.Errorf("Error creating default \"bridge\" network: %v", err)
  866. }
  867. return nil
  868. }
  869. // Remove default bridge interface if present (--bridge=none use case)
  870. func removeDefaultBridgeInterface() {
  871. if lnk, err := netlink.LinkByName(bridge.DefaultBridgeName); err == nil {
  872. if err := netlink.LinkDel(lnk); err != nil {
  873. logrus.Warnf("Failed to remove bridge interface (%s): %v", bridge.DefaultBridgeName, err)
  874. }
  875. }
  876. }
  877. func (daemon *Daemon) getLayerInit() func(string) error {
  878. return daemon.setupInitLayer
  879. }
  880. // Parse the remapped root (user namespace) option, which can be one of:
  881. // username - valid username from /etc/passwd
  882. // username:groupname - valid username; valid groupname from /etc/group
  883. // uid - 32-bit unsigned int valid Linux UID value
  884. // uid:gid - uid value; 32-bit unsigned int Linux GID value
  885. //
  886. // If no groupname is specified, and a username is specified, an attempt
  887. // will be made to lookup a gid for that username as a groupname
  888. //
  889. // If names are used, they are verified to exist in passwd/group
  890. func parseRemappedRoot(usergrp string) (string, string, error) {
  891. var (
  892. userID, groupID int
  893. username, groupname string
  894. )
  895. idparts := strings.Split(usergrp, ":")
  896. if len(idparts) > 2 {
  897. return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
  898. }
  899. if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
  900. // must be a uid; take it as valid
  901. userID = int(uid)
  902. luser, err := idtools.LookupUID(userID)
  903. if err != nil {
  904. return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
  905. }
  906. username = luser.Name
  907. if len(idparts) == 1 {
  908. // if the uid was numeric and no gid was specified, take the uid as the gid
  909. groupID = userID
  910. lgrp, err := idtools.LookupGID(groupID)
  911. if err != nil {
  912. return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
  913. }
  914. groupname = lgrp.Name
  915. }
  916. } else {
  917. lookupName := idparts[0]
  918. // special case: if the user specified "default", they want Docker to create or
  919. // use (after creation) the "dockremap" user/group for root remapping
  920. if lookupName == defaultIDSpecifier {
  921. lookupName = defaultRemappedID
  922. }
  923. luser, err := idtools.LookupUser(lookupName)
  924. if err != nil && idparts[0] != defaultIDSpecifier {
  925. // error if the name requested isn't the special "dockremap" ID
  926. return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
  927. } else if err != nil {
  928. // special case-- if the username == "default", then we have been asked
  929. // to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
  930. // ranges will be used for the user and group mappings in user namespaced containers
  931. _, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
  932. if err == nil {
  933. return defaultRemappedID, defaultRemappedID, nil
  934. }
  935. return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
  936. }
  937. username = luser.Name
  938. if len(idparts) == 1 {
  939. // we only have a string username, and no group specified; look up gid from username as group
  940. group, err := idtools.LookupGroup(lookupName)
  941. if err != nil {
  942. return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
  943. }
  944. groupname = group.Name
  945. }
  946. }
  947. if len(idparts) == 2 {
  948. // groupname or gid is separately specified and must be resolved
  949. // to an unsigned 32-bit gid
  950. if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
  951. // must be a gid, take it as valid
  952. groupID = int(gid)
  953. lgrp, err := idtools.LookupGID(groupID)
  954. if err != nil {
  955. return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err)
  956. }
  957. groupname = lgrp.Name
  958. } else {
  959. // not a number; attempt a lookup
  960. if _, err := idtools.LookupGroup(idparts[1]); err != nil {
  961. return "", "", fmt.Errorf("Error during groupname lookup for %q: %v", idparts[1], err)
  962. }
  963. groupname = idparts[1]
  964. }
  965. }
  966. return username, groupname, nil
  967. }
  968. func setupRemappedRoot(config *config.Config) (*idtools.IDMappings, error) {
  969. if runtime.GOOS != "linux" && config.RemappedRoot != "" {
  970. return nil, fmt.Errorf("User namespaces are only supported on Linux")
  971. }
  972. // if the daemon was started with remapped root option, parse
  973. // the config option to the int uid,gid values
  974. if config.RemappedRoot != "" {
  975. username, groupname, err := parseRemappedRoot(config.RemappedRoot)
  976. if err != nil {
  977. return nil, err
  978. }
  979. if username == "root" {
  980. // Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op
  981. // effectively
  982. logrus.Warn("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
  983. return &idtools.IDMappings{}, nil
  984. }
  985. logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
  986. // update remapped root setting now that we have resolved them to actual names
  987. config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)
  988. mappings, err := idtools.NewIDMappings(username, groupname)
  989. if err != nil {
  990. return nil, errors.Wrapf(err, "Can't create ID mappings: %v")
  991. }
  992. return mappings, nil
  993. }
  994. return &idtools.IDMappings{}, nil
  995. }
  996. func setupDaemonRoot(config *config.Config, rootDir string, rootIDs idtools.IDPair) error {
  997. config.Root = rootDir
  998. // the docker root metadata directory needs to have execute permissions for all users (g+x,o+x)
  999. // so that syscalls executing as non-root, operating on subdirectories of the graph root
  1000. // (e.g. mounted layers of a container) can traverse this path.
  1001. // The user namespace support will create subdirectories for the remapped root host uid:gid
  1002. // pair owned by that same uid:gid pair for proper write access to those needed metadata and
  1003. // layer content subtrees.
  1004. if _, err := os.Stat(rootDir); err == nil {
  1005. // root current exists; verify the access bits are correct by setting them
  1006. if err = os.Chmod(rootDir, 0711); err != nil {
  1007. return err
  1008. }
  1009. } else if os.IsNotExist(err) {
  1010. // no root exists yet, create it 0711 with root:root ownership
  1011. if err := os.MkdirAll(rootDir, 0711); err != nil {
  1012. return err
  1013. }
  1014. }
  1015. // if user namespaces are enabled we will create a subtree underneath the specified root
  1016. // with any/all specified remapped root uid/gid options on the daemon creating
  1017. // a new subdirectory with ownership set to the remapped uid/gid (so as to allow
  1018. // `chdir()` to work for containers namespaced to that uid/gid)
  1019. if config.RemappedRoot != "" {
  1020. config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootIDs.UID, rootIDs.GID))
  1021. logrus.Debugf("Creating user namespaced daemon root: %s", config.Root)
  1022. // Create the root directory if it doesn't exist
  1023. if err := idtools.MkdirAllAndChown(config.Root, 0700, rootIDs); err != nil {
  1024. return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
  1025. }
  1026. // we also need to verify that any pre-existing directories in the path to
  1027. // the graphroot won't block access to remapped root--if any pre-existing directory
  1028. // has strict permissions that don't allow "x", container start will fail, so
  1029. // better to warn and fail now
  1030. dirPath := config.Root
  1031. for {
  1032. dirPath = filepath.Dir(dirPath)
  1033. if dirPath == "/" {
  1034. break
  1035. }
  1036. if !idtools.CanAccess(dirPath, rootIDs) {
  1037. return fmt.Errorf("A subdirectory in your graphroot path (%s) restricts access to the remapped root uid/gid; please fix by allowing 'o+x' permissions on existing directories.", config.Root)
  1038. }
  1039. }
  1040. }
  1041. return nil
  1042. }
  1043. // registerLinks writes the links to a file.
  1044. func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *containertypes.HostConfig) error {
  1045. if hostConfig == nil || hostConfig.NetworkMode.IsUserDefined() {
  1046. return nil
  1047. }
  1048. for _, l := range hostConfig.Links {
  1049. name, alias, err := opts.ParseLink(l)
  1050. if err != nil {
  1051. return err
  1052. }
  1053. child, err := daemon.GetContainer(name)
  1054. if err != nil {
  1055. return errors.Wrapf(err, "could not get container for %s", name)
  1056. }
  1057. for child.HostConfig.NetworkMode.IsContainer() {
  1058. parts := strings.SplitN(string(child.HostConfig.NetworkMode), ":", 2)
  1059. child, err = daemon.GetContainer(parts[1])
  1060. if err != nil {
  1061. return errors.Wrapf(err, "Could not get container for %s", parts[1])
  1062. }
  1063. }
  1064. if child.HostConfig.NetworkMode.IsHost() {
  1065. return runconfig.ErrConflictHostNetworkAndLinks
  1066. }
  1067. if err := daemon.registerLink(container, child, alias); err != nil {
  1068. return err
  1069. }
  1070. }
  1071. // After we load all the links into the daemon
  1072. // set them to nil on the hostconfig
  1073. _, err := container.WriteHostConfig()
  1074. return err
  1075. }
  1076. // conditionalMountOnStart is a platform specific helper function during the
  1077. // container start to call mount.
  1078. func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error {
  1079. return daemon.Mount(container)
  1080. }
  1081. // conditionalUnmountOnCleanup is a platform specific helper function called
  1082. // during the cleanup of a container to unmount.
  1083. func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error {
  1084. return daemon.Unmount(container)
  1085. }
  1086. func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
  1087. if !c.IsRunning() {
  1088. return nil, errNotRunning(c.ID)
  1089. }
  1090. stats, err := daemon.containerd.Stats(c.ID)
  1091. if err != nil {
  1092. if strings.Contains(err.Error(), "container not found") {
  1093. return nil, containerNotFound(c.ID)
  1094. }
  1095. return nil, err
  1096. }
  1097. s := &types.StatsJSON{}
  1098. cgs := stats.CgroupStats
  1099. if cgs != nil {
  1100. s.BlkioStats = types.BlkioStats{
  1101. IoServiceBytesRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceBytesRecursive),
  1102. IoServicedRecursive: copyBlkioEntry(cgs.BlkioStats.IoServicedRecursive),
  1103. IoQueuedRecursive: copyBlkioEntry(cgs.BlkioStats.IoQueuedRecursive),
  1104. IoServiceTimeRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceTimeRecursive),
  1105. IoWaitTimeRecursive: copyBlkioEntry(cgs.BlkioStats.IoWaitTimeRecursive),
  1106. IoMergedRecursive: copyBlkioEntry(cgs.BlkioStats.IoMergedRecursive),
  1107. IoTimeRecursive: copyBlkioEntry(cgs.BlkioStats.IoTimeRecursive),
  1108. SectorsRecursive: copyBlkioEntry(cgs.BlkioStats.SectorsRecursive),
  1109. }
  1110. cpu := cgs.CpuStats
  1111. s.CPUStats = types.CPUStats{
  1112. CPUUsage: types.CPUUsage{
  1113. TotalUsage: cpu.CpuUsage.TotalUsage,
  1114. PercpuUsage: cpu.CpuUsage.PercpuUsage,
  1115. UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode,
  1116. UsageInUsermode: cpu.CpuUsage.UsageInUsermode,
  1117. },
  1118. ThrottlingData: types.ThrottlingData{
  1119. Periods: cpu.ThrottlingData.Periods,
  1120. ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods,
  1121. ThrottledTime: cpu.ThrottlingData.ThrottledTime,
  1122. },
  1123. }
  1124. mem := cgs.MemoryStats.Usage
  1125. s.MemoryStats = types.MemoryStats{
  1126. Usage: mem.Usage,
  1127. MaxUsage: mem.MaxUsage,
  1128. Stats: cgs.MemoryStats.Stats,
  1129. Failcnt: mem.Failcnt,
  1130. Limit: mem.Limit,
  1131. }
  1132. // if the container does not set memory limit, use the machineMemory
  1133. if mem.Limit > daemon.machineMemory && daemon.machineMemory > 0 {
  1134. s.MemoryStats.Limit = daemon.machineMemory
  1135. }
  1136. if cgs.PidsStats != nil {
  1137. s.PidsStats = types.PidsStats{
  1138. Current: cgs.PidsStats.Current,
  1139. }
  1140. }
  1141. }
  1142. s.Read, err = ptypes.Timestamp(stats.Timestamp)
  1143. if err != nil {
  1144. return nil, err
  1145. }
  1146. return s, nil
  1147. }
  1148. // setDefaultIsolation determines the default isolation mode for the
  1149. // daemon to run in. This is only applicable on Windows
  1150. func (daemon *Daemon) setDefaultIsolation() error {
  1151. return nil
  1152. }
  1153. func rootFSToAPIType(rootfs *image.RootFS) types.RootFS {
  1154. var layers []string
  1155. for _, l := range rootfs.DiffIDs {
  1156. layers = append(layers, l.String())
  1157. }
  1158. return types.RootFS{
  1159. Type: rootfs.Type,
  1160. Layers: layers,
  1161. }
  1162. }
  1163. // setupDaemonProcess sets various settings for the daemon's process
  1164. func setupDaemonProcess(config *config.Config) error {
  1165. // setup the daemons oom_score_adj
  1166. return setupOOMScoreAdj(config.OOMScoreAdjust)
  1167. }
  1168. func setupOOMScoreAdj(score int) error {
  1169. f, err := os.OpenFile("/proc/self/oom_score_adj", os.O_WRONLY, 0)
  1170. if err != nil {
  1171. return err
  1172. }
  1173. defer f.Close()
  1174. stringScore := strconv.Itoa(score)
  1175. _, err = f.WriteString(stringScore)
  1176. if os.IsPermission(err) {
  1177. // Setting oom_score_adj does not work in an
  1178. // unprivileged container. Ignore the error, but log
  1179. // it if we appear not to be in that situation.
  1180. if !rsystem.RunningInUserNS() {
  1181. logrus.Debugf("Permission denied writing %q to /proc/self/oom_score_adj", stringScore)
  1182. }
  1183. return nil
  1184. }
  1185. return err
  1186. }
  1187. func (daemon *Daemon) initCgroupsPath(path string) error {
  1188. if path == "/" || path == "." {
  1189. return nil
  1190. }
  1191. if daemon.configStore.CPURealtimePeriod == 0 && daemon.configStore.CPURealtimeRuntime == 0 {
  1192. return nil
  1193. }
  1194. // Recursively create cgroup to ensure that the system and all parent cgroups have values set
  1195. // for the period and runtime as this limits what the children can be set to.
  1196. daemon.initCgroupsPath(filepath.Dir(path))
  1197. mnt, root, err := cgroups.FindCgroupMountpointAndRoot("cpu")
  1198. if err != nil {
  1199. return err
  1200. }
  1201. // When docker is run inside docker, the root is based of the host cgroup.
  1202. // Should this be handled in runc/libcontainer/cgroups ?
  1203. if strings.HasPrefix(root, "/docker/") {
  1204. root = "/"
  1205. }
  1206. path = filepath.Join(mnt, root, path)
  1207. sysinfo := sysinfo.New(true)
  1208. if err := maybeCreateCPURealTimeFile(sysinfo.CPURealtimePeriod, daemon.configStore.CPURealtimePeriod, "cpu.rt_period_us", path); err != nil {
  1209. return err
  1210. }
  1211. if err := maybeCreateCPURealTimeFile(sysinfo.CPURealtimeRuntime, daemon.configStore.CPURealtimeRuntime, "cpu.rt_runtime_us", path); err != nil {
  1212. return err
  1213. }
  1214. return nil
  1215. }
  // maybeCreateCPURealTimeFile writes configValue, in decimal, to the file named
  // file inside the directory path, creating the directory if needed.
  //
  // It is a no-op returning nil unless sysinfoPresent is true and configValue is
  // non-zero.
  func maybeCreateCPURealTimeFile(sysinfoPresent bool, configValue int64, file string, path string) error {
  	if !sysinfoPresent || configValue == 0 {
  		return nil
  	}

  	if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
  		return err
  	}

  	target := filepath.Join(path, file)
  	payload := []byte(strconv.FormatInt(configValue, 10))
  	return ioutil.WriteFile(target, payload, 0700)
  }
  1227. func (daemon *Daemon) setupSeccompProfile() error {
  1228. if daemon.configStore.SeccompProfile != "" {
  1229. daemon.seccompProfilePath = daemon.configStore.SeccompProfile
  1230. b, err := ioutil.ReadFile(daemon.configStore.SeccompProfile)
  1231. if err != nil {
  1232. return fmt.Errorf("opening seccomp profile (%s) failed: %v", daemon.configStore.SeccompProfile, err)
  1233. }
  1234. daemon.seccompProfile = b
  1235. }
  1236. return nil
  1237. }