daemon_unix.go 57 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562
  1. // +build linux freebsd
  2. package daemon // import "github.com/docker/docker/daemon"
  3. import (
  4. "bufio"
  5. "context"
  6. "fmt"
  7. "io/ioutil"
  8. "net"
  9. "os"
  10. "path/filepath"
  11. "runtime"
  12. "runtime/debug"
  13. "strconv"
  14. "strings"
  15. "time"
  16. containerd_cgroups "github.com/containerd/cgroups"
  17. "github.com/docker/docker/api/types"
  18. "github.com/docker/docker/api/types/blkiodev"
  19. pblkiodev "github.com/docker/docker/api/types/blkiodev"
  20. containertypes "github.com/docker/docker/api/types/container"
  21. "github.com/docker/docker/container"
  22. "github.com/docker/docker/daemon/config"
  23. "github.com/docker/docker/daemon/initlayer"
  24. "github.com/docker/docker/opts"
  25. "github.com/docker/docker/pkg/containerfs"
  26. "github.com/docker/docker/pkg/idtools"
  27. "github.com/docker/docker/pkg/ioutils"
  28. "github.com/docker/docker/pkg/mount"
  29. "github.com/docker/docker/pkg/parsers"
  30. "github.com/docker/docker/pkg/parsers/kernel"
  31. "github.com/docker/docker/pkg/sysinfo"
  32. "github.com/docker/docker/runconfig"
  33. volumemounts "github.com/docker/docker/volume/mounts"
  34. "github.com/docker/libnetwork"
  35. nwconfig "github.com/docker/libnetwork/config"
  36. "github.com/docker/libnetwork/drivers/bridge"
  37. "github.com/docker/libnetwork/netlabel"
  38. "github.com/docker/libnetwork/netutils"
  39. "github.com/docker/libnetwork/options"
  40. lntypes "github.com/docker/libnetwork/types"
  41. "github.com/opencontainers/runc/libcontainer/cgroups"
  42. rsystem "github.com/opencontainers/runc/libcontainer/system"
  43. "github.com/opencontainers/runtime-spec/specs-go"
  44. "github.com/opencontainers/selinux/go-selinux/label"
  45. "github.com/pkg/errors"
  46. "github.com/sirupsen/logrus"
  47. "github.com/vishvananda/netlink"
  48. "golang.org/x/sys/unix"
  49. )
  50. const (
  51. // DefaultShimBinary is the default shim to be used by containerd if none
  52. // is specified
  53. DefaultShimBinary = "containerd-shim"
  54. // DefaultRuntimeBinary is the default runtime to be used by
  55. // containerd if none is specified
  56. DefaultRuntimeBinary = "runc"
  57. // See https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/tree/kernel/sched/sched.h?id=8cd9234c64c584432f6992fe944ca9e46ca8ea76#n269
  58. linuxMinCPUShares = 2
  59. linuxMaxCPUShares = 262144
  60. platformSupported = true
  61. // It's not kernel limit, we want this 4M limit to supply a reasonable functional container
  62. linuxMinMemory = 4194304
  63. // constants for remapped root settings
  64. defaultIDSpecifier = "default"
  65. defaultRemappedID = "dockremap"
  66. // constant for cgroup drivers
  67. cgroupFsDriver = "cgroupfs"
  68. cgroupSystemdDriver = "systemd"
  69. // DefaultRuntimeName is the default runtime to be used by
  70. // containerd if none is specified
  71. DefaultRuntimeName = "runc"
  72. )
  73. type containerGetter interface {
  74. GetContainer(string) (*container.Container, error)
  75. }
  76. func getMemoryResources(config containertypes.Resources) *specs.LinuxMemory {
  77. memory := specs.LinuxMemory{}
  78. if config.Memory > 0 {
  79. memory.Limit = &config.Memory
  80. }
  81. if config.MemoryReservation > 0 {
  82. memory.Reservation = &config.MemoryReservation
  83. }
  84. if config.MemorySwap > 0 {
  85. memory.Swap = &config.MemorySwap
  86. }
  87. if config.MemorySwappiness != nil {
  88. swappiness := uint64(*config.MemorySwappiness)
  89. memory.Swappiness = &swappiness
  90. }
  91. if config.OomKillDisable != nil {
  92. memory.DisableOOMKiller = config.OomKillDisable
  93. }
  94. if config.KernelMemory != 0 {
  95. memory.Kernel = &config.KernelMemory
  96. }
  97. if config.KernelMemoryTCP != 0 {
  98. memory.KernelTCP = &config.KernelMemoryTCP
  99. }
  100. return &memory
  101. }
  102. func getCPUResources(config containertypes.Resources) (*specs.LinuxCPU, error) {
  103. cpu := specs.LinuxCPU{}
  104. if config.CPUShares < 0 {
  105. return nil, fmt.Errorf("shares: invalid argument")
  106. }
  107. if config.CPUShares >= 0 {
  108. shares := uint64(config.CPUShares)
  109. cpu.Shares = &shares
  110. }
  111. if config.CpusetCpus != "" {
  112. cpu.Cpus = config.CpusetCpus
  113. }
  114. if config.CpusetMems != "" {
  115. cpu.Mems = config.CpusetMems
  116. }
  117. if config.NanoCPUs > 0 {
  118. // https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
  119. period := uint64(100 * time.Millisecond / time.Microsecond)
  120. quota := config.NanoCPUs * int64(period) / 1e9
  121. cpu.Period = &period
  122. cpu.Quota = &quota
  123. }
  124. if config.CPUPeriod != 0 {
  125. period := uint64(config.CPUPeriod)
  126. cpu.Period = &period
  127. }
  128. if config.CPUQuota != 0 {
  129. q := config.CPUQuota
  130. cpu.Quota = &q
  131. }
  132. if config.CPURealtimePeriod != 0 {
  133. period := uint64(config.CPURealtimePeriod)
  134. cpu.RealtimePeriod = &period
  135. }
  136. if config.CPURealtimeRuntime != 0 {
  137. c := config.CPURealtimeRuntime
  138. cpu.RealtimeRuntime = &c
  139. }
  140. return &cpu, nil
  141. }
  142. func getBlkioWeightDevices(config containertypes.Resources) ([]specs.LinuxWeightDevice, error) {
  143. var stat unix.Stat_t
  144. var blkioWeightDevices []specs.LinuxWeightDevice
  145. for _, weightDevice := range config.BlkioWeightDevice {
  146. if err := unix.Stat(weightDevice.Path, &stat); err != nil {
  147. return nil, err
  148. }
  149. weight := weightDevice.Weight
  150. d := specs.LinuxWeightDevice{Weight: &weight}
  151. d.Major = int64(stat.Rdev / 256)
  152. d.Minor = int64(stat.Rdev % 256)
  153. blkioWeightDevices = append(blkioWeightDevices, d)
  154. }
  155. return blkioWeightDevices, nil
  156. }
  157. func (daemon *Daemon) parseSecurityOpt(container *container.Container, hostConfig *containertypes.HostConfig) error {
  158. container.NoNewPrivileges = daemon.configStore.NoNewPrivileges
  159. return parseSecurityOpt(container, hostConfig)
  160. }
  161. func parseSecurityOpt(container *container.Container, config *containertypes.HostConfig) error {
  162. var (
  163. labelOpts []string
  164. err error
  165. )
  166. for _, opt := range config.SecurityOpt {
  167. if opt == "no-new-privileges" {
  168. container.NoNewPrivileges = true
  169. continue
  170. }
  171. if opt == "disable" {
  172. labelOpts = append(labelOpts, "disable")
  173. continue
  174. }
  175. var con []string
  176. if strings.Contains(opt, "=") {
  177. con = strings.SplitN(opt, "=", 2)
  178. } else if strings.Contains(opt, ":") {
  179. con = strings.SplitN(opt, ":", 2)
  180. logrus.Warn("Security options with `:` as a separator are deprecated and will be completely unsupported in 17.04, use `=` instead.")
  181. }
  182. if len(con) != 2 {
  183. return fmt.Errorf("invalid --security-opt 1: %q", opt)
  184. }
  185. switch con[0] {
  186. case "label":
  187. labelOpts = append(labelOpts, con[1])
  188. case "apparmor":
  189. container.AppArmorProfile = con[1]
  190. case "seccomp":
  191. container.SeccompProfile = con[1]
  192. case "no-new-privileges":
  193. noNewPrivileges, err := strconv.ParseBool(con[1])
  194. if err != nil {
  195. return fmt.Errorf("invalid --security-opt 2: %q", opt)
  196. }
  197. container.NoNewPrivileges = noNewPrivileges
  198. default:
  199. return fmt.Errorf("invalid --security-opt 2: %q", opt)
  200. }
  201. }
  202. container.ProcessLabel, container.MountLabel, err = label.InitLabels(labelOpts)
  203. return err
  204. }
  205. func getBlkioThrottleDevices(devs []*blkiodev.ThrottleDevice) ([]specs.LinuxThrottleDevice, error) {
  206. var throttleDevices []specs.LinuxThrottleDevice
  207. var stat unix.Stat_t
  208. for _, d := range devs {
  209. if err := unix.Stat(d.Path, &stat); err != nil {
  210. return nil, err
  211. }
  212. d := specs.LinuxThrottleDevice{Rate: d.Rate}
  213. d.Major = int64(stat.Rdev / 256)
  214. d.Minor = int64(stat.Rdev % 256)
  215. throttleDevices = append(throttleDevices, d)
  216. }
  217. return throttleDevices, nil
  218. }
  219. // adjustParallelLimit takes a number of objects and a proposed limit and
  220. // figures out if it's reasonable (and adjusts it accordingly). This is only
  221. // used for daemon startup, which does a lot of parallel loading of containers
  222. // (and if we exceed RLIMIT_NOFILE then we're in trouble).
  223. func adjustParallelLimit(n int, limit int) int {
  224. // Rule-of-thumb overhead factor (how many files will each goroutine open
  225. // simultaneously). Yes, this is ugly but to be frank this whole thing is
  226. // ugly.
  227. const overhead = 2
  228. // On Linux, we need to ensure that parallelStartupJobs doesn't cause us to
  229. // exceed RLIMIT_NOFILE. If parallelStartupJobs is too large, we reduce it
  230. // and give a warning (since in theory the user should increase their
  231. // ulimits to the largest possible value for dockerd).
  232. var rlim unix.Rlimit
  233. if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &rlim); err != nil {
  234. logrus.Warnf("Couldn't find dockerd's RLIMIT_NOFILE to double-check startup parallelism factor: %v", err)
  235. return limit
  236. }
  237. softRlimit := int(rlim.Cur)
  238. // Much fewer containers than RLIMIT_NOFILE. No need to adjust anything.
  239. if softRlimit > overhead*n {
  240. return limit
  241. }
  242. // RLIMIT_NOFILE big enough, no need to adjust anything.
  243. if softRlimit > overhead*limit {
  244. return limit
  245. }
  246. logrus.Warnf("Found dockerd's open file ulimit (%v) is far too small -- consider increasing it significantly (at least %v)", softRlimit, overhead*limit)
  247. return softRlimit / overhead
  248. }
  249. func checkKernel() error {
  250. // Check for unsupported kernel versions
  251. // FIXME: it would be cleaner to not test for specific versions, but rather
  252. // test for specific functionalities.
  253. // Unfortunately we can't test for the feature "does not cause a kernel panic"
  254. // without actually causing a kernel panic, so we need this workaround until
  255. // the circumstances of pre-3.10 crashes are clearer.
  256. // For details see https://github.com/docker/docker/issues/407
  257. // Docker 1.11 and above doesn't actually run on kernels older than 3.4,
  258. // due to containerd-shim usage of PR_SET_CHILD_SUBREAPER (introduced in 3.4).
  259. if !kernel.CheckKernelVersion(3, 10, 0) {
  260. v, _ := kernel.GetKernelVersion()
  261. if os.Getenv("DOCKER_NOWARN_KERNEL_VERSION") == "" {
  262. logrus.Fatalf("Your Linux kernel version %s is not supported for running docker. Please upgrade your kernel to 3.10.0 or newer.", v.String())
  263. }
  264. }
  265. return nil
  266. }
  267. // adaptContainerSettings is called during container creation to modify any
  268. // settings necessary in the HostConfig structure.
  269. func (daemon *Daemon) adaptContainerSettings(hostConfig *containertypes.HostConfig, adjustCPUShares bool) error {
  270. if adjustCPUShares && hostConfig.CPUShares > 0 {
  271. // Handle unsupported CPUShares
  272. if hostConfig.CPUShares < linuxMinCPUShares {
  273. logrus.Warnf("Changing requested CPUShares of %d to minimum allowed of %d", hostConfig.CPUShares, linuxMinCPUShares)
  274. hostConfig.CPUShares = linuxMinCPUShares
  275. } else if hostConfig.CPUShares > linuxMaxCPUShares {
  276. logrus.Warnf("Changing requested CPUShares of %d to maximum allowed of %d", hostConfig.CPUShares, linuxMaxCPUShares)
  277. hostConfig.CPUShares = linuxMaxCPUShares
  278. }
  279. }
  280. if hostConfig.Memory > 0 && hostConfig.MemorySwap == 0 {
  281. // By default, MemorySwap is set to twice the size of Memory.
  282. hostConfig.MemorySwap = hostConfig.Memory * 2
  283. }
  284. if hostConfig.ShmSize == 0 {
  285. hostConfig.ShmSize = config.DefaultShmSize
  286. if daemon.configStore != nil {
  287. hostConfig.ShmSize = int64(daemon.configStore.ShmSize)
  288. }
  289. }
  290. // Set default IPC mode, if unset for container
  291. if hostConfig.IpcMode.IsEmpty() {
  292. m := config.DefaultIpcMode
  293. if daemon.configStore != nil {
  294. m = daemon.configStore.IpcMode
  295. }
  296. hostConfig.IpcMode = containertypes.IpcMode(m)
  297. }
  298. adaptSharedNamespaceContainer(daemon, hostConfig)
  299. var err error
  300. opts, err := daemon.generateSecurityOpt(hostConfig)
  301. if err != nil {
  302. return err
  303. }
  304. hostConfig.SecurityOpt = append(hostConfig.SecurityOpt, opts...)
  305. if hostConfig.OomKillDisable == nil {
  306. defaultOomKillDisable := false
  307. hostConfig.OomKillDisable = &defaultOomKillDisable
  308. }
  309. return nil
  310. }
  311. // adaptSharedNamespaceContainer replaces container name with its ID in hostConfig.
  312. // To be more precisely, it modifies `container:name` to `container:ID` of PidMode, IpcMode
  313. // and NetworkMode.
  314. //
  315. // When a container shares its namespace with another container, use ID can keep the namespace
  316. // sharing connection between the two containers even the another container is renamed.
  317. func adaptSharedNamespaceContainer(daemon containerGetter, hostConfig *containertypes.HostConfig) {
  318. containerPrefix := "container:"
  319. if hostConfig.PidMode.IsContainer() {
  320. pidContainer := hostConfig.PidMode.Container()
  321. // if there is any error returned here, we just ignore it and leave it to be
  322. // handled in the following logic
  323. if c, err := daemon.GetContainer(pidContainer); err == nil {
  324. hostConfig.PidMode = containertypes.PidMode(containerPrefix + c.ID)
  325. }
  326. }
  327. if hostConfig.IpcMode.IsContainer() {
  328. ipcContainer := hostConfig.IpcMode.Container()
  329. if c, err := daemon.GetContainer(ipcContainer); err == nil {
  330. hostConfig.IpcMode = containertypes.IpcMode(containerPrefix + c.ID)
  331. }
  332. }
  333. if hostConfig.NetworkMode.IsContainer() {
  334. netContainer := hostConfig.NetworkMode.ConnectedContainer()
  335. if c, err := daemon.GetContainer(netContainer); err == nil {
  336. hostConfig.NetworkMode = containertypes.NetworkMode(containerPrefix + c.ID)
  337. }
  338. }
  339. }
  340. func verifyContainerResources(resources *containertypes.Resources, sysInfo *sysinfo.SysInfo, update bool) ([]string, error) {
  341. warnings := []string{}
  342. fixMemorySwappiness(resources)
  343. // memory subsystem checks and adjustments
  344. if resources.Memory != 0 && resources.Memory < linuxMinMemory {
  345. return warnings, fmt.Errorf("Minimum memory limit allowed is 4MB")
  346. }
  347. if resources.Memory > 0 && !sysInfo.MemoryLimit {
  348. warnings = append(warnings, "Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
  349. logrus.Warn("Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
  350. resources.Memory = 0
  351. resources.MemorySwap = -1
  352. }
  353. if resources.Memory > 0 && resources.MemorySwap != -1 && !sysInfo.SwapLimit {
  354. warnings = append(warnings, "Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.")
  355. logrus.Warn("Your kernel does not support swap limit capabilities,or the cgroup is not mounted. Memory limited without swap.")
  356. resources.MemorySwap = -1
  357. }
  358. if resources.Memory > 0 && resources.MemorySwap > 0 && resources.MemorySwap < resources.Memory {
  359. return warnings, fmt.Errorf("Minimum memoryswap limit should be larger than memory limit, see usage")
  360. }
  361. if resources.Memory == 0 && resources.MemorySwap > 0 && !update {
  362. return warnings, fmt.Errorf("You should always set the Memory limit when using Memoryswap limit, see usage")
  363. }
  364. if resources.MemorySwappiness != nil && !sysInfo.MemorySwappiness {
  365. warnings = append(warnings, "Your kernel does not support memory swappiness capabilities or the cgroup is not mounted. Memory swappiness discarded.")
  366. logrus.Warn("Your kernel does not support memory swappiness capabilities, or the cgroup is not mounted. Memory swappiness discarded.")
  367. resources.MemorySwappiness = nil
  368. }
  369. if resources.MemorySwappiness != nil {
  370. swappiness := *resources.MemorySwappiness
  371. if swappiness < 0 || swappiness > 100 {
  372. return warnings, fmt.Errorf("Invalid value: %v, valid memory swappiness range is 0-100", swappiness)
  373. }
  374. }
  375. if resources.MemoryReservation > 0 && !sysInfo.MemoryReservation {
  376. warnings = append(warnings, "Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.")
  377. logrus.Warn("Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.")
  378. resources.MemoryReservation = 0
  379. }
  380. if resources.MemoryReservation > 0 && resources.MemoryReservation < linuxMinMemory {
  381. return warnings, fmt.Errorf("Minimum memory reservation allowed is 4MB")
  382. }
  383. if resources.Memory > 0 && resources.MemoryReservation > 0 && resources.Memory < resources.MemoryReservation {
  384. return warnings, fmt.Errorf("Minimum memory limit can not be less than memory reservation limit, see usage")
  385. }
  386. if resources.KernelMemory > 0 && !sysInfo.KernelMemory {
  387. warnings = append(warnings, "Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
  388. logrus.Warn("Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
  389. resources.KernelMemory = 0
  390. }
  391. if resources.KernelMemory > 0 && resources.KernelMemory < linuxMinMemory {
  392. return warnings, fmt.Errorf("Minimum kernel memory limit allowed is 4MB")
  393. }
  394. if resources.KernelMemory > 0 && !kernel.CheckKernelVersion(4, 0, 0) {
  395. warnings = append(warnings, "You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
  396. logrus.Warn("You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
  397. }
  398. if resources.OomKillDisable != nil && !sysInfo.OomKillDisable {
  399. // only produce warnings if the setting wasn't to *disable* the OOM Kill; no point
  400. // warning the caller if they already wanted the feature to be off
  401. if *resources.OomKillDisable {
  402. warnings = append(warnings, "Your kernel does not support OomKillDisable. OomKillDisable discarded.")
  403. logrus.Warn("Your kernel does not support OomKillDisable. OomKillDisable discarded.")
  404. }
  405. resources.OomKillDisable = nil
  406. }
  407. if resources.PidsLimit != 0 && !sysInfo.PidsLimit {
  408. warnings = append(warnings, "Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.")
  409. logrus.Warn("Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.")
  410. resources.PidsLimit = 0
  411. }
  412. // cpu subsystem checks and adjustments
  413. if resources.NanoCPUs > 0 && resources.CPUPeriod > 0 {
  414. return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Period cannot both be set")
  415. }
  416. if resources.NanoCPUs > 0 && resources.CPUQuota > 0 {
  417. return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Quota cannot both be set")
  418. }
  419. if resources.NanoCPUs > 0 && (!sysInfo.CPUCfsPeriod || !sysInfo.CPUCfsQuota) {
  420. return warnings, fmt.Errorf("NanoCPUs can not be set, as your kernel does not support CPU cfs period/quota or the cgroup is not mounted")
  421. }
  422. // The highest precision we could get on Linux is 0.001, by setting
  423. // cpu.cfs_period_us=1000ms
  424. // cpu.cfs_quota=1ms
  425. // See the following link for details:
  426. // https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
  427. // Here we don't set the lower limit and it is up to the underlying platform (e.g., Linux) to return an error.
  428. // The error message is 0.01 so that this is consistent with Windows
  429. if resources.NanoCPUs < 0 || resources.NanoCPUs > int64(sysinfo.NumCPU())*1e9 {
  430. return warnings, fmt.Errorf("Range of CPUs is from 0.01 to %d.00, as there are only %d CPUs available", sysinfo.NumCPU(), sysinfo.NumCPU())
  431. }
  432. if resources.CPUShares > 0 && !sysInfo.CPUShares {
  433. warnings = append(warnings, "Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.")
  434. logrus.Warn("Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.")
  435. resources.CPUShares = 0
  436. }
  437. if resources.CPUPeriod > 0 && !sysInfo.CPUCfsPeriod {
  438. warnings = append(warnings, "Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.")
  439. logrus.Warn("Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.")
  440. resources.CPUPeriod = 0
  441. }
  442. if resources.CPUPeriod != 0 && (resources.CPUPeriod < 1000 || resources.CPUPeriod > 1000000) {
  443. return warnings, fmt.Errorf("CPU cfs period can not be less than 1ms (i.e. 1000) or larger than 1s (i.e. 1000000)")
  444. }
  445. if resources.CPUQuota > 0 && !sysInfo.CPUCfsQuota {
  446. warnings = append(warnings, "Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.")
  447. logrus.Warn("Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.")
  448. resources.CPUQuota = 0
  449. }
  450. if resources.CPUQuota > 0 && resources.CPUQuota < 1000 {
  451. return warnings, fmt.Errorf("CPU cfs quota can not be less than 1ms (i.e. 1000)")
  452. }
  453. if resources.CPUPercent > 0 {
  454. warnings = append(warnings, fmt.Sprintf("%s does not support CPU percent. Percent discarded.", runtime.GOOS))
  455. logrus.Warnf("%s does not support CPU percent. Percent discarded.", runtime.GOOS)
  456. resources.CPUPercent = 0
  457. }
  458. // cpuset subsystem checks and adjustments
  459. if (resources.CpusetCpus != "" || resources.CpusetMems != "") && !sysInfo.Cpuset {
  460. warnings = append(warnings, "Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.")
  461. logrus.Warn("Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.")
  462. resources.CpusetCpus = ""
  463. resources.CpusetMems = ""
  464. }
  465. cpusAvailable, err := sysInfo.IsCpusetCpusAvailable(resources.CpusetCpus)
  466. if err != nil {
  467. return warnings, errors.Wrapf(err, "Invalid value %s for cpuset cpus", resources.CpusetCpus)
  468. }
  469. if !cpusAvailable {
  470. return warnings, fmt.Errorf("Requested CPUs are not available - requested %s, available: %s", resources.CpusetCpus, sysInfo.Cpus)
  471. }
  472. memsAvailable, err := sysInfo.IsCpusetMemsAvailable(resources.CpusetMems)
  473. if err != nil {
  474. return warnings, errors.Wrapf(err, "Invalid value %s for cpuset mems", resources.CpusetMems)
  475. }
  476. if !memsAvailable {
  477. return warnings, fmt.Errorf("Requested memory nodes are not available - requested %s, available: %s", resources.CpusetMems, sysInfo.Mems)
  478. }
  479. // blkio subsystem checks and adjustments
  480. if resources.BlkioWeight > 0 && !sysInfo.BlkioWeight {
  481. warnings = append(warnings, "Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.")
  482. logrus.Warn("Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.")
  483. resources.BlkioWeight = 0
  484. }
  485. if resources.BlkioWeight > 0 && (resources.BlkioWeight < 10 || resources.BlkioWeight > 1000) {
  486. return warnings, fmt.Errorf("Range of blkio weight is from 10 to 1000")
  487. }
  488. if resources.IOMaximumBandwidth != 0 || resources.IOMaximumIOps != 0 {
  489. return warnings, fmt.Errorf("Invalid QoS settings: %s does not support Maximum IO Bandwidth or Maximum IO IOps", runtime.GOOS)
  490. }
  491. if len(resources.BlkioWeightDevice) > 0 && !sysInfo.BlkioWeightDevice {
  492. warnings = append(warnings, "Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.")
  493. logrus.Warn("Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.")
  494. resources.BlkioWeightDevice = []*pblkiodev.WeightDevice{}
  495. }
  496. if len(resources.BlkioDeviceReadBps) > 0 && !sysInfo.BlkioReadBpsDevice {
  497. warnings = append(warnings, "Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded.")
  498. logrus.Warn("Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded")
  499. resources.BlkioDeviceReadBps = []*pblkiodev.ThrottleDevice{}
  500. }
  501. if len(resources.BlkioDeviceWriteBps) > 0 && !sysInfo.BlkioWriteBpsDevice {
  502. warnings = append(warnings, "Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.")
  503. logrus.Warn("Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.")
  504. resources.BlkioDeviceWriteBps = []*pblkiodev.ThrottleDevice{}
  505. }
  506. if len(resources.BlkioDeviceReadIOps) > 0 && !sysInfo.BlkioReadIOpsDevice {
  507. warnings = append(warnings, "Your kernel does not support IOPS Block read limit or the cgroup is not mounted. Block I/O IOPS read limit discarded.")
  508. logrus.Warn("Your kernel does not support IOPS Block I/O read limit in IO or the cgroup is not mounted. Block I/O IOPS read limit discarded.")
  509. resources.BlkioDeviceReadIOps = []*pblkiodev.ThrottleDevice{}
  510. }
  511. if len(resources.BlkioDeviceWriteIOps) > 0 && !sysInfo.BlkioWriteIOpsDevice {
  512. warnings = append(warnings, "Your kernel does not support IOPS Block write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.")
  513. logrus.Warn("Your kernel does not support IOPS Block I/O write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.")
  514. resources.BlkioDeviceWriteIOps = []*pblkiodev.ThrottleDevice{}
  515. }
  516. return warnings, nil
  517. }
  518. func (daemon *Daemon) getCgroupDriver() string {
  519. cgroupDriver := cgroupFsDriver
  520. if UsingSystemd(daemon.configStore) {
  521. cgroupDriver = cgroupSystemdDriver
  522. }
  523. return cgroupDriver
  524. }
  525. // getCD gets the raw value of the native.cgroupdriver option, if set.
  526. func getCD(config *config.Config) string {
  527. for _, option := range config.ExecOptions {
  528. key, val, err := parsers.ParseKeyValueOpt(option)
  529. if err != nil || !strings.EqualFold(key, "native.cgroupdriver") {
  530. continue
  531. }
  532. return val
  533. }
  534. return ""
  535. }
  536. // VerifyCgroupDriver validates native.cgroupdriver
  537. func VerifyCgroupDriver(config *config.Config) error {
  538. cd := getCD(config)
  539. if cd == "" || cd == cgroupFsDriver || cd == cgroupSystemdDriver {
  540. return nil
  541. }
  542. return fmt.Errorf("native.cgroupdriver option %s not supported", cd)
  543. }
  544. // UsingSystemd returns true if cli option includes native.cgroupdriver=systemd
  545. func UsingSystemd(config *config.Config) bool {
  546. return getCD(config) == cgroupSystemdDriver
  547. }
  548. // verifyPlatformContainerSettings performs platform-specific validation of the
  549. // hostconfig and config structures.
  550. func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.HostConfig, config *containertypes.Config, update bool) ([]string, error) {
  551. var warnings []string
  552. sysInfo := sysinfo.New(true)
  553. w, err := verifyContainerResources(&hostConfig.Resources, sysInfo, update)
  554. // no matter err is nil or not, w could have data in itself.
  555. warnings = append(warnings, w...)
  556. if err != nil {
  557. return warnings, err
  558. }
  559. if hostConfig.ShmSize < 0 {
  560. return warnings, fmt.Errorf("SHM size can not be less than 0")
  561. }
  562. if hostConfig.OomScoreAdj < -1000 || hostConfig.OomScoreAdj > 1000 {
  563. return warnings, fmt.Errorf("Invalid value %d, range for oom score adj is [-1000, 1000]", hostConfig.OomScoreAdj)
  564. }
  565. // ip-forwarding does not affect container with '--net=host' (or '--net=none')
  566. if sysInfo.IPv4ForwardingDisabled && !(hostConfig.NetworkMode.IsHost() || hostConfig.NetworkMode.IsNone()) {
  567. warnings = append(warnings, "IPv4 forwarding is disabled. Networking will not work.")
  568. logrus.Warn("IPv4 forwarding is disabled. Networking will not work")
  569. }
  570. // check for various conflicting options with user namespaces
  571. if daemon.configStore.RemappedRoot != "" && hostConfig.UsernsMode.IsPrivate() {
  572. if hostConfig.Privileged {
  573. return warnings, fmt.Errorf("privileged mode is incompatible with user namespaces. You must run the container in the host namespace when running privileged mode")
  574. }
  575. if hostConfig.NetworkMode.IsHost() && !hostConfig.UsernsMode.IsHost() {
  576. return warnings, fmt.Errorf("cannot share the host's network namespace when user namespaces are enabled")
  577. }
  578. if hostConfig.PidMode.IsHost() && !hostConfig.UsernsMode.IsHost() {
  579. return warnings, fmt.Errorf("cannot share the host PID namespace when user namespaces are enabled")
  580. }
  581. }
  582. if hostConfig.CgroupParent != "" && UsingSystemd(daemon.configStore) {
  583. // CgroupParent for systemd cgroup should be named as "xxx.slice"
  584. if len(hostConfig.CgroupParent) <= 6 || !strings.HasSuffix(hostConfig.CgroupParent, ".slice") {
  585. return warnings, fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"")
  586. }
  587. }
  588. if hostConfig.Runtime == "" {
  589. hostConfig.Runtime = daemon.configStore.GetDefaultRuntimeName()
  590. }
  591. if rt := daemon.configStore.GetRuntime(hostConfig.Runtime); rt == nil {
  592. return warnings, fmt.Errorf("Unknown runtime specified %s", hostConfig.Runtime)
  593. }
  594. parser := volumemounts.NewParser(runtime.GOOS)
  595. for dest := range hostConfig.Tmpfs {
  596. if err := parser.ValidateTmpfsMountDestination(dest); err != nil {
  597. return warnings, err
  598. }
  599. }
  600. return warnings, nil
  601. }
  602. func (daemon *Daemon) loadRuntimes() error {
  603. return daemon.initRuntimes(daemon.configStore.Runtimes)
  604. }
  605. func (daemon *Daemon) initRuntimes(runtimes map[string]types.Runtime) (err error) {
  606. runtimeDir := filepath.Join(daemon.configStore.Root, "runtimes")
  607. // Remove old temp directory if any
  608. os.RemoveAll(runtimeDir + "-old")
  609. tmpDir, err := ioutils.TempDir(daemon.configStore.Root, "gen-runtimes")
  610. if err != nil {
  611. return errors.Wrap(err, "failed to get temp dir to generate runtime scripts")
  612. }
  613. defer func() {
  614. if err != nil {
  615. if err1 := os.RemoveAll(tmpDir); err1 != nil {
  616. logrus.WithError(err1).WithField("dir", tmpDir).
  617. Warn("failed to remove tmp dir")
  618. }
  619. return
  620. }
  621. if err = os.Rename(runtimeDir, runtimeDir+"-old"); err != nil {
  622. return
  623. }
  624. if err = os.Rename(tmpDir, runtimeDir); err != nil {
  625. err = errors.Wrap(err, "failed to setup runtimes dir, new containers may not start")
  626. return
  627. }
  628. if err = os.RemoveAll(runtimeDir + "-old"); err != nil {
  629. logrus.WithError(err).WithField("dir", tmpDir).
  630. Warn("failed to remove old runtimes dir")
  631. }
  632. }()
  633. for name, rt := range runtimes {
  634. if len(rt.Args) == 0 {
  635. continue
  636. }
  637. script := filepath.Join(tmpDir, name)
  638. content := fmt.Sprintf("#!/bin/sh\n%s %s $@\n", rt.Path, strings.Join(rt.Args, " "))
  639. if err := ioutil.WriteFile(script, []byte(content), 0700); err != nil {
  640. return err
  641. }
  642. }
  643. return nil
  644. }
  645. // verifyDaemonSettings performs validation of daemon config struct
  646. func verifyDaemonSettings(conf *config.Config) error {
  647. // Check for mutually incompatible config options
  648. if conf.BridgeConfig.Iface != "" && conf.BridgeConfig.IP != "" {
  649. return fmt.Errorf("You specified -b & --bip, mutually exclusive options. Please specify only one")
  650. }
  651. if !conf.BridgeConfig.EnableIPTables && !conf.BridgeConfig.InterContainerCommunication {
  652. return fmt.Errorf("You specified --iptables=false with --icc=false. ICC=false uses iptables to function. Please set --icc or --iptables to true")
  653. }
  654. if !conf.BridgeConfig.EnableIPTables && conf.BridgeConfig.EnableIPMasq {
  655. conf.BridgeConfig.EnableIPMasq = false
  656. }
  657. if err := VerifyCgroupDriver(conf); err != nil {
  658. return err
  659. }
  660. if conf.CgroupParent != "" && UsingSystemd(conf) {
  661. if len(conf.CgroupParent) <= 6 || !strings.HasSuffix(conf.CgroupParent, ".slice") {
  662. return fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"")
  663. }
  664. }
  665. if conf.DefaultRuntime == "" {
  666. conf.DefaultRuntime = config.StockRuntimeName
  667. }
  668. if conf.Runtimes == nil {
  669. conf.Runtimes = make(map[string]types.Runtime)
  670. }
  671. conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeName}
  672. return nil
  673. }
  674. // checkSystem validates platform-specific requirements
  675. func checkSystem() error {
  676. if os.Geteuid() != 0 {
  677. return fmt.Errorf("The Docker daemon needs to be run as root")
  678. }
  679. return checkKernel()
  680. }
  681. // configureMaxThreads sets the Go runtime max threads threshold
  682. // which is 90% of the kernel setting from /proc/sys/kernel/threads-max
  683. func configureMaxThreads(config *config.Config) error {
  684. mt, err := ioutil.ReadFile("/proc/sys/kernel/threads-max")
  685. if err != nil {
  686. return err
  687. }
  688. mtint, err := strconv.Atoi(strings.TrimSpace(string(mt)))
  689. if err != nil {
  690. return err
  691. }
  692. maxThreads := (mtint / 100) * 90
  693. debug.SetMaxThreads(maxThreads)
  694. logrus.Debugf("Golang's threads limit set to %d", maxThreads)
  695. return nil
  696. }
  697. func overlaySupportsSelinux() (bool, error) {
  698. f, err := os.Open("/proc/kallsyms")
  699. if err != nil {
  700. if os.IsNotExist(err) {
  701. return false, nil
  702. }
  703. return false, err
  704. }
  705. defer f.Close()
  706. var symAddr, symType, symName, text string
  707. s := bufio.NewScanner(f)
  708. for s.Scan() {
  709. if err := s.Err(); err != nil {
  710. return false, err
  711. }
  712. text = s.Text()
  713. if _, err := fmt.Sscanf(text, "%s %s %s", &symAddr, &symType, &symName); err != nil {
  714. return false, fmt.Errorf("Scanning '%s' failed: %s", text, err)
  715. }
  716. // Check for presence of symbol security_inode_copy_up.
  717. if symName == "security_inode_copy_up" {
  718. return true, nil
  719. }
  720. }
  721. return false, nil
  722. }
  723. // configureKernelSecuritySupport configures and validates security support for the kernel
  724. func configureKernelSecuritySupport(config *config.Config, driverName string) error {
  725. if config.EnableSelinuxSupport {
  726. if !selinuxEnabled() {
  727. logrus.Warn("Docker could not enable SELinux on the host system")
  728. return nil
  729. }
  730. if driverName == "overlay" || driverName == "overlay2" {
  731. // If driver is overlay or overlay2, make sure kernel
  732. // supports selinux with overlay.
  733. supported, err := overlaySupportsSelinux()
  734. if err != nil {
  735. return err
  736. }
  737. if !supported {
  738. logrus.Warnf("SELinux is not supported with the %v graph driver on this kernel", driverName)
  739. }
  740. }
  741. } else {
  742. selinuxSetDisabled()
  743. }
  744. return nil
  745. }
  746. func (daemon *Daemon) initNetworkController(config *config.Config, activeSandboxes map[string]interface{}) (libnetwork.NetworkController, error) {
  747. netOptions, err := daemon.networkOptions(config, daemon.PluginStore, activeSandboxes)
  748. if err != nil {
  749. return nil, err
  750. }
  751. controller, err := libnetwork.New(netOptions...)
  752. if err != nil {
  753. return nil, fmt.Errorf("error obtaining controller instance: %v", err)
  754. }
  755. if len(activeSandboxes) > 0 {
  756. logrus.Info("There are old running containers, the network config will not take affect")
  757. return controller, nil
  758. }
  759. // Initialize default network on "null"
  760. if n, _ := controller.NetworkByName("none"); n == nil {
  761. if _, err := controller.NewNetwork("null", "none", "", libnetwork.NetworkOptionPersist(true)); err != nil {
  762. return nil, fmt.Errorf("Error creating default \"null\" network: %v", err)
  763. }
  764. }
  765. // Initialize default network on "host"
  766. if n, _ := controller.NetworkByName("host"); n == nil {
  767. if _, err := controller.NewNetwork("host", "host", "", libnetwork.NetworkOptionPersist(true)); err != nil {
  768. return nil, fmt.Errorf("Error creating default \"host\" network: %v", err)
  769. }
  770. }
  771. // Clear stale bridge network
  772. if n, err := controller.NetworkByName("bridge"); err == nil {
  773. if err = n.Delete(); err != nil {
  774. return nil, fmt.Errorf("could not delete the default bridge network: %v", err)
  775. }
  776. if len(config.NetworkConfig.DefaultAddressPools.Value()) > 0 && !daemon.configStore.LiveRestoreEnabled {
  777. removeDefaultBridgeInterface()
  778. }
  779. }
  780. if !config.DisableBridge {
  781. // Initialize default driver "bridge"
  782. if err := initBridgeDriver(controller, config); err != nil {
  783. return nil, err
  784. }
  785. } else {
  786. removeDefaultBridgeInterface()
  787. }
  788. return controller, nil
  789. }
  790. func driverOptions(config *config.Config) []nwconfig.Option {
  791. bridgeConfig := options.Generic{
  792. "EnableIPForwarding": config.BridgeConfig.EnableIPForward,
  793. "EnableIPTables": config.BridgeConfig.EnableIPTables,
  794. "EnableUserlandProxy": config.BridgeConfig.EnableUserlandProxy,
  795. "UserlandProxyPath": config.BridgeConfig.UserlandProxyPath}
  796. bridgeOption := options.Generic{netlabel.GenericData: bridgeConfig}
  797. dOptions := []nwconfig.Option{}
  798. dOptions = append(dOptions, nwconfig.OptionDriverConfig("bridge", bridgeOption))
  799. return dOptions
  800. }
// initBridgeDriver creates the default "bridge" network on the given
// controller from the daemon's bridge configuration.
//
// It assembles the driver options (bridge name, default-bridge flag, MTU, IP
// masquerading, ICC and the optional default binding IP) and the IPv4/IPv6
// IPAM configuration, then calls controller.NewNetwork. An error is returned
// when the bridge addresses cannot be listed, when one of the configured
// CIDRs cannot be parsed, or when the network cannot be created.
func initBridgeDriver(controller libnetwork.NetworkController, config *config.Config) error {
	// Use the configured bridge interface when one is set, otherwise the
	// driver's default bridge name.
	bridgeName := bridge.DefaultBridgeName
	if config.BridgeConfig.Iface != "" {
		bridgeName = config.BridgeConfig.Iface
	}
	netOption := map[string]string{
		bridge.BridgeName:         bridgeName,
		bridge.DefaultBridge:      strconv.FormatBool(true),
		netlabel.DriverMTU:        strconv.Itoa(config.Mtu),
		bridge.EnableIPMasquerade: strconv.FormatBool(config.BridgeConfig.EnableIPMasq),
		bridge.EnableICC:          strconv.FormatBool(config.BridgeConfig.InterContainerCommunication),
	}

	// --ip processing
	if config.BridgeConfig.DefaultIP != nil {
		netOption[bridge.DefaultBindingIP] = config.BridgeConfig.DefaultIP.String()
	}

	var (
		ipamV4Conf *libnetwork.IpamConf
		ipamV6Conf *libnetwork.IpamConf
	)

	ipamV4Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}

	nwList, nw6List, err := netutils.ElectInterfaceAddresses(bridgeName)
	if err != nil {
		return errors.Wrap(err, "list bridge addresses failed")
	}

	// Default to the first IPv4 entry.
	// NOTE(review): this indexes nwList[0] unconditionally, so it assumes
	// ElectInterfaceAddresses returns at least one IPv4 network whenever err is
	// nil -- confirm against the libnetwork implementation.
	nw := nwList[0]
	if len(nwList) > 1 && config.BridgeConfig.FixedCIDR != "" {
		_, fCIDR, err := net.ParseCIDR(config.BridgeConfig.FixedCIDR)
		if err != nil {
			return errors.Wrap(err, "parse CIDR failed")
		}
		// Iterate through in case there are multiple addresses for the bridge
		for _, entry := range nwList {
			if fCIDR.Contains(entry.IP) {
				nw = entry
				break
			}
		}
	}

	// The selected network becomes the preferred pool; its address is used as
	// the gateway only when the host part is a global unicast address.
	ipamV4Conf.PreferredPool = lntypes.GetIPNetCanonical(nw).String()
	hip, _ := lntypes.GetHostPartIP(nw.IP, nw.Mask)
	if hip.IsGlobalUnicast() {
		ipamV4Conf.Gateway = nw.IP.String()
	}

	// An explicitly configured bridge IP overrides both the preferred pool and
	// the gateway derived above.
	if config.BridgeConfig.IP != "" {
		ipamV4Conf.PreferredPool = config.BridgeConfig.IP
		ip, _, err := net.ParseCIDR(config.BridgeConfig.IP)
		if err != nil {
			return err
		}
		ipamV4Conf.Gateway = ip.String()
	} else if bridgeName == bridge.DefaultBridgeName && ipamV4Conf.PreferredPool != "" {
		logrus.Infof("Default bridge (%s) is assigned with an IP address %s. Daemon option --bip can be used to set a preferred IP address", bridgeName, ipamV4Conf.PreferredPool)
	}

	// The fixed CIDR, when configured, becomes the IPv4 sub-pool.
	if config.BridgeConfig.FixedCIDR != "" {
		_, fCIDR, err := net.ParseCIDR(config.BridgeConfig.FixedCIDR)
		if err != nil {
			return err
		}

		ipamV4Conf.SubPool = fCIDR.String()
	}

	if config.BridgeConfig.DefaultGatewayIPv4 != nil {
		ipamV4Conf.AuxAddresses["DefaultGatewayIPv4"] = config.BridgeConfig.DefaultGatewayIPv4.String()
	}

	var deferIPv6Alloc bool
	if config.BridgeConfig.FixedCIDRv6 != "" {
		_, fCIDRv6, err := net.ParseCIDR(config.BridgeConfig.FixedCIDRv6)
		if err != nil {
			return err
		}

		// In case user has specified the daemon flag --fixed-cidr-v6 and the passed network has
		// at least 48 host bits, we need to guarantee the current behavior where the containers'
		// IPv6 addresses will be constructed based on the containers' interface MAC address.
		// We do so by telling libnetwork to defer the IPv6 address allocation for the endpoints
		// on this network until after the driver has created the endpoint and returned the
		// constructed address. Libnetwork will then reserve this address with the ipam driver.
		// A prefix length of at most 80 bits leaves at least 128-80 = 48 host bits.
		ones, _ := fCIDRv6.Mask.Size()
		deferIPv6Alloc = ones <= 80

		if ipamV6Conf == nil {
			ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
		}
		ipamV6Conf.PreferredPool = fCIDRv6.String()

		// In case the --fixed-cidr-v6 is specified and the current docker0 bridge IPv6
		// address belongs to the same network, we need to inform libnetwork about it, so
		// that it can be reserved with IPAM and it will not be given away to somebody else
		for _, nw6 := range nw6List {
			if fCIDRv6.Contains(nw6.IP) {
				ipamV6Conf.Gateway = nw6.IP.String()
				break
			}
		}
	}

	// The IPv6 IPAM configuration is created lazily: it may not exist yet when
	// only a default IPv6 gateway is configured.
	if config.BridgeConfig.DefaultGatewayIPv6 != nil {
		if ipamV6Conf == nil {
			ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
		}
		ipamV6Conf.AuxAddresses["DefaultGatewayIPv6"] = config.BridgeConfig.DefaultGatewayIPv6.String()
	}

	// The IPv4 configuration is always passed; the IPv6 list stays empty unless
	// an IPv6 configuration was built above.
	v4Conf := []*libnetwork.IpamConf{ipamV4Conf}
	v6Conf := []*libnetwork.IpamConf{}
	if ipamV6Conf != nil {
		v6Conf = append(v6Conf, ipamV6Conf)
	}
	// Initialize default network on "bridge" with the same name
	_, err = controller.NewNetwork("bridge", "bridge", "",
		libnetwork.NetworkOptionEnableIPv6(config.BridgeConfig.EnableIPv6),
		libnetwork.NetworkOptionDriverOpts(netOption),
		libnetwork.NetworkOptionIpam("default", "", v4Conf, v6Conf, nil),
		libnetwork.NetworkOptionDeferIPv6Alloc(deferIPv6Alloc))
	if err != nil {
		return fmt.Errorf("Error creating default \"bridge\" network: %v", err)
	}
	return nil
}
  915. // Remove default bridge interface if present (--bridge=none use case)
  916. func removeDefaultBridgeInterface() {
  917. if lnk, err := netlink.LinkByName(bridge.DefaultBridgeName); err == nil {
  918. if err := netlink.LinkDel(lnk); err != nil {
  919. logrus.Warnf("Failed to remove bridge interface (%s): %v", bridge.DefaultBridgeName, err)
  920. }
  921. }
  922. }
  923. func setupInitLayer(idMapping *idtools.IdentityMapping) func(containerfs.ContainerFS) error {
  924. return func(initPath containerfs.ContainerFS) error {
  925. return initlayer.Setup(initPath, idMapping.RootPair())
  926. }
  927. }
  928. // Parse the remapped root (user namespace) option, which can be one of:
  929. // username - valid username from /etc/passwd
  930. // username:groupname - valid username; valid groupname from /etc/group
  931. // uid - 32-bit unsigned int valid Linux UID value
  932. // uid:gid - uid value; 32-bit unsigned int Linux GID value
  933. //
  934. // If no groupname is specified, and a username is specified, an attempt
  935. // will be made to lookup a gid for that username as a groupname
  936. //
  937. // If names are used, they are verified to exist in passwd/group
  938. func parseRemappedRoot(usergrp string) (string, string, error) {
  939. var (
  940. userID, groupID int
  941. username, groupname string
  942. )
  943. idparts := strings.Split(usergrp, ":")
  944. if len(idparts) > 2 {
  945. return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
  946. }
  947. if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
  948. // must be a uid; take it as valid
  949. userID = int(uid)
  950. luser, err := idtools.LookupUID(userID)
  951. if err != nil {
  952. return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
  953. }
  954. username = luser.Name
  955. if len(idparts) == 1 {
  956. // if the uid was numeric and no gid was specified, take the uid as the gid
  957. groupID = userID
  958. lgrp, err := idtools.LookupGID(groupID)
  959. if err != nil {
  960. return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
  961. }
  962. groupname = lgrp.Name
  963. }
  964. } else {
  965. lookupName := idparts[0]
  966. // special case: if the user specified "default", they want Docker to create or
  967. // use (after creation) the "dockremap" user/group for root remapping
  968. if lookupName == defaultIDSpecifier {
  969. lookupName = defaultRemappedID
  970. }
  971. luser, err := idtools.LookupUser(lookupName)
  972. if err != nil && idparts[0] != defaultIDSpecifier {
  973. // error if the name requested isn't the special "dockremap" ID
  974. return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
  975. } else if err != nil {
  976. // special case-- if the username == "default", then we have been asked
  977. // to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
  978. // ranges will be used for the user and group mappings in user namespaced containers
  979. _, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
  980. if err == nil {
  981. return defaultRemappedID, defaultRemappedID, nil
  982. }
  983. return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
  984. }
  985. username = luser.Name
  986. if len(idparts) == 1 {
  987. // we only have a string username, and no group specified; look up gid from username as group
  988. group, err := idtools.LookupGroup(lookupName)
  989. if err != nil {
  990. return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
  991. }
  992. groupname = group.Name
  993. }
  994. }
  995. if len(idparts) == 2 {
  996. // groupname or gid is separately specified and must be resolved
  997. // to an unsigned 32-bit gid
  998. if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
  999. // must be a gid, take it as valid
  1000. groupID = int(gid)
  1001. lgrp, err := idtools.LookupGID(groupID)
  1002. if err != nil {
  1003. return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err)
  1004. }
  1005. groupname = lgrp.Name
  1006. } else {
  1007. // not a number; attempt a lookup
  1008. if _, err := idtools.LookupGroup(idparts[1]); err != nil {
  1009. return "", "", fmt.Errorf("Error during groupname lookup for %q: %v", idparts[1], err)
  1010. }
  1011. groupname = idparts[1]
  1012. }
  1013. }
  1014. return username, groupname, nil
  1015. }
  1016. func setupRemappedRoot(config *config.Config) (*idtools.IdentityMapping, error) {
  1017. if runtime.GOOS != "linux" && config.RemappedRoot != "" {
  1018. return nil, fmt.Errorf("User namespaces are only supported on Linux")
  1019. }
  1020. // if the daemon was started with remapped root option, parse
  1021. // the config option to the int uid,gid values
  1022. if config.RemappedRoot != "" {
  1023. username, groupname, err := parseRemappedRoot(config.RemappedRoot)
  1024. if err != nil {
  1025. return nil, err
  1026. }
  1027. if username == "root" {
  1028. // Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op
  1029. // effectively
  1030. logrus.Warn("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
  1031. return &idtools.IdentityMapping{}, nil
  1032. }
  1033. logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
  1034. // update remapped root setting now that we have resolved them to actual names
  1035. config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)
  1036. mappings, err := idtools.NewIdentityMapping(username, groupname)
  1037. if err != nil {
  1038. return nil, errors.Wrap(err, "Can't create ID mappings")
  1039. }
  1040. return mappings, nil
  1041. }
  1042. return &idtools.IdentityMapping{}, nil
  1043. }
  1044. func setupDaemonRoot(config *config.Config, rootDir string, rootIdentity idtools.Identity) error {
  1045. config.Root = rootDir
  1046. // the docker root metadata directory needs to have execute permissions for all users (g+x,o+x)
  1047. // so that syscalls executing as non-root, operating on subdirectories of the graph root
  1048. // (e.g. mounted layers of a container) can traverse this path.
  1049. // The user namespace support will create subdirectories for the remapped root host uid:gid
  1050. // pair owned by that same uid:gid pair for proper write access to those needed metadata and
  1051. // layer content subtrees.
  1052. if _, err := os.Stat(rootDir); err == nil {
  1053. // root current exists; verify the access bits are correct by setting them
  1054. if err = os.Chmod(rootDir, 0711); err != nil {
  1055. return err
  1056. }
  1057. } else if os.IsNotExist(err) {
  1058. // no root exists yet, create it 0711 with root:root ownership
  1059. if err := os.MkdirAll(rootDir, 0711); err != nil {
  1060. return err
  1061. }
  1062. }
  1063. // if user namespaces are enabled we will create a subtree underneath the specified root
  1064. // with any/all specified remapped root uid/gid options on the daemon creating
  1065. // a new subdirectory with ownership set to the remapped uid/gid (so as to allow
  1066. // `chdir()` to work for containers namespaced to that uid/gid)
  1067. if config.RemappedRoot != "" {
  1068. config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootIdentity.UID, rootIdentity.GID))
  1069. logrus.Debugf("Creating user namespaced daemon root: %s", config.Root)
  1070. // Create the root directory if it doesn't exist
  1071. if err := idtools.MkdirAllAndChown(config.Root, 0700, rootIdentity); err != nil {
  1072. return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
  1073. }
  1074. // we also need to verify that any pre-existing directories in the path to
  1075. // the graphroot won't block access to remapped root--if any pre-existing directory
  1076. // has strict permissions that don't allow "x", container start will fail, so
  1077. // better to warn and fail now
  1078. dirPath := config.Root
  1079. for {
  1080. dirPath = filepath.Dir(dirPath)
  1081. if dirPath == "/" {
  1082. break
  1083. }
  1084. if !idtools.CanAccess(dirPath, rootIdentity) {
  1085. return fmt.Errorf("a subdirectory in your graphroot path (%s) restricts access to the remapped root uid/gid; please fix by allowing 'o+x' permissions on existing directories", config.Root)
  1086. }
  1087. }
  1088. }
  1089. if err := setupDaemonRootPropagation(config); err != nil {
  1090. logrus.WithError(err).WithField("dir", config.Root).Warn("Error while setting daemon root propagation, this is not generally critical but may cause some functionality to not work or fallback to less desirable behavior")
  1091. }
  1092. return nil
  1093. }
  1094. func setupDaemonRootPropagation(cfg *config.Config) error {
  1095. rootParentMount, options, err := getSourceMount(cfg.Root)
  1096. if err != nil {
  1097. return errors.Wrap(err, "error getting daemon root's parent mount")
  1098. }
  1099. var cleanupOldFile bool
  1100. cleanupFile := getUnmountOnShutdownPath(cfg)
  1101. defer func() {
  1102. if !cleanupOldFile {
  1103. return
  1104. }
  1105. if err := os.Remove(cleanupFile); err != nil && !os.IsNotExist(err) {
  1106. logrus.WithError(err).WithField("file", cleanupFile).Warn("could not clean up old root propagation unmount file")
  1107. }
  1108. }()
  1109. if hasMountinfoOption(options, sharedPropagationOption, slavePropagationOption) {
  1110. cleanupOldFile = true
  1111. return nil
  1112. }
  1113. if err := mount.MakeShared(cfg.Root); err != nil {
  1114. return errors.Wrap(err, "could not setup daemon root propagation to shared")
  1115. }
  1116. // check the case where this may have already been a mount to itself.
  1117. // If so then the daemon only performed a remount and should not try to unmount this later.
  1118. if rootParentMount == cfg.Root {
  1119. cleanupOldFile = true
  1120. return nil
  1121. }
  1122. if err := ioutil.WriteFile(cleanupFile, nil, 0600); err != nil {
  1123. return errors.Wrap(err, "error writing file to signal mount cleanup on shutdown")
  1124. }
  1125. return nil
  1126. }
  1127. // getUnmountOnShutdownPath generates the path to used when writing the file that signals to the daemon that on shutdown
  1128. // the daemon root should be unmounted.
  1129. func getUnmountOnShutdownPath(config *config.Config) string {
  1130. return filepath.Join(config.ExecRoot, "unmount-on-shutdown")
  1131. }
  1132. // registerLinks writes the links to a file.
  1133. func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *containertypes.HostConfig) error {
  1134. if hostConfig == nil || hostConfig.NetworkMode.IsUserDefined() {
  1135. return nil
  1136. }
  1137. for _, l := range hostConfig.Links {
  1138. name, alias, err := opts.ParseLink(l)
  1139. if err != nil {
  1140. return err
  1141. }
  1142. child, err := daemon.GetContainer(name)
  1143. if err != nil {
  1144. return errors.Wrapf(err, "could not get container for %s", name)
  1145. }
  1146. for child.HostConfig.NetworkMode.IsContainer() {
  1147. parts := strings.SplitN(string(child.HostConfig.NetworkMode), ":", 2)
  1148. child, err = daemon.GetContainer(parts[1])
  1149. if err != nil {
  1150. return errors.Wrapf(err, "Could not get container for %s", parts[1])
  1151. }
  1152. }
  1153. if child.HostConfig.NetworkMode.IsHost() {
  1154. return runconfig.ErrConflictHostNetworkAndLinks
  1155. }
  1156. if err := daemon.registerLink(container, child, alias); err != nil {
  1157. return err
  1158. }
  1159. }
  1160. // After we load all the links into the daemon
  1161. // set them to nil on the hostconfig
  1162. _, err := container.WriteHostConfig()
  1163. return err
  1164. }
  1165. // conditionalMountOnStart is a platform specific helper function during the
  1166. // container start to call mount.
  1167. func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error {
  1168. return daemon.Mount(container)
  1169. }
  1170. // conditionalUnmountOnCleanup is a platform specific helper function called
  1171. // during the cleanup of a container to unmount.
  1172. func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error {
  1173. return daemon.Unmount(container)
  1174. }
  1175. func copyBlkioEntry(entries []*containerd_cgroups.BlkIOEntry) []types.BlkioStatEntry {
  1176. out := make([]types.BlkioStatEntry, len(entries))
  1177. for i, re := range entries {
  1178. out[i] = types.BlkioStatEntry{
  1179. Major: re.Major,
  1180. Minor: re.Minor,
  1181. Op: re.Op,
  1182. Value: re.Value,
  1183. }
  1184. }
  1185. return out
  1186. }
  1187. func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
  1188. if !c.IsRunning() {
  1189. return nil, errNotRunning(c.ID)
  1190. }
  1191. cs, err := daemon.containerd.Stats(context.Background(), c.ID)
  1192. if err != nil {
  1193. if strings.Contains(err.Error(), "container not found") {
  1194. return nil, containerNotFound(c.ID)
  1195. }
  1196. return nil, err
  1197. }
  1198. s := &types.StatsJSON{}
  1199. s.Read = cs.Read
  1200. stats := cs.Metrics
  1201. if stats.Blkio != nil {
  1202. s.BlkioStats = types.BlkioStats{
  1203. IoServiceBytesRecursive: copyBlkioEntry(stats.Blkio.IoServiceBytesRecursive),
  1204. IoServicedRecursive: copyBlkioEntry(stats.Blkio.IoServicedRecursive),
  1205. IoQueuedRecursive: copyBlkioEntry(stats.Blkio.IoQueuedRecursive),
  1206. IoServiceTimeRecursive: copyBlkioEntry(stats.Blkio.IoServiceTimeRecursive),
  1207. IoWaitTimeRecursive: copyBlkioEntry(stats.Blkio.IoWaitTimeRecursive),
  1208. IoMergedRecursive: copyBlkioEntry(stats.Blkio.IoMergedRecursive),
  1209. IoTimeRecursive: copyBlkioEntry(stats.Blkio.IoTimeRecursive),
  1210. SectorsRecursive: copyBlkioEntry(stats.Blkio.SectorsRecursive),
  1211. }
  1212. }
  1213. if stats.CPU != nil {
  1214. s.CPUStats = types.CPUStats{
  1215. CPUUsage: types.CPUUsage{
  1216. TotalUsage: stats.CPU.Usage.Total,
  1217. PercpuUsage: stats.CPU.Usage.PerCPU,
  1218. UsageInKernelmode: stats.CPU.Usage.Kernel,
  1219. UsageInUsermode: stats.CPU.Usage.User,
  1220. },
  1221. ThrottlingData: types.ThrottlingData{
  1222. Periods: stats.CPU.Throttling.Periods,
  1223. ThrottledPeriods: stats.CPU.Throttling.ThrottledPeriods,
  1224. ThrottledTime: stats.CPU.Throttling.ThrottledTime,
  1225. },
  1226. }
  1227. }
  1228. if stats.Memory != nil {
  1229. raw := make(map[string]uint64)
  1230. raw["cache"] = stats.Memory.Cache
  1231. raw["rss"] = stats.Memory.RSS
  1232. raw["rss_huge"] = stats.Memory.RSSHuge
  1233. raw["mapped_file"] = stats.Memory.MappedFile
  1234. raw["dirty"] = stats.Memory.Dirty
  1235. raw["writeback"] = stats.Memory.Writeback
  1236. raw["pgpgin"] = stats.Memory.PgPgIn
  1237. raw["pgpgout"] = stats.Memory.PgPgOut
  1238. raw["pgfault"] = stats.Memory.PgFault
  1239. raw["pgmajfault"] = stats.Memory.PgMajFault
  1240. raw["inactive_anon"] = stats.Memory.InactiveAnon
  1241. raw["active_anon"] = stats.Memory.ActiveAnon
  1242. raw["inactive_file"] = stats.Memory.InactiveFile
  1243. raw["active_file"] = stats.Memory.ActiveFile
  1244. raw["unevictable"] = stats.Memory.Unevictable
  1245. raw["hierarchical_memory_limit"] = stats.Memory.HierarchicalMemoryLimit
  1246. raw["hierarchical_memsw_limit"] = stats.Memory.HierarchicalSwapLimit
  1247. raw["total_cache"] = stats.Memory.TotalCache
  1248. raw["total_rss"] = stats.Memory.TotalRSS
  1249. raw["total_rss_huge"] = stats.Memory.TotalRSSHuge
  1250. raw["total_mapped_file"] = stats.Memory.TotalMappedFile
  1251. raw["total_dirty"] = stats.Memory.TotalDirty
  1252. raw["total_writeback"] = stats.Memory.TotalWriteback
  1253. raw["total_pgpgin"] = stats.Memory.TotalPgPgIn
  1254. raw["total_pgpgout"] = stats.Memory.TotalPgPgOut
  1255. raw["total_pgfault"] = stats.Memory.TotalPgFault
  1256. raw["total_pgmajfault"] = stats.Memory.TotalPgMajFault
  1257. raw["total_inactive_anon"] = stats.Memory.TotalInactiveAnon
  1258. raw["total_active_anon"] = stats.Memory.TotalActiveAnon
  1259. raw["total_inactive_file"] = stats.Memory.TotalInactiveFile
  1260. raw["total_active_file"] = stats.Memory.TotalActiveFile
  1261. raw["total_unevictable"] = stats.Memory.TotalUnevictable
  1262. if stats.Memory.Usage != nil {
  1263. s.MemoryStats = types.MemoryStats{
  1264. Stats: raw,
  1265. Usage: stats.Memory.Usage.Usage,
  1266. MaxUsage: stats.Memory.Usage.Max,
  1267. Limit: stats.Memory.Usage.Limit,
  1268. Failcnt: stats.Memory.Usage.Failcnt,
  1269. }
  1270. } else {
  1271. s.MemoryStats = types.MemoryStats{
  1272. Stats: raw,
  1273. }
  1274. }
  1275. // if the container does not set memory limit, use the machineMemory
  1276. if s.MemoryStats.Limit > daemon.machineMemory && daemon.machineMemory > 0 {
  1277. s.MemoryStats.Limit = daemon.machineMemory
  1278. }
  1279. }
  1280. if stats.Pids != nil {
  1281. s.PidsStats = types.PidsStats{
  1282. Current: stats.Pids.Current,
  1283. Limit: stats.Pids.Limit,
  1284. }
  1285. }
  1286. return s, nil
  1287. }
  1288. // setDefaultIsolation determines the default isolation mode for the
  1289. // daemon to run in. This is only applicable on Windows
  1290. func (daemon *Daemon) setDefaultIsolation() error {
  1291. return nil
  1292. }
  1293. // setupDaemonProcess sets various settings for the daemon's process
  1294. func setupDaemonProcess(config *config.Config) error {
  1295. // setup the daemons oom_score_adj
  1296. if err := setupOOMScoreAdj(config.OOMScoreAdjust); err != nil {
  1297. return err
  1298. }
  1299. if err := setMayDetachMounts(); err != nil {
  1300. logrus.WithError(err).Warn("Could not set may_detach_mounts kernel parameter")
  1301. }
  1302. return nil
  1303. }
  1304. // This is used to allow removal of mountpoints that may be mounted in other
  1305. // namespaces on RHEL based kernels starting from RHEL 7.4.
  1306. // Without this setting, removals on these RHEL based kernels may fail with
  1307. // "device or resource busy".
  1308. // This setting is not available in upstream kernels as it is not configurable,
  1309. // but has been in the upstream kernels since 3.15.
  1310. func setMayDetachMounts() error {
  1311. f, err := os.OpenFile("/proc/sys/fs/may_detach_mounts", os.O_WRONLY, 0)
  1312. if err != nil {
  1313. if os.IsNotExist(err) {
  1314. return nil
  1315. }
  1316. return errors.Wrap(err, "error opening may_detach_mounts kernel config file")
  1317. }
  1318. defer f.Close()
  1319. _, err = f.WriteString("1")
  1320. if os.IsPermission(err) {
  1321. // Setting may_detach_mounts does not work in an
  1322. // unprivileged container. Ignore the error, but log
  1323. // it if we appear not to be in that situation.
  1324. if !rsystem.RunningInUserNS() {
  1325. logrus.Debugf("Permission denied writing %q to /proc/sys/fs/may_detach_mounts", "1")
  1326. }
  1327. return nil
  1328. }
  1329. return err
  1330. }
  1331. func setupOOMScoreAdj(score int) error {
  1332. f, err := os.OpenFile("/proc/self/oom_score_adj", os.O_WRONLY, 0)
  1333. if err != nil {
  1334. return err
  1335. }
  1336. defer f.Close()
  1337. stringScore := strconv.Itoa(score)
  1338. _, err = f.WriteString(stringScore)
  1339. if os.IsPermission(err) {
  1340. // Setting oom_score_adj does not work in an
  1341. // unprivileged container. Ignore the error, but log
  1342. // it if we appear not to be in that situation.
  1343. if !rsystem.RunningInUserNS() {
  1344. logrus.Debugf("Permission denied writing %q to /proc/self/oom_score_adj", stringScore)
  1345. }
  1346. return nil
  1347. }
  1348. return err
  1349. }
  1350. func (daemon *Daemon) initCgroupsPath(path string) error {
  1351. if path == "/" || path == "." {
  1352. return nil
  1353. }
  1354. if daemon.configStore.CPURealtimePeriod == 0 && daemon.configStore.CPURealtimeRuntime == 0 {
  1355. return nil
  1356. }
  1357. // Recursively create cgroup to ensure that the system and all parent cgroups have values set
  1358. // for the period and runtime as this limits what the children can be set to.
  1359. daemon.initCgroupsPath(filepath.Dir(path))
  1360. mnt, root, err := cgroups.FindCgroupMountpointAndRoot("cpu")
  1361. if err != nil {
  1362. return err
  1363. }
  1364. // When docker is run inside docker, the root is based of the host cgroup.
  1365. // Should this be handled in runc/libcontainer/cgroups ?
  1366. if strings.HasPrefix(root, "/docker/") {
  1367. root = "/"
  1368. }
  1369. path = filepath.Join(mnt, root, path)
  1370. sysinfo := sysinfo.New(true)
  1371. if err := maybeCreateCPURealTimeFile(sysinfo.CPURealtimePeriod, daemon.configStore.CPURealtimePeriod, "cpu.rt_period_us", path); err != nil {
  1372. return err
  1373. }
  1374. return maybeCreateCPURealTimeFile(sysinfo.CPURealtimeRuntime, daemon.configStore.CPURealtimeRuntime, "cpu.rt_runtime_us", path)
  1375. }
  1376. func maybeCreateCPURealTimeFile(sysinfoPresent bool, configValue int64, file string, path string) error {
  1377. if sysinfoPresent && configValue != 0 {
  1378. if err := os.MkdirAll(path, 0755); err != nil {
  1379. return err
  1380. }
  1381. if err := ioutil.WriteFile(filepath.Join(path, file), []byte(strconv.FormatInt(configValue, 10)), 0700); err != nil {
  1382. return err
  1383. }
  1384. }
  1385. return nil
  1386. }
  1387. func (daemon *Daemon) setupSeccompProfile() error {
  1388. if daemon.configStore.SeccompProfile != "" {
  1389. daemon.seccompProfilePath = daemon.configStore.SeccompProfile
  1390. b, err := ioutil.ReadFile(daemon.configStore.SeccompProfile)
  1391. if err != nil {
  1392. return fmt.Errorf("opening seccomp profile (%s) failed: %v", daemon.configStore.SeccompProfile, err)
  1393. }
  1394. daemon.seccompProfile = b
  1395. }
  1396. return nil
  1397. }