container.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590
  1. package container
  2. import (
  3. "errors"
  4. "fmt"
  5. "net"
  6. "strings"
  7. "time"
  8. "github.com/Sirupsen/logrus"
  9. "github.com/docker/docker/api/types"
  10. enginecontainer "github.com/docker/docker/api/types/container"
  11. "github.com/docker/docker/api/types/events"
  12. "github.com/docker/docker/api/types/filters"
  13. "github.com/docker/docker/api/types/network"
  14. volumetypes "github.com/docker/docker/api/types/volume"
  15. clustertypes "github.com/docker/docker/daemon/cluster/provider"
  16. "github.com/docker/docker/reference"
  17. "github.com/docker/swarmkit/agent/exec"
  18. "github.com/docker/swarmkit/api"
  19. "github.com/docker/swarmkit/protobuf/ptypes"
  20. "github.com/docker/swarmkit/template"
  21. )
  // cpuQuotaPeriod explicitly pins the CPU quota period to the kernel's default
  // setting of 100ms.
  // See https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
  const cpuQuotaPeriod = 100 * time.Millisecond

  // systemLabelPrefix is the namespace reserved for system labels; every label
  // generated by this package is keyed underneath it.
  const systemLabelPrefix = "com.docker.swarm"
  29. // containerConfig converts task properties into docker container compatible
  30. // components.
  31. type containerConfig struct {
  32. task *api.Task
  33. networksAttachments map[string]*api.NetworkAttachment
  34. }
  35. // newContainerConfig returns a validated container config. No methods should
  36. // return an error if this function returns without error.
  37. func newContainerConfig(t *api.Task) (*containerConfig, error) {
  38. var c containerConfig
  39. return &c, c.setTask(t)
  40. }
  41. func (c *containerConfig) setTask(t *api.Task) error {
  42. if t.Spec.GetContainer() == nil && t.Spec.GetAttachment() == nil {
  43. return exec.ErrRuntimeUnsupported
  44. }
  45. container := t.Spec.GetContainer()
  46. if container != nil {
  47. if container.Image == "" {
  48. return ErrImageRequired
  49. }
  50. if err := validateMounts(container.Mounts); err != nil {
  51. return err
  52. }
  53. }
  54. // index the networks by name
  55. c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks))
  56. for _, attachment := range t.Networks {
  57. c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment
  58. }
  59. c.task = t
  60. if t.Spec.GetContainer() != nil {
  61. preparedSpec, err := template.ExpandContainerSpec(t)
  62. if err != nil {
  63. return err
  64. }
  65. c.task.Spec.Runtime = &api.TaskSpec_Container{
  66. Container: preparedSpec,
  67. }
  68. }
  69. return nil
  70. }
  71. func (c *containerConfig) id() string {
  72. attachment := c.task.Spec.GetAttachment()
  73. if attachment == nil {
  74. return ""
  75. }
  76. return attachment.ContainerID
  77. }
  78. func (c *containerConfig) taskID() string {
  79. return c.task.ID
  80. }
  81. func (c *containerConfig) endpoint() *api.Endpoint {
  82. return c.task.Endpoint
  83. }
  84. func (c *containerConfig) spec() *api.ContainerSpec {
  85. return c.task.Spec.GetContainer()
  86. }
  87. func (c *containerConfig) nameOrID() string {
  88. if c.task.Spec.GetContainer() != nil {
  89. return c.name()
  90. }
  91. return c.id()
  92. }
  93. func (c *containerConfig) name() string {
  94. if c.task.Annotations.Name != "" {
  95. // if set, use the container Annotations.Name field, set in the orchestrator.
  96. return c.task.Annotations.Name
  97. }
  98. slot := fmt.Sprint(c.task.Slot)
  99. if slot == "" || c.task.Slot == 0 {
  100. slot = c.task.NodeID
  101. }
  102. // fallback to service.slot.id.
  103. return fmt.Sprintf("%s.%s.%s", c.task.ServiceAnnotations.Name, slot, c.task.ID)
  104. }
  105. func (c *containerConfig) image() string {
  106. raw := c.spec().Image
  107. ref, err := reference.ParseNamed(raw)
  108. if err != nil {
  109. return raw
  110. }
  111. return reference.WithDefaultTag(ref).String()
  112. }
  113. func (c *containerConfig) config() *enginecontainer.Config {
  114. config := &enginecontainer.Config{
  115. Labels: c.labels(),
  116. Tty: c.spec().TTY,
  117. User: c.spec().User,
  118. Hostname: c.spec().Hostname,
  119. Env: c.spec().Env,
  120. WorkingDir: c.spec().Dir,
  121. Image: c.image(),
  122. Volumes: c.volumes(),
  123. Healthcheck: c.healthcheck(),
  124. }
  125. if len(c.spec().Command) > 0 {
  126. // If Command is provided, we replace the whole invocation with Command
  127. // by replacing Entrypoint and specifying Cmd. Args is ignored in this
  128. // case.
  129. config.Entrypoint = append(config.Entrypoint, c.spec().Command...)
  130. config.Cmd = append(config.Cmd, c.spec().Args...)
  131. } else if len(c.spec().Args) > 0 {
  132. // In this case, we assume the image has an Entrypoint and Args
  133. // specifies the arguments for that entrypoint.
  134. config.Cmd = c.spec().Args
  135. }
  136. return config
  137. }
  138. func (c *containerConfig) labels() map[string]string {
  139. var (
  140. system = map[string]string{
  141. "task": "", // mark as cluster task
  142. "task.id": c.task.ID,
  143. "task.name": c.name(),
  144. "node.id": c.task.NodeID,
  145. "service.id": c.task.ServiceID,
  146. "service.name": c.task.ServiceAnnotations.Name,
  147. }
  148. labels = make(map[string]string)
  149. )
  150. // base labels are those defined in the spec.
  151. for k, v := range c.spec().Labels {
  152. labels[k] = v
  153. }
  154. // we then apply the overrides from the task, which may be set via the
  155. // orchestrator.
  156. for k, v := range c.task.Annotations.Labels {
  157. labels[k] = v
  158. }
  159. // finally, we apply the system labels, which override all labels.
  160. for k, v := range system {
  161. labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v
  162. }
  163. return labels
  164. }
  165. // volumes gets placed into the Volumes field on the containerConfig.
  166. func (c *containerConfig) volumes() map[string]struct{} {
  167. r := make(map[string]struct{})
  168. // Volumes *only* creates anonymous volumes. The rest is mixed in with
  169. // binds, which aren't actually binds. Basically, any volume that
  170. // results in a single component must be added here.
  171. //
  172. // This is reversed engineered from the behavior of the engine API.
  173. for _, mount := range c.spec().Mounts {
  174. if mount.Type == api.MountTypeVolume && mount.Source == "" {
  175. r[mount.Target] = struct{}{}
  176. }
  177. }
  178. return r
  179. }
  180. func (c *containerConfig) tmpfs() map[string]string {
  181. r := make(map[string]string)
  182. for _, spec := range c.spec().Mounts {
  183. if spec.Type != api.MountTypeTmpfs {
  184. continue
  185. }
  186. r[spec.Target] = getMountMask(&spec)
  187. }
  188. return r
  189. }
  190. func (c *containerConfig) binds() []string {
  191. var r []string
  192. for _, mount := range c.spec().Mounts {
  193. if mount.Type == api.MountTypeBind || (mount.Type == api.MountTypeVolume && mount.Source != "") {
  194. spec := fmt.Sprintf("%s:%s", mount.Source, mount.Target)
  195. mask := getMountMask(&mount)
  196. if mask != "" {
  197. spec = fmt.Sprintf("%s:%s", spec, mask)
  198. }
  199. r = append(r, spec)
  200. }
  201. }
  202. return r
  203. }
  204. func (c *containerConfig) healthcheck() *enginecontainer.HealthConfig {
  205. hcSpec := c.spec().Healthcheck
  206. if hcSpec == nil {
  207. return nil
  208. }
  209. interval, _ := ptypes.Duration(hcSpec.Interval)
  210. timeout, _ := ptypes.Duration(hcSpec.Timeout)
  211. return &enginecontainer.HealthConfig{
  212. Test: hcSpec.Test,
  213. Interval: interval,
  214. Timeout: timeout,
  215. Retries: int(hcSpec.Retries),
  216. }
  217. }
  218. func getMountMask(m *api.Mount) string {
  219. var maskOpts []string
  220. if m.ReadOnly {
  221. maskOpts = append(maskOpts, "ro")
  222. }
  223. switch m.Type {
  224. case api.MountTypeVolume:
  225. if m.VolumeOptions != nil && m.VolumeOptions.NoCopy {
  226. maskOpts = append(maskOpts, "nocopy")
  227. }
  228. case api.MountTypeBind:
  229. if m.BindOptions == nil {
  230. break
  231. }
  232. switch m.BindOptions.Propagation {
  233. case api.MountPropagationPrivate:
  234. maskOpts = append(maskOpts, "private")
  235. case api.MountPropagationRPrivate:
  236. maskOpts = append(maskOpts, "rprivate")
  237. case api.MountPropagationShared:
  238. maskOpts = append(maskOpts, "shared")
  239. case api.MountPropagationRShared:
  240. maskOpts = append(maskOpts, "rshared")
  241. case api.MountPropagationSlave:
  242. maskOpts = append(maskOpts, "slave")
  243. case api.MountPropagationRSlave:
  244. maskOpts = append(maskOpts, "rslave")
  245. }
  246. case api.MountTypeTmpfs:
  247. if m.TmpfsOptions == nil {
  248. break
  249. }
  250. if m.TmpfsOptions.Mode != 0 {
  251. maskOpts = append(maskOpts, fmt.Sprintf("mode=%o", m.TmpfsOptions.Mode))
  252. }
  253. if m.TmpfsOptions.SizeBytes != 0 {
  254. // calculate suffix here, making this linux specific, but that is
  255. // okay, since API is that way anyways.
  256. // we do this by finding the suffix that divides evenly into the
  257. // value, returing the value itself, with no suffix, if it fails.
  258. //
  259. // For the most part, we don't enforce any semantic to this values.
  260. // The operating system will usually align this and enforce minimum
  261. // and maximums.
  262. var (
  263. size = m.TmpfsOptions.SizeBytes
  264. suffix string
  265. )
  266. for _, r := range []struct {
  267. suffix string
  268. divisor int64
  269. }{
  270. {"g", 1 << 30},
  271. {"m", 1 << 20},
  272. {"k", 1 << 10},
  273. } {
  274. if size%r.divisor == 0 {
  275. size = size / r.divisor
  276. suffix = r.suffix
  277. break
  278. }
  279. }
  280. maskOpts = append(maskOpts, fmt.Sprintf("size=%d%s", size, suffix))
  281. }
  282. }
  283. return strings.Join(maskOpts, ",")
  284. }
  285. func (c *containerConfig) hostConfig() *enginecontainer.HostConfig {
  286. hc := &enginecontainer.HostConfig{
  287. Resources: c.resources(),
  288. Binds: c.binds(),
  289. Tmpfs: c.tmpfs(),
  290. GroupAdd: c.spec().Groups,
  291. }
  292. if c.spec().DNSConfig != nil {
  293. hc.DNS = c.spec().DNSConfig.Nameservers
  294. hc.DNSSearch = c.spec().DNSConfig.Search
  295. hc.DNSOptions = c.spec().DNSConfig.Options
  296. }
  297. // The format of extra hosts on swarmkit is specified in:
  298. // http://man7.org/linux/man-pages/man5/hosts.5.html
  299. // IP_address canonical_hostname [aliases...]
  300. // However, the format of ExtraHosts in HostConfig is
  301. // <host>:<ip>
  302. // We need to do the conversion here
  303. // (Alias is ignored for now)
  304. for _, entry := range c.spec().Hosts {
  305. parts := strings.Fields(entry)
  306. if len(parts) > 1 {
  307. hc.ExtraHosts = append(hc.ExtraHosts, fmt.Sprintf("%s:%s", parts[1], parts[0]))
  308. }
  309. }
  310. if c.task.LogDriver != nil {
  311. hc.LogConfig = enginecontainer.LogConfig{
  312. Type: c.task.LogDriver.Name,
  313. Config: c.task.LogDriver.Options,
  314. }
  315. }
  316. return hc
  317. }
  318. // This handles the case of volumes that are defined inside a service Mount
  319. func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *volumetypes.VolumesCreateBody {
  320. var (
  321. driverName string
  322. driverOpts map[string]string
  323. labels map[string]string
  324. )
  325. if mount.VolumeOptions != nil && mount.VolumeOptions.DriverConfig != nil {
  326. driverName = mount.VolumeOptions.DriverConfig.Name
  327. driverOpts = mount.VolumeOptions.DriverConfig.Options
  328. labels = mount.VolumeOptions.Labels
  329. }
  330. if mount.VolumeOptions != nil {
  331. return &volumetypes.VolumesCreateBody{
  332. Name: mount.Source,
  333. Driver: driverName,
  334. DriverOpts: driverOpts,
  335. Labels: labels,
  336. }
  337. }
  338. return nil
  339. }
  340. func (c *containerConfig) resources() enginecontainer.Resources {
  341. resources := enginecontainer.Resources{}
  342. // If no limits are specified let the engine use its defaults.
  343. //
  344. // TODO(aluzzardi): We might want to set some limits anyway otherwise
  345. // "unlimited" tasks will step over the reservation of other tasks.
  346. r := c.task.Spec.Resources
  347. if r == nil || r.Limits == nil {
  348. return resources
  349. }
  350. if r.Limits.MemoryBytes > 0 {
  351. resources.Memory = r.Limits.MemoryBytes
  352. }
  353. if r.Limits.NanoCPUs > 0 {
  354. // CPU Period must be set in microseconds.
  355. resources.CPUPeriod = int64(cpuQuotaPeriod / time.Microsecond)
  356. resources.CPUQuota = r.Limits.NanoCPUs * resources.CPUPeriod / 1e9
  357. }
  358. return resources
  359. }
  360. // Docker daemon supports just 1 network during container create.
  361. func (c *containerConfig) createNetworkingConfig() *network.NetworkingConfig {
  362. var networks []*api.NetworkAttachment
  363. if c.task.Spec.GetContainer() != nil || c.task.Spec.GetAttachment() != nil {
  364. networks = c.task.Networks
  365. }
  366. epConfig := make(map[string]*network.EndpointSettings)
  367. if len(networks) > 0 {
  368. epConfig[networks[0].Network.Spec.Annotations.Name] = getEndpointConfig(networks[0])
  369. }
  370. return &network.NetworkingConfig{EndpointsConfig: epConfig}
  371. }
  372. // TODO: Merge this function with createNetworkingConfig after daemon supports multiple networks in container create
  373. func (c *containerConfig) connectNetworkingConfig() *network.NetworkingConfig {
  374. var networks []*api.NetworkAttachment
  375. if c.task.Spec.GetContainer() != nil {
  376. networks = c.task.Networks
  377. }
  378. // First network is used during container create. Other networks are used in "docker network connect"
  379. if len(networks) < 2 {
  380. return nil
  381. }
  382. epConfig := make(map[string]*network.EndpointSettings)
  383. for _, na := range networks[1:] {
  384. epConfig[na.Network.Spec.Annotations.Name] = getEndpointConfig(na)
  385. }
  386. return &network.NetworkingConfig{EndpointsConfig: epConfig}
  387. }
  388. func getEndpointConfig(na *api.NetworkAttachment) *network.EndpointSettings {
  389. var ipv4, ipv6 string
  390. for _, addr := range na.Addresses {
  391. ip, _, err := net.ParseCIDR(addr)
  392. if err != nil {
  393. continue
  394. }
  395. if ip.To4() != nil {
  396. ipv4 = ip.String()
  397. continue
  398. }
  399. if ip.To16() != nil {
  400. ipv6 = ip.String()
  401. }
  402. }
  403. return &network.EndpointSettings{
  404. NetworkID: na.Network.ID,
  405. IPAMConfig: &network.EndpointIPAMConfig{
  406. IPv4Address: ipv4,
  407. IPv6Address: ipv6,
  408. },
  409. }
  410. }
  411. func (c *containerConfig) virtualIP(networkID string) string {
  412. if c.task.Endpoint == nil {
  413. return ""
  414. }
  415. for _, eVip := range c.task.Endpoint.VirtualIPs {
  416. // We only support IPv4 VIPs for now.
  417. if eVip.NetworkID == networkID {
  418. vip, _, err := net.ParseCIDR(eVip.Addr)
  419. if err != nil {
  420. return ""
  421. }
  422. return vip.String()
  423. }
  424. }
  425. return ""
  426. }
  427. func (c *containerConfig) serviceConfig() *clustertypes.ServiceConfig {
  428. if len(c.task.Networks) == 0 {
  429. return nil
  430. }
  431. logrus.Debugf("Creating service config in agent for t = %+v", c.task)
  432. svcCfg := &clustertypes.ServiceConfig{
  433. Name: c.task.ServiceAnnotations.Name,
  434. Aliases: make(map[string][]string),
  435. ID: c.task.ServiceID,
  436. VirtualAddresses: make(map[string]*clustertypes.VirtualAddress),
  437. }
  438. for _, na := range c.task.Networks {
  439. svcCfg.VirtualAddresses[na.Network.ID] = &clustertypes.VirtualAddress{
  440. // We support only IPv4 virtual IP for now.
  441. IPv4: c.virtualIP(na.Network.ID),
  442. }
  443. if len(na.Aliases) > 0 {
  444. svcCfg.Aliases[na.Network.ID] = na.Aliases
  445. }
  446. }
  447. if c.task.Endpoint != nil {
  448. for _, ePort := range c.task.Endpoint.Ports {
  449. svcCfg.ExposedPorts = append(svcCfg.ExposedPorts, &clustertypes.PortConfig{
  450. Name: ePort.Name,
  451. Protocol: int32(ePort.Protocol),
  452. TargetPort: ePort.TargetPort,
  453. PublishedPort: ePort.PublishedPort,
  454. })
  455. }
  456. }
  457. return svcCfg
  458. }
  459. // networks returns a list of network names attached to the container. The
  460. // returned name can be used to lookup the corresponding network create
  461. // options.
  462. func (c *containerConfig) networks() []string {
  463. var networks []string
  464. for name := range c.networksAttachments {
  465. networks = append(networks, name)
  466. }
  467. return networks
  468. }
  469. func (c *containerConfig) networkCreateRequest(name string) (clustertypes.NetworkCreateRequest, error) {
  470. na, ok := c.networksAttachments[name]
  471. if !ok {
  472. return clustertypes.NetworkCreateRequest{}, errors.New("container: unknown network referenced")
  473. }
  474. options := types.NetworkCreate{
  475. // ID: na.Network.ID,
  476. Driver: na.Network.DriverState.Name,
  477. IPAM: &network.IPAM{
  478. Driver: na.Network.IPAM.Driver.Name,
  479. },
  480. Options: na.Network.DriverState.Options,
  481. Labels: na.Network.Spec.Annotations.Labels,
  482. Internal: na.Network.Spec.Internal,
  483. EnableIPv6: na.Network.Spec.Ipv6Enabled,
  484. CheckDuplicate: true,
  485. }
  486. for _, ic := range na.Network.IPAM.Configs {
  487. c := network.IPAMConfig{
  488. Subnet: ic.Subnet,
  489. IPRange: ic.Range,
  490. Gateway: ic.Gateway,
  491. }
  492. options.IPAM.Config = append(options.IPAM.Config, c)
  493. }
  494. return clustertypes.NetworkCreateRequest{na.Network.ID, types.NetworkCreateRequest{Name: name, NetworkCreate: options}}, nil
  495. }
  496. func (c containerConfig) eventFilter() filters.Args {
  497. filter := filters.NewArgs()
  498. filter.Add("type", events.ContainerEventType)
  499. filter.Add("name", c.name())
  500. filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID))
  501. return filter
  502. }