container.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609
  1. package container
  2. import (
  3. "errors"
  4. "fmt"
  5. "net"
  6. "strconv"
  7. "strings"
  8. "time"
  9. "github.com/Sirupsen/logrus"
  10. "github.com/docker/distribution/reference"
  11. "github.com/docker/docker/api/types"
  12. enginecontainer "github.com/docker/docker/api/types/container"
  13. "github.com/docker/docker/api/types/events"
  14. "github.com/docker/docker/api/types/filters"
  15. enginemount "github.com/docker/docker/api/types/mount"
  16. "github.com/docker/docker/api/types/network"
  17. volumetypes "github.com/docker/docker/api/types/volume"
  18. clustertypes "github.com/docker/docker/daemon/cluster/provider"
  19. "github.com/docker/go-connections/nat"
  20. "github.com/docker/swarmkit/agent/exec"
  21. "github.com/docker/swarmkit/api"
  22. "github.com/docker/swarmkit/template"
  23. gogotypes "github.com/gogo/protobuf/types"
  24. )
  25. const (
  26. // Explicitly use the kernel's default setting for CPU quota of 100ms.
  27. // https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
  28. cpuQuotaPeriod = 100 * time.Millisecond
  29. // systemLabelPrefix represents the reserved namespace for system labels.
  30. systemLabelPrefix = "com.docker.swarm"
  31. )
  32. // containerConfig converts task properties into docker container compatible
  33. // components.
  34. type containerConfig struct {
  35. task *api.Task
  36. networksAttachments map[string]*api.NetworkAttachment
  37. }
  38. // newContainerConfig returns a validated container config. No methods should
  39. // return an error if this function returns without error.
  40. func newContainerConfig(t *api.Task) (*containerConfig, error) {
  41. var c containerConfig
  42. return &c, c.setTask(t)
  43. }
  44. func (c *containerConfig) setTask(t *api.Task) error {
  45. if t.Spec.GetContainer() == nil && t.Spec.GetAttachment() == nil {
  46. return exec.ErrRuntimeUnsupported
  47. }
  48. container := t.Spec.GetContainer()
  49. if container != nil {
  50. if container.Image == "" {
  51. return ErrImageRequired
  52. }
  53. if err := validateMounts(container.Mounts); err != nil {
  54. return err
  55. }
  56. }
  57. // index the networks by name
  58. c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks))
  59. for _, attachment := range t.Networks {
  60. c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment
  61. }
  62. c.task = t
  63. if t.Spec.GetContainer() != nil {
  64. preparedSpec, err := template.ExpandContainerSpec(t)
  65. if err != nil {
  66. return err
  67. }
  68. c.task.Spec.Runtime = &api.TaskSpec_Container{
  69. Container: preparedSpec,
  70. }
  71. }
  72. return nil
  73. }
  74. func (c *containerConfig) id() string {
  75. attachment := c.task.Spec.GetAttachment()
  76. if attachment == nil {
  77. return ""
  78. }
  79. return attachment.ContainerID
  80. }
  81. func (c *containerConfig) taskID() string {
  82. return c.task.ID
  83. }
  84. func (c *containerConfig) endpoint() *api.Endpoint {
  85. return c.task.Endpoint
  86. }
  87. func (c *containerConfig) spec() *api.ContainerSpec {
  88. return c.task.Spec.GetContainer()
  89. }
  90. func (c *containerConfig) nameOrID() string {
  91. if c.task.Spec.GetContainer() != nil {
  92. return c.name()
  93. }
  94. return c.id()
  95. }
  96. func (c *containerConfig) name() string {
  97. if c.task.Annotations.Name != "" {
  98. // if set, use the container Annotations.Name field, set in the orchestrator.
  99. return c.task.Annotations.Name
  100. }
  101. slot := fmt.Sprint(c.task.Slot)
  102. if slot == "" || c.task.Slot == 0 {
  103. slot = c.task.NodeID
  104. }
  105. // fallback to service.slot.id.
  106. return fmt.Sprintf("%s.%s.%s", c.task.ServiceAnnotations.Name, slot, c.task.ID)
  107. }
  108. func (c *containerConfig) image() string {
  109. raw := c.spec().Image
  110. ref, err := reference.ParseNormalizedNamed(raw)
  111. if err != nil {
  112. return raw
  113. }
  114. return reference.FamiliarString(reference.TagNameOnly(ref))
  115. }
  116. func (c *containerConfig) portBindings() nat.PortMap {
  117. portBindings := nat.PortMap{}
  118. if c.task.Endpoint == nil {
  119. return portBindings
  120. }
  121. for _, portConfig := range c.task.Endpoint.Ports {
  122. if portConfig.PublishMode != api.PublishModeHost {
  123. continue
  124. }
  125. port := nat.Port(fmt.Sprintf("%d/%s", portConfig.TargetPort, strings.ToLower(portConfig.Protocol.String())))
  126. binding := []nat.PortBinding{
  127. {},
  128. }
  129. if portConfig.PublishedPort != 0 {
  130. binding[0].HostPort = strconv.Itoa(int(portConfig.PublishedPort))
  131. }
  132. portBindings[port] = binding
  133. }
  134. return portBindings
  135. }
  136. func (c *containerConfig) exposedPorts() map[nat.Port]struct{} {
  137. exposedPorts := make(map[nat.Port]struct{})
  138. if c.task.Endpoint == nil {
  139. return exposedPorts
  140. }
  141. for _, portConfig := range c.task.Endpoint.Ports {
  142. if portConfig.PublishMode != api.PublishModeHost {
  143. continue
  144. }
  145. port := nat.Port(fmt.Sprintf("%d/%s", portConfig.TargetPort, strings.ToLower(portConfig.Protocol.String())))
  146. exposedPorts[port] = struct{}{}
  147. }
  148. return exposedPorts
  149. }
  150. func (c *containerConfig) config() *enginecontainer.Config {
  151. config := &enginecontainer.Config{
  152. Labels: c.labels(),
  153. StopSignal: c.spec().StopSignal,
  154. Tty: c.spec().TTY,
  155. OpenStdin: c.spec().OpenStdin,
  156. User: c.spec().User,
  157. Env: c.spec().Env,
  158. Hostname: c.spec().Hostname,
  159. WorkingDir: c.spec().Dir,
  160. Image: c.image(),
  161. ExposedPorts: c.exposedPorts(),
  162. Healthcheck: c.healthcheck(),
  163. }
  164. if len(c.spec().Command) > 0 {
  165. // If Command is provided, we replace the whole invocation with Command
  166. // by replacing Entrypoint and specifying Cmd. Args is ignored in this
  167. // case.
  168. config.Entrypoint = append(config.Entrypoint, c.spec().Command...)
  169. config.Cmd = append(config.Cmd, c.spec().Args...)
  170. } else if len(c.spec().Args) > 0 {
  171. // In this case, we assume the image has an Entrypoint and Args
  172. // specifies the arguments for that entrypoint.
  173. config.Cmd = c.spec().Args
  174. }
  175. return config
  176. }
  177. func (c *containerConfig) labels() map[string]string {
  178. var (
  179. system = map[string]string{
  180. "task": "", // mark as cluster task
  181. "task.id": c.task.ID,
  182. "task.name": c.name(),
  183. "node.id": c.task.NodeID,
  184. "service.id": c.task.ServiceID,
  185. "service.name": c.task.ServiceAnnotations.Name,
  186. }
  187. labels = make(map[string]string)
  188. )
  189. // base labels are those defined in the spec.
  190. for k, v := range c.spec().Labels {
  191. labels[k] = v
  192. }
  193. // we then apply the overrides from the task, which may be set via the
  194. // orchestrator.
  195. for k, v := range c.task.Annotations.Labels {
  196. labels[k] = v
  197. }
  198. // finally, we apply the system labels, which override all labels.
  199. for k, v := range system {
  200. labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v
  201. }
  202. return labels
  203. }
  204. func (c *containerConfig) mounts() []enginemount.Mount {
  205. var r []enginemount.Mount
  206. for _, mount := range c.spec().Mounts {
  207. r = append(r, convertMount(mount))
  208. }
  209. return r
  210. }
  211. func convertMount(m api.Mount) enginemount.Mount {
  212. mount := enginemount.Mount{
  213. Source: m.Source,
  214. Target: m.Target,
  215. ReadOnly: m.ReadOnly,
  216. }
  217. switch m.Type {
  218. case api.MountTypeBind:
  219. mount.Type = enginemount.TypeBind
  220. case api.MountTypeVolume:
  221. mount.Type = enginemount.TypeVolume
  222. case api.MountTypeTmpfs:
  223. mount.Type = enginemount.TypeTmpfs
  224. }
  225. if m.BindOptions != nil {
  226. mount.BindOptions = &enginemount.BindOptions{}
  227. switch m.BindOptions.Propagation {
  228. case api.MountPropagationRPrivate:
  229. mount.BindOptions.Propagation = enginemount.PropagationRPrivate
  230. case api.MountPropagationPrivate:
  231. mount.BindOptions.Propagation = enginemount.PropagationPrivate
  232. case api.MountPropagationRSlave:
  233. mount.BindOptions.Propagation = enginemount.PropagationRSlave
  234. case api.MountPropagationSlave:
  235. mount.BindOptions.Propagation = enginemount.PropagationSlave
  236. case api.MountPropagationRShared:
  237. mount.BindOptions.Propagation = enginemount.PropagationRShared
  238. case api.MountPropagationShared:
  239. mount.BindOptions.Propagation = enginemount.PropagationShared
  240. }
  241. }
  242. if m.VolumeOptions != nil {
  243. mount.VolumeOptions = &enginemount.VolumeOptions{
  244. NoCopy: m.VolumeOptions.NoCopy,
  245. }
  246. if m.VolumeOptions.Labels != nil {
  247. mount.VolumeOptions.Labels = make(map[string]string, len(m.VolumeOptions.Labels))
  248. for k, v := range m.VolumeOptions.Labels {
  249. mount.VolumeOptions.Labels[k] = v
  250. }
  251. }
  252. if m.VolumeOptions.DriverConfig != nil {
  253. mount.VolumeOptions.DriverConfig = &enginemount.Driver{
  254. Name: m.VolumeOptions.DriverConfig.Name,
  255. }
  256. if m.VolumeOptions.DriverConfig.Options != nil {
  257. mount.VolumeOptions.DriverConfig.Options = make(map[string]string, len(m.VolumeOptions.DriverConfig.Options))
  258. for k, v := range m.VolumeOptions.DriverConfig.Options {
  259. mount.VolumeOptions.DriverConfig.Options[k] = v
  260. }
  261. }
  262. }
  263. }
  264. if m.TmpfsOptions != nil {
  265. mount.TmpfsOptions = &enginemount.TmpfsOptions{
  266. SizeBytes: m.TmpfsOptions.SizeBytes,
  267. Mode: m.TmpfsOptions.Mode,
  268. }
  269. }
  270. return mount
  271. }
  272. func (c *containerConfig) healthcheck() *enginecontainer.HealthConfig {
  273. hcSpec := c.spec().Healthcheck
  274. if hcSpec == nil {
  275. return nil
  276. }
  277. interval, _ := gogotypes.DurationFromProto(hcSpec.Interval)
  278. timeout, _ := gogotypes.DurationFromProto(hcSpec.Timeout)
  279. startPeriod, _ := gogotypes.DurationFromProto(hcSpec.StartPeriod)
  280. return &enginecontainer.HealthConfig{
  281. Test: hcSpec.Test,
  282. Interval: interval,
  283. Timeout: timeout,
  284. Retries: int(hcSpec.Retries),
  285. StartPeriod: startPeriod,
  286. }
  287. }
  288. func (c *containerConfig) hostConfig() *enginecontainer.HostConfig {
  289. hc := &enginecontainer.HostConfig{
  290. Resources: c.resources(),
  291. GroupAdd: c.spec().Groups,
  292. PortBindings: c.portBindings(),
  293. Mounts: c.mounts(),
  294. ReadonlyRootfs: c.spec().ReadOnly,
  295. }
  296. if c.spec().DNSConfig != nil {
  297. hc.DNS = c.spec().DNSConfig.Nameservers
  298. hc.DNSSearch = c.spec().DNSConfig.Search
  299. hc.DNSOptions = c.spec().DNSConfig.Options
  300. }
  301. // The format of extra hosts on swarmkit is specified in:
  302. // http://man7.org/linux/man-pages/man5/hosts.5.html
  303. // IP_address canonical_hostname [aliases...]
  304. // However, the format of ExtraHosts in HostConfig is
  305. // <host>:<ip>
  306. // We need to do the conversion here
  307. // (Alias is ignored for now)
  308. for _, entry := range c.spec().Hosts {
  309. parts := strings.Fields(entry)
  310. if len(parts) > 1 {
  311. hc.ExtraHosts = append(hc.ExtraHosts, fmt.Sprintf("%s:%s", parts[1], parts[0]))
  312. }
  313. }
  314. if c.task.LogDriver != nil {
  315. hc.LogConfig = enginecontainer.LogConfig{
  316. Type: c.task.LogDriver.Name,
  317. Config: c.task.LogDriver.Options,
  318. }
  319. }
  320. return hc
  321. }
  322. // This handles the case of volumes that are defined inside a service Mount
  323. func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *volumetypes.VolumesCreateBody {
  324. var (
  325. driverName string
  326. driverOpts map[string]string
  327. labels map[string]string
  328. )
  329. if mount.VolumeOptions != nil && mount.VolumeOptions.DriverConfig != nil {
  330. driverName = mount.VolumeOptions.DriverConfig.Name
  331. driverOpts = mount.VolumeOptions.DriverConfig.Options
  332. labels = mount.VolumeOptions.Labels
  333. }
  334. if mount.VolumeOptions != nil {
  335. return &volumetypes.VolumesCreateBody{
  336. Name: mount.Source,
  337. Driver: driverName,
  338. DriverOpts: driverOpts,
  339. Labels: labels,
  340. }
  341. }
  342. return nil
  343. }
  344. func (c *containerConfig) resources() enginecontainer.Resources {
  345. resources := enginecontainer.Resources{}
  346. // If no limits are specified let the engine use its defaults.
  347. //
  348. // TODO(aluzzardi): We might want to set some limits anyway otherwise
  349. // "unlimited" tasks will step over the reservation of other tasks.
  350. r := c.task.Spec.Resources
  351. if r == nil || r.Limits == nil {
  352. return resources
  353. }
  354. if r.Limits.MemoryBytes > 0 {
  355. resources.Memory = r.Limits.MemoryBytes
  356. }
  357. if r.Limits.NanoCPUs > 0 {
  358. // CPU Period must be set in microseconds.
  359. resources.CPUPeriod = int64(cpuQuotaPeriod / time.Microsecond)
  360. resources.CPUQuota = r.Limits.NanoCPUs * resources.CPUPeriod / 1e9
  361. }
  362. return resources
  363. }
  364. // Docker daemon supports just 1 network during container create.
  365. func (c *containerConfig) createNetworkingConfig() *network.NetworkingConfig {
  366. var networks []*api.NetworkAttachment
  367. if c.task.Spec.GetContainer() != nil || c.task.Spec.GetAttachment() != nil {
  368. networks = c.task.Networks
  369. }
  370. epConfig := make(map[string]*network.EndpointSettings)
  371. if len(networks) > 0 {
  372. epConfig[networks[0].Network.Spec.Annotations.Name] = getEndpointConfig(networks[0])
  373. }
  374. return &network.NetworkingConfig{EndpointsConfig: epConfig}
  375. }
  376. // TODO: Merge this function with createNetworkingConfig after daemon supports multiple networks in container create
  377. func (c *containerConfig) connectNetworkingConfig() *network.NetworkingConfig {
  378. var networks []*api.NetworkAttachment
  379. if c.task.Spec.GetContainer() != nil {
  380. networks = c.task.Networks
  381. }
  382. // First network is used during container create. Other networks are used in "docker network connect"
  383. if len(networks) < 2 {
  384. return nil
  385. }
  386. epConfig := make(map[string]*network.EndpointSettings)
  387. for _, na := range networks[1:] {
  388. epConfig[na.Network.Spec.Annotations.Name] = getEndpointConfig(na)
  389. }
  390. return &network.NetworkingConfig{EndpointsConfig: epConfig}
  391. }
  392. func getEndpointConfig(na *api.NetworkAttachment) *network.EndpointSettings {
  393. var ipv4, ipv6 string
  394. for _, addr := range na.Addresses {
  395. ip, _, err := net.ParseCIDR(addr)
  396. if err != nil {
  397. continue
  398. }
  399. if ip.To4() != nil {
  400. ipv4 = ip.String()
  401. continue
  402. }
  403. if ip.To16() != nil {
  404. ipv6 = ip.String()
  405. }
  406. }
  407. return &network.EndpointSettings{
  408. NetworkID: na.Network.ID,
  409. IPAMConfig: &network.EndpointIPAMConfig{
  410. IPv4Address: ipv4,
  411. IPv6Address: ipv6,
  412. },
  413. }
  414. }
  415. func (c *containerConfig) virtualIP(networkID string) string {
  416. if c.task.Endpoint == nil {
  417. return ""
  418. }
  419. for _, eVip := range c.task.Endpoint.VirtualIPs {
  420. // We only support IPv4 VIPs for now.
  421. if eVip.NetworkID == networkID {
  422. vip, _, err := net.ParseCIDR(eVip.Addr)
  423. if err != nil {
  424. return ""
  425. }
  426. return vip.String()
  427. }
  428. }
  429. return ""
  430. }
  431. func (c *containerConfig) serviceConfig() *clustertypes.ServiceConfig {
  432. if len(c.task.Networks) == 0 {
  433. return nil
  434. }
  435. logrus.Debugf("Creating service config in agent for t = %+v", c.task)
  436. svcCfg := &clustertypes.ServiceConfig{
  437. Name: c.task.ServiceAnnotations.Name,
  438. Aliases: make(map[string][]string),
  439. ID: c.task.ServiceID,
  440. VirtualAddresses: make(map[string]*clustertypes.VirtualAddress),
  441. }
  442. for _, na := range c.task.Networks {
  443. svcCfg.VirtualAddresses[na.Network.ID] = &clustertypes.VirtualAddress{
  444. // We support only IPv4 virtual IP for now.
  445. IPv4: c.virtualIP(na.Network.ID),
  446. }
  447. if len(na.Aliases) > 0 {
  448. svcCfg.Aliases[na.Network.ID] = na.Aliases
  449. }
  450. }
  451. if c.task.Endpoint != nil {
  452. for _, ePort := range c.task.Endpoint.Ports {
  453. if ePort.PublishMode != api.PublishModeIngress {
  454. continue
  455. }
  456. svcCfg.ExposedPorts = append(svcCfg.ExposedPorts, &clustertypes.PortConfig{
  457. Name: ePort.Name,
  458. Protocol: int32(ePort.Protocol),
  459. TargetPort: ePort.TargetPort,
  460. PublishedPort: ePort.PublishedPort,
  461. })
  462. }
  463. }
  464. return svcCfg
  465. }
  466. // networks returns a list of network names attached to the container. The
  467. // returned name can be used to lookup the corresponding network create
  468. // options.
  469. func (c *containerConfig) networks() []string {
  470. var networks []string
  471. for name := range c.networksAttachments {
  472. networks = append(networks, name)
  473. }
  474. return networks
  475. }
  476. func (c *containerConfig) networkCreateRequest(name string) (clustertypes.NetworkCreateRequest, error) {
  477. na, ok := c.networksAttachments[name]
  478. if !ok {
  479. return clustertypes.NetworkCreateRequest{}, errors.New("container: unknown network referenced")
  480. }
  481. options := types.NetworkCreate{
  482. // ID: na.Network.ID,
  483. Driver: na.Network.DriverState.Name,
  484. IPAM: &network.IPAM{
  485. Driver: na.Network.IPAM.Driver.Name,
  486. Options: na.Network.IPAM.Driver.Options,
  487. },
  488. Options: na.Network.DriverState.Options,
  489. Labels: na.Network.Spec.Annotations.Labels,
  490. Internal: na.Network.Spec.Internal,
  491. Attachable: na.Network.Spec.Attachable,
  492. Ingress: na.Network.Spec.Ingress,
  493. EnableIPv6: na.Network.Spec.Ipv6Enabled,
  494. CheckDuplicate: true,
  495. }
  496. for _, ic := range na.Network.IPAM.Configs {
  497. c := network.IPAMConfig{
  498. Subnet: ic.Subnet,
  499. IPRange: ic.Range,
  500. Gateway: ic.Gateway,
  501. }
  502. options.IPAM.Config = append(options.IPAM.Config, c)
  503. }
  504. return clustertypes.NetworkCreateRequest{
  505. ID: na.Network.ID,
  506. NetworkCreateRequest: types.NetworkCreateRequest{
  507. Name: name,
  508. NetworkCreate: options,
  509. },
  510. }, nil
  511. }
  512. func (c containerConfig) eventFilter() filters.Args {
  513. filter := filters.NewArgs()
  514. filter.Add("type", events.ContainerEventType)
  515. filter.Add("name", c.name())
  516. filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID))
  517. return filter
  518. }