container.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. package container
  2. import (
  3. "errors"
  4. "fmt"
  5. "net"
  6. "strings"
  7. "time"
  8. "github.com/Sirupsen/logrus"
  9. clustertypes "github.com/docker/docker/daemon/cluster/provider"
  10. "github.com/docker/docker/reference"
  11. "github.com/docker/engine-api/types"
  12. enginecontainer "github.com/docker/engine-api/types/container"
  13. "github.com/docker/engine-api/types/network"
  14. "github.com/docker/swarmkit/agent/exec"
  15. "github.com/docker/swarmkit/api"
  16. )
  // cpuQuotaPeriod is the CPU period applied to tasks that carry a CPU limit.
  // It is pinned explicitly to 100ms, the kernel's default setting for CPU
  // quota. See
  // https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
  const cpuQuotaPeriod = 100 * time.Millisecond

  // systemLabelPrefix is the reserved namespace under which system labels are
  // written onto containers.
  const systemLabelPrefix = "com.docker.swarm"
  24. // containerConfig converts task properties into docker container compatible
  25. // components.
  26. type containerConfig struct {
  27. task *api.Task
  28. networksAttachments map[string]*api.NetworkAttachment
  29. }
  30. // newContainerConfig returns a validated container config. No methods should
  31. // return an error if this function returns without error.
  32. func newContainerConfig(t *api.Task) (*containerConfig, error) {
  33. var c containerConfig
  34. return &c, c.setTask(t)
  35. }
  36. func (c *containerConfig) setTask(t *api.Task) error {
  37. container := t.Spec.GetContainer()
  38. if container == nil {
  39. return exec.ErrRuntimeUnsupported
  40. }
  41. if container.Image == "" {
  42. return ErrImageRequired
  43. }
  44. // index the networks by name
  45. c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks))
  46. for _, attachment := range t.Networks {
  47. c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment
  48. }
  49. c.task = t
  50. return nil
  51. }
  52. func (c *containerConfig) endpoint() *api.Endpoint {
  53. return c.task.Endpoint
  54. }
  55. func (c *containerConfig) spec() *api.ContainerSpec {
  56. return c.task.Spec.GetContainer()
  57. }
  58. func (c *containerConfig) name() string {
  59. if c.task.Annotations.Name != "" {
  60. // if set, use the container Annotations.Name field, set in the orchestrator.
  61. return c.task.Annotations.Name
  62. }
  63. // fallback to service.slot.id.
  64. return strings.Join([]string{c.task.ServiceAnnotations.Name, fmt.Sprint(c.task.Slot), c.task.ID}, ".")
  65. }
  66. func (c *containerConfig) image() string {
  67. raw := c.spec().Image
  68. ref, err := reference.ParseNamed(raw)
  69. if err != nil {
  70. return raw
  71. }
  72. return reference.WithDefaultTag(ref).String()
  73. }
  74. func (c *containerConfig) volumes() map[string]struct{} {
  75. r := make(map[string]struct{})
  76. for _, m := range c.spec().Mounts {
  77. // pick off all the volume mounts.
  78. if m.Type != api.MountTypeVolume || m.Source != "" {
  79. continue
  80. }
  81. r[m.Target] = struct{}{}
  82. }
  83. return r
  84. }
  85. func (c *containerConfig) config() *enginecontainer.Config {
  86. config := &enginecontainer.Config{
  87. Labels: c.labels(),
  88. User: c.spec().User,
  89. Env: c.spec().Env,
  90. WorkingDir: c.spec().Dir,
  91. Image: c.image(),
  92. Volumes: c.volumes(),
  93. }
  94. if len(c.spec().Command) > 0 {
  95. // If Command is provided, we replace the whole invocation with Command
  96. // by replacing Entrypoint and specifying Cmd. Args is ignored in this
  97. // case.
  98. config.Entrypoint = append(config.Entrypoint, c.spec().Command...)
  99. config.Cmd = append(config.Cmd, c.spec().Args...)
  100. } else if len(c.spec().Args) > 0 {
  101. // In this case, we assume the image has an Entrypoint and Args
  102. // specifies the arguments for that entrypoint.
  103. config.Cmd = c.spec().Args
  104. }
  105. return config
  106. }
  107. func (c *containerConfig) labels() map[string]string {
  108. var (
  109. system = map[string]string{
  110. "task": "", // mark as cluster task
  111. "task.id": c.task.ID,
  112. "task.name": fmt.Sprintf("%v.%v", c.task.ServiceAnnotations.Name, c.task.Slot),
  113. "node.id": c.task.NodeID,
  114. "service.id": c.task.ServiceID,
  115. "service.name": c.task.ServiceAnnotations.Name,
  116. }
  117. labels = make(map[string]string)
  118. )
  119. // base labels are those defined in the spec.
  120. for k, v := range c.spec().Labels {
  121. labels[k] = v
  122. }
  123. // we then apply the overrides from the task, which may be set via the
  124. // orchestrator.
  125. for k, v := range c.task.Annotations.Labels {
  126. labels[k] = v
  127. }
  128. // finally, we apply the system labels, which override all labels.
  129. for k, v := range system {
  130. labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v
  131. }
  132. return labels
  133. }
  134. func (c *containerConfig) bindMounts() []string {
  135. var r []string
  136. for _, val := range c.spec().Mounts {
  137. mask := getMountMask(&val)
  138. if val.Type == api.MountTypeBind || (val.Type == api.MountTypeVolume && val.Source != "") {
  139. r = append(r, fmt.Sprintf("%s:%s:%s", val.Source, val.Target, mask))
  140. }
  141. }
  142. return r
  143. }
  144. func getMountMask(m *api.Mount) string {
  145. maskOpts := []string{"ro"}
  146. if m.Writable {
  147. maskOpts[0] = "rw"
  148. }
  149. if m.BindOptions != nil {
  150. switch m.BindOptions.Propagation {
  151. case api.MountPropagationPrivate:
  152. maskOpts = append(maskOpts, "private")
  153. case api.MountPropagationRPrivate:
  154. maskOpts = append(maskOpts, "rprivate")
  155. case api.MountPropagationShared:
  156. maskOpts = append(maskOpts, "shared")
  157. case api.MountPropagationRShared:
  158. maskOpts = append(maskOpts, "rshared")
  159. case api.MountPropagationSlave:
  160. maskOpts = append(maskOpts, "slave")
  161. case api.MountPropagationRSlave:
  162. maskOpts = append(maskOpts, "rslave")
  163. }
  164. }
  165. if m.VolumeOptions != nil {
  166. if !m.VolumeOptions.Populate {
  167. maskOpts = append(maskOpts, "nocopy")
  168. }
  169. }
  170. return strings.Join(maskOpts, ",")
  171. }
  172. func (c *containerConfig) hostConfig() *enginecontainer.HostConfig {
  173. return &enginecontainer.HostConfig{
  174. Resources: c.resources(),
  175. Binds: c.bindMounts(),
  176. }
  177. }
  178. // This handles the case of volumes that are defined inside a service Mount
  179. func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *types.VolumeCreateRequest {
  180. var (
  181. driverName string
  182. driverOpts map[string]string
  183. labels map[string]string
  184. )
  185. if mount.VolumeOptions != nil && mount.VolumeOptions.DriverConfig != nil {
  186. driverName = mount.VolumeOptions.DriverConfig.Name
  187. driverOpts = mount.VolumeOptions.DriverConfig.Options
  188. labels = mount.VolumeOptions.Labels
  189. }
  190. if mount.VolumeOptions != nil {
  191. return &types.VolumeCreateRequest{
  192. Name: mount.Source,
  193. Driver: driverName,
  194. DriverOpts: driverOpts,
  195. Labels: labels,
  196. }
  197. }
  198. return nil
  199. }
  200. func (c *containerConfig) resources() enginecontainer.Resources {
  201. resources := enginecontainer.Resources{}
  202. // If no limits are specified let the engine use its defaults.
  203. //
  204. // TODO(aluzzardi): We might want to set some limits anyway otherwise
  205. // "unlimited" tasks will step over the reservation of other tasks.
  206. r := c.task.Spec.Resources
  207. if r == nil || r.Limits == nil {
  208. return resources
  209. }
  210. if r.Limits.MemoryBytes > 0 {
  211. resources.Memory = r.Limits.MemoryBytes
  212. }
  213. if r.Limits.NanoCPUs > 0 {
  214. // CPU Period must be set in microseconds.
  215. resources.CPUPeriod = int64(cpuQuotaPeriod / time.Microsecond)
  216. resources.CPUQuota = r.Limits.NanoCPUs * resources.CPUPeriod / 1e9
  217. }
  218. return resources
  219. }
  220. // Docker daemon supports just 1 network during container create.
  221. func (c *containerConfig) createNetworkingConfig() *network.NetworkingConfig {
  222. var networks []*api.NetworkAttachment
  223. if c.task.Spec.GetContainer() != nil {
  224. networks = c.task.Networks
  225. }
  226. epConfig := make(map[string]*network.EndpointSettings)
  227. if len(networks) > 0 {
  228. epConfig[networks[0].Network.Spec.Annotations.Name] = getEndpointConfig(networks[0])
  229. }
  230. return &network.NetworkingConfig{EndpointsConfig: epConfig}
  231. }
  232. // TODO: Merge this function with createNetworkingConfig after daemon supports multiple networks in container create
  233. func (c *containerConfig) connectNetworkingConfig() *network.NetworkingConfig {
  234. var networks []*api.NetworkAttachment
  235. if c.task.Spec.GetContainer() != nil {
  236. networks = c.task.Networks
  237. }
  238. // First network is used during container create. Other networks are used in "docker network connect"
  239. if len(networks) < 2 {
  240. return nil
  241. }
  242. epConfig := make(map[string]*network.EndpointSettings)
  243. for _, na := range networks[1:] {
  244. epConfig[na.Network.Spec.Annotations.Name] = getEndpointConfig(na)
  245. }
  246. return &network.NetworkingConfig{EndpointsConfig: epConfig}
  247. }
  248. func getEndpointConfig(na *api.NetworkAttachment) *network.EndpointSettings {
  249. var ipv4, ipv6 string
  250. for _, addr := range na.Addresses {
  251. ip, _, err := net.ParseCIDR(addr)
  252. if err != nil {
  253. continue
  254. }
  255. if ip.To4() != nil {
  256. ipv4 = ip.String()
  257. continue
  258. }
  259. if ip.To16() != nil {
  260. ipv6 = ip.String()
  261. }
  262. }
  263. return &network.EndpointSettings{
  264. IPAMConfig: &network.EndpointIPAMConfig{
  265. IPv4Address: ipv4,
  266. IPv6Address: ipv6,
  267. },
  268. }
  269. }
  270. func (c *containerConfig) virtualIP(networkID string) string {
  271. if c.task.Endpoint == nil {
  272. return ""
  273. }
  274. for _, eVip := range c.task.Endpoint.VirtualIPs {
  275. // We only support IPv4 VIPs for now.
  276. if eVip.NetworkID == networkID {
  277. vip, _, err := net.ParseCIDR(eVip.Addr)
  278. if err != nil {
  279. return ""
  280. }
  281. return vip.String()
  282. }
  283. }
  284. return ""
  285. }
  286. func (c *containerConfig) serviceConfig() *clustertypes.ServiceConfig {
  287. if len(c.task.Networks) == 0 {
  288. return nil
  289. }
  290. logrus.Debugf("Creating service config in agent for t = %+v", c.task)
  291. svcCfg := &clustertypes.ServiceConfig{
  292. Name: c.task.ServiceAnnotations.Name,
  293. Aliases: make(map[string][]string),
  294. ID: c.task.ServiceID,
  295. VirtualAddresses: make(map[string]*clustertypes.VirtualAddress),
  296. }
  297. for _, na := range c.task.Networks {
  298. svcCfg.VirtualAddresses[na.Network.ID] = &clustertypes.VirtualAddress{
  299. // We support only IPv4 virtual IP for now.
  300. IPv4: c.virtualIP(na.Network.ID),
  301. }
  302. if len(na.Aliases) > 0 {
  303. svcCfg.Aliases[na.Network.ID] = na.Aliases
  304. }
  305. }
  306. if c.task.Endpoint != nil {
  307. for _, ePort := range c.task.Endpoint.Ports {
  308. svcCfg.ExposedPorts = append(svcCfg.ExposedPorts, &clustertypes.PortConfig{
  309. Name: ePort.Name,
  310. Protocol: int32(ePort.Protocol),
  311. TargetPort: ePort.TargetPort,
  312. PublishedPort: ePort.PublishedPort,
  313. })
  314. }
  315. }
  316. return svcCfg
  317. }
  318. // networks returns a list of network names attached to the container. The
  319. // returned name can be used to lookup the corresponding network create
  320. // options.
  321. func (c *containerConfig) networks() []string {
  322. var networks []string
  323. for name := range c.networksAttachments {
  324. networks = append(networks, name)
  325. }
  326. return networks
  327. }
  328. func (c *containerConfig) networkCreateRequest(name string) (clustertypes.NetworkCreateRequest, error) {
  329. na, ok := c.networksAttachments[name]
  330. if !ok {
  331. return clustertypes.NetworkCreateRequest{}, errors.New("container: unknown network referenced")
  332. }
  333. options := types.NetworkCreate{
  334. // ID: na.Network.ID,
  335. Driver: na.Network.DriverState.Name,
  336. IPAM: network.IPAM{
  337. Driver: na.Network.IPAM.Driver.Name,
  338. },
  339. Options: na.Network.DriverState.Options,
  340. Labels: na.Network.Spec.Annotations.Labels,
  341. Internal: na.Network.Spec.Internal,
  342. EnableIPv6: na.Network.Spec.Ipv6Enabled,
  343. CheckDuplicate: true,
  344. }
  345. for _, ic := range na.Network.IPAM.Configs {
  346. c := network.IPAMConfig{
  347. Subnet: ic.Subnet,
  348. IPRange: ic.Range,
  349. Gateway: ic.Gateway,
  350. }
  351. options.IPAM.Config = append(options.IPAM.Config, c)
  352. }
  353. return clustertypes.NetworkCreateRequest{na.Network.ID, types.NetworkCreateRequest{Name: name, NetworkCreate: options}}, nil
  354. }