moby/daemon/container_operations_unix.go
Jana Radhakrishnan 99a98ccc14 Add support for docker run in swarm mode overlay
This PR adds support for running regular containers to be connected to
swarm mode multi-host network so that:
    - containers connected to the same network across the cluster can
      discover and connect to each other.
    - Get access to services(and their associated loadbalancers)
      connected to the same network

Signed-off-by: Jana Radhakrishnan <mrjana@docker.com>
2016-09-07 21:20:41 -07:00

406 lines
12 KiB
Go

// +build linux freebsd
package daemon
import (
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
"github.com/Sirupsen/logrus"
containertypes "github.com/docker/docker/api/types/container"
networktypes "github.com/docker/docker/api/types/network"
"github.com/docker/docker/container"
"github.com/docker/docker/daemon/links"
"github.com/docker/docker/daemon/network"
"github.com/docker/docker/pkg/fileutils"
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/stringid"
"github.com/docker/docker/runconfig"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/opencontainers/runc/libcontainer/label"
"github.com/opencontainers/runtime-spec/specs-go"
)
func u32Ptr(i int64) *uint32 { u := uint32(i); return &u }
func fmPtr(i int64) *os.FileMode { fm := os.FileMode(i); return &fm }
func (daemon *Daemon) setupLinkedContainers(container *container.Container) ([]string, error) {
var env []string
children := daemon.children(container)
bridgeSettings := container.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
if bridgeSettings == nil || bridgeSettings.EndpointSettings == nil {
return nil, nil
}
for linkAlias, child := range children {
if !child.IsRunning() {
return nil, fmt.Errorf("Cannot link to a non running container: %s AS %s", child.Name, linkAlias)
}
childBridgeSettings := child.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
if childBridgeSettings == nil || childBridgeSettings.EndpointSettings == nil {
return nil, fmt.Errorf("container %s not attached to default bridge network", child.ID)
}
link := links.NewLink(
bridgeSettings.IPAddress,
childBridgeSettings.IPAddress,
linkAlias,
child.Config.Env,
child.Config.ExposedPorts,
)
for _, envVar := range link.ToEnv() {
env = append(env, envVar)
}
}
return env, nil
}
// getSize returns the real size & virtual size of the container.
func (daemon *Daemon) getSize(container *container.Container) (int64, int64) {
var (
sizeRw, sizeRootfs int64
err error
)
if err := daemon.Mount(container); err != nil {
logrus.Errorf("Failed to compute size of container rootfs %s: %s", container.ID, err)
return sizeRw, sizeRootfs
}
defer daemon.Unmount(container)
sizeRw, err = container.RWLayer.Size()
if err != nil {
logrus.Errorf("Driver %s couldn't return diff size of container %s: %s",
daemon.GraphDriverName(), container.ID, err)
// FIXME: GetSize should return an error. Not changing it now in case
// there is a side-effect.
sizeRw = -1
}
if parent := container.RWLayer.Parent(); parent != nil {
sizeRootfs, err = parent.Size()
if err != nil {
sizeRootfs = -1
} else if sizeRw != -1 {
sizeRootfs += sizeRw
}
}
return sizeRw, sizeRootfs
}
// ConnectToNetwork connects a container to a network
func (daemon *Daemon) ConnectToNetwork(container *container.Container, idOrName string, endpointConfig *networktypes.EndpointSettings) error {
if endpointConfig == nil {
endpointConfig = &networktypes.EndpointSettings{}
}
if !container.Running {
if container.RemovalInProgress || container.Dead {
return errRemovalContainer(container.ID)
}
n, err := daemon.FindNetwork(idOrName)
if err == nil && n != nil {
if err := daemon.updateNetworkConfig(container, n, endpointConfig, true); err != nil {
return err
}
} else {
container.NetworkSettings.Networks[idOrName] = &network.EndpointSettings{
EndpointSettings: endpointConfig,
}
}
} else {
if err := daemon.connectToNetwork(container, idOrName, endpointConfig, true); err != nil {
return err
}
}
if err := container.ToDiskLocking(); err != nil {
return fmt.Errorf("Error saving container to disk: %v", err)
}
return nil
}
// DisconnectFromNetwork disconnects container from network n.
func (daemon *Daemon) DisconnectFromNetwork(container *container.Container, networkName string, force bool) error {
n, err := daemon.FindNetwork(networkName)
if !container.Running || (err != nil && force) {
if container.RemovalInProgress || container.Dead {
return errRemovalContainer(container.ID)
}
// In case networkName is resolved we will use n.Name()
// this will cover the case where network id is passed.
if n != nil {
networkName = n.Name()
}
if _, ok := container.NetworkSettings.Networks[networkName]; !ok {
return fmt.Errorf("container %s is not connected to the network %s", container.ID, networkName)
}
delete(container.NetworkSettings.Networks, networkName)
} else if err == nil {
if container.HostConfig.NetworkMode.IsHost() && containertypes.NetworkMode(n.Type()).IsHost() {
return runconfig.ErrConflictHostNetwork
}
if err := daemon.disconnectFromNetwork(container, n, false); err != nil {
return err
}
} else {
return err
}
if err := container.ToDiskLocking(); err != nil {
return fmt.Errorf("Error saving container to disk: %v", err)
}
if n != nil {
attributes := map[string]string{
"container": container.ID,
}
daemon.LogNetworkEventWithAttributes(n, "disconnect", attributes)
}
return nil
}
func (daemon *Daemon) getIpcContainer(container *container.Container) (*container.Container, error) {
containerID := container.HostConfig.IpcMode.Container()
c, err := daemon.GetContainer(containerID)
if err != nil {
return nil, err
}
if !c.IsRunning() {
return nil, fmt.Errorf("cannot join IPC of a non running container: %s", containerID)
}
if c.IsRestarting() {
return nil, errContainerIsRestarting(container.ID)
}
return c, nil
}
func (daemon *Daemon) getPidContainer(container *container.Container) (*container.Container, error) {
containerID := container.HostConfig.PidMode.Container()
c, err := daemon.GetContainer(containerID)
if err != nil {
return nil, err
}
if !c.IsRunning() {
return nil, fmt.Errorf("cannot join PID of a non running container: %s", containerID)
}
if c.IsRestarting() {
return nil, errContainerIsRestarting(container.ID)
}
return c, nil
}
func (daemon *Daemon) setupIpcDirs(c *container.Container) error {
var err error
c.ShmPath, err = c.ShmResourcePath()
if err != nil {
return err
}
if c.HostConfig.IpcMode.IsContainer() {
ic, err := daemon.getIpcContainer(c)
if err != nil {
return err
}
c.ShmPath = ic.ShmPath
} else if c.HostConfig.IpcMode.IsHost() {
if _, err := os.Stat("/dev/shm"); err != nil {
return fmt.Errorf("/dev/shm is not mounted, but must be for --ipc=host")
}
c.ShmPath = "/dev/shm"
} else {
rootUID, rootGID := daemon.GetRemappedUIDGID()
if !c.HasMountFor("/dev/shm") {
shmPath, err := c.ShmResourcePath()
if err != nil {
return err
}
if err := idtools.MkdirAllAs(shmPath, 0700, rootUID, rootGID); err != nil {
return err
}
shmSize := container.DefaultSHMSize
if c.HostConfig.ShmSize != 0 {
shmSize = c.HostConfig.ShmSize
}
shmproperty := "mode=1777,size=" + strconv.FormatInt(shmSize, 10)
if err := syscall.Mount("shm", shmPath, "tmpfs", uintptr(syscall.MS_NOEXEC|syscall.MS_NOSUID|syscall.MS_NODEV), label.FormatMountLabel(shmproperty, c.GetMountLabel())); err != nil {
return fmt.Errorf("mounting shm tmpfs: %s", err)
}
if err := os.Chown(shmPath, rootUID, rootGID); err != nil {
return err
}
}
}
return nil
}
func (daemon *Daemon) mountVolumes(container *container.Container) error {
mounts, err := daemon.setupMounts(container)
if err != nil {
return err
}
for _, m := range mounts {
dest, err := container.GetResourcePath(m.Destination)
if err != nil {
return err
}
var stat os.FileInfo
stat, err = os.Stat(m.Source)
if err != nil {
return err
}
if err = fileutils.CreateIfNotExists(dest, stat.IsDir()); err != nil {
return err
}
opts := "rbind,ro"
if m.Writable {
opts = "rbind,rw"
}
if err := mount.Mount(m.Source, dest, "bind", opts); err != nil {
return err
}
// mountVolumes() seems to be called for temporary mounts
// outside the container. Soon these will be unmounted with
// lazy unmount option and given we have mounted the rbind,
// all the submounts will propagate if these are shared. If
// daemon is running in host namespace and has / as shared
// then these unmounts will propagate and unmount original
// mount as well. So make all these mounts rprivate.
// Do not use propagation property of volume as that should
// apply only when mounting happen inside the container.
if err := mount.MakeRPrivate(dest); err != nil {
return err
}
}
return nil
}
func killProcessDirectly(container *container.Container) error {
if _, err := container.WaitStop(10 * time.Second); err != nil {
// Ensure that we don't kill ourselves
if pid := container.GetPID(); pid != 0 {
logrus.Infof("Container %s failed to exit within 10 seconds of kill - trying direct SIGKILL", stringid.TruncateID(container.ID))
if err := syscall.Kill(pid, 9); err != nil {
if err != syscall.ESRCH {
return err
}
e := errNoSuchProcess{pid, 9}
logrus.Debug(e)
return e
}
}
}
return nil
}
func specDevice(d *configs.Device) specs.Device {
return specs.Device{
Type: string(d.Type),
Path: d.Path,
Major: d.Major,
Minor: d.Minor,
FileMode: fmPtr(int64(d.FileMode)),
UID: u32Ptr(int64(d.Uid)),
GID: u32Ptr(int64(d.Gid)),
}
}
func specDeviceCgroup(d *configs.Device) specs.DeviceCgroup {
t := string(d.Type)
return specs.DeviceCgroup{
Allow: true,
Type: &t,
Major: &d.Major,
Minor: &d.Minor,
Access: &d.Permissions,
}
}
func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []specs.Device, devPermissions []specs.DeviceCgroup, err error) {
resolvedPathOnHost := deviceMapping.PathOnHost
// check if it is a symbolic link
if src, e := os.Lstat(deviceMapping.PathOnHost); e == nil && src.Mode()&os.ModeSymlink == os.ModeSymlink {
if linkedPathOnHost, e := filepath.EvalSymlinks(deviceMapping.PathOnHost); e == nil {
resolvedPathOnHost = linkedPathOnHost
}
}
device, err := devices.DeviceFromPath(resolvedPathOnHost, deviceMapping.CgroupPermissions)
// if there was no error, return the device
if err == nil {
device.Path = deviceMapping.PathInContainer
return append(devs, specDevice(device)), append(devPermissions, specDeviceCgroup(device)), nil
}
// if the device is not a device node
// try to see if it's a directory holding many devices
if err == devices.ErrNotADevice {
// check if it is a directory
if src, e := os.Stat(resolvedPathOnHost); e == nil && src.IsDir() {
// mount the internal devices recursively
filepath.Walk(resolvedPathOnHost, func(dpath string, f os.FileInfo, e error) error {
childDevice, e := devices.DeviceFromPath(dpath, deviceMapping.CgroupPermissions)
if e != nil {
// ignore the device
return nil
}
// add the device to userSpecified devices
childDevice.Path = strings.Replace(dpath, resolvedPathOnHost, deviceMapping.PathInContainer, 1)
devs = append(devs, specDevice(childDevice))
devPermissions = append(devPermissions, specDeviceCgroup(childDevice))
return nil
})
}
}
if len(devs) > 0 {
return devs, devPermissions, nil
}
return devs, devPermissions, fmt.Errorf("error gathering device information while adding custom device %q: %s", deviceMapping.PathOnHost, err)
}
func detachMounted(path string) error {
return syscall.Unmount(path, syscall.MNT_DETACH)
}
func isLinkable(child *container.Container) bool {
// A container is linkable only if it belongs to the default network
_, ok := child.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
return ok
}
func errRemovalContainer(containerID string) error {
return fmt.Errorf("Container %s is marked for removal and cannot be connected or disconnected to the network", containerID)
}
func enableIPOnPredefinedNetwork() bool {
return false
}