99a98ccc14
This PR adds support for running regular containers to be connected to swarm mode multi-host network so that: - containers connected to the same network across the cluster can discover and connect to each other. - Get access to services(and their associated loadbalancers) connected to the same network Signed-off-by: Jana Radhakrishnan <mrjana@docker.com>
406 lines
12 KiB
Go
406 lines
12 KiB
Go
// +build linux freebsd
|
|
|
|
package daemon
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"syscall"
|
|
"time"
|
|
|
|
"github.com/Sirupsen/logrus"
|
|
containertypes "github.com/docker/docker/api/types/container"
|
|
networktypes "github.com/docker/docker/api/types/network"
|
|
"github.com/docker/docker/container"
|
|
"github.com/docker/docker/daemon/links"
|
|
"github.com/docker/docker/daemon/network"
|
|
"github.com/docker/docker/pkg/fileutils"
|
|
"github.com/docker/docker/pkg/idtools"
|
|
"github.com/docker/docker/pkg/mount"
|
|
"github.com/docker/docker/pkg/stringid"
|
|
"github.com/docker/docker/runconfig"
|
|
"github.com/opencontainers/runc/libcontainer/configs"
|
|
"github.com/opencontainers/runc/libcontainer/devices"
|
|
"github.com/opencontainers/runc/libcontainer/label"
|
|
"github.com/opencontainers/runtime-spec/specs-go"
|
|
)
|
|
|
|
func u32Ptr(i int64) *uint32 { u := uint32(i); return &u }
|
|
func fmPtr(i int64) *os.FileMode { fm := os.FileMode(i); return &fm }
|
|
|
|
func (daemon *Daemon) setupLinkedContainers(container *container.Container) ([]string, error) {
|
|
var env []string
|
|
children := daemon.children(container)
|
|
|
|
bridgeSettings := container.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
|
|
if bridgeSettings == nil || bridgeSettings.EndpointSettings == nil {
|
|
return nil, nil
|
|
}
|
|
|
|
for linkAlias, child := range children {
|
|
if !child.IsRunning() {
|
|
return nil, fmt.Errorf("Cannot link to a non running container: %s AS %s", child.Name, linkAlias)
|
|
}
|
|
|
|
childBridgeSettings := child.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
|
|
if childBridgeSettings == nil || childBridgeSettings.EndpointSettings == nil {
|
|
return nil, fmt.Errorf("container %s not attached to default bridge network", child.ID)
|
|
}
|
|
|
|
link := links.NewLink(
|
|
bridgeSettings.IPAddress,
|
|
childBridgeSettings.IPAddress,
|
|
linkAlias,
|
|
child.Config.Env,
|
|
child.Config.ExposedPorts,
|
|
)
|
|
|
|
for _, envVar := range link.ToEnv() {
|
|
env = append(env, envVar)
|
|
}
|
|
}
|
|
|
|
return env, nil
|
|
}
|
|
|
|
// getSize returns the real size & virtual size of the container.
|
|
func (daemon *Daemon) getSize(container *container.Container) (int64, int64) {
|
|
var (
|
|
sizeRw, sizeRootfs int64
|
|
err error
|
|
)
|
|
|
|
if err := daemon.Mount(container); err != nil {
|
|
logrus.Errorf("Failed to compute size of container rootfs %s: %s", container.ID, err)
|
|
return sizeRw, sizeRootfs
|
|
}
|
|
defer daemon.Unmount(container)
|
|
|
|
sizeRw, err = container.RWLayer.Size()
|
|
if err != nil {
|
|
logrus.Errorf("Driver %s couldn't return diff size of container %s: %s",
|
|
daemon.GraphDriverName(), container.ID, err)
|
|
// FIXME: GetSize should return an error. Not changing it now in case
|
|
// there is a side-effect.
|
|
sizeRw = -1
|
|
}
|
|
|
|
if parent := container.RWLayer.Parent(); parent != nil {
|
|
sizeRootfs, err = parent.Size()
|
|
if err != nil {
|
|
sizeRootfs = -1
|
|
} else if sizeRw != -1 {
|
|
sizeRootfs += sizeRw
|
|
}
|
|
}
|
|
return sizeRw, sizeRootfs
|
|
}
|
|
|
|
// ConnectToNetwork connects a container to a network
|
|
func (daemon *Daemon) ConnectToNetwork(container *container.Container, idOrName string, endpointConfig *networktypes.EndpointSettings) error {
|
|
if endpointConfig == nil {
|
|
endpointConfig = &networktypes.EndpointSettings{}
|
|
}
|
|
if !container.Running {
|
|
if container.RemovalInProgress || container.Dead {
|
|
return errRemovalContainer(container.ID)
|
|
}
|
|
|
|
n, err := daemon.FindNetwork(idOrName)
|
|
if err == nil && n != nil {
|
|
if err := daemon.updateNetworkConfig(container, n, endpointConfig, true); err != nil {
|
|
return err
|
|
}
|
|
} else {
|
|
container.NetworkSettings.Networks[idOrName] = &network.EndpointSettings{
|
|
EndpointSettings: endpointConfig,
|
|
}
|
|
}
|
|
} else {
|
|
if err := daemon.connectToNetwork(container, idOrName, endpointConfig, true); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := container.ToDiskLocking(); err != nil {
|
|
return fmt.Errorf("Error saving container to disk: %v", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// DisconnectFromNetwork disconnects container from network n.
|
|
func (daemon *Daemon) DisconnectFromNetwork(container *container.Container, networkName string, force bool) error {
|
|
n, err := daemon.FindNetwork(networkName)
|
|
if !container.Running || (err != nil && force) {
|
|
if container.RemovalInProgress || container.Dead {
|
|
return errRemovalContainer(container.ID)
|
|
}
|
|
// In case networkName is resolved we will use n.Name()
|
|
// this will cover the case where network id is passed.
|
|
if n != nil {
|
|
networkName = n.Name()
|
|
}
|
|
if _, ok := container.NetworkSettings.Networks[networkName]; !ok {
|
|
return fmt.Errorf("container %s is not connected to the network %s", container.ID, networkName)
|
|
}
|
|
delete(container.NetworkSettings.Networks, networkName)
|
|
} else if err == nil {
|
|
if container.HostConfig.NetworkMode.IsHost() && containertypes.NetworkMode(n.Type()).IsHost() {
|
|
return runconfig.ErrConflictHostNetwork
|
|
}
|
|
|
|
if err := daemon.disconnectFromNetwork(container, n, false); err != nil {
|
|
return err
|
|
}
|
|
} else {
|
|
return err
|
|
}
|
|
|
|
if err := container.ToDiskLocking(); err != nil {
|
|
return fmt.Errorf("Error saving container to disk: %v", err)
|
|
}
|
|
|
|
if n != nil {
|
|
attributes := map[string]string{
|
|
"container": container.ID,
|
|
}
|
|
daemon.LogNetworkEventWithAttributes(n, "disconnect", attributes)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (daemon *Daemon) getIpcContainer(container *container.Container) (*container.Container, error) {
|
|
containerID := container.HostConfig.IpcMode.Container()
|
|
c, err := daemon.GetContainer(containerID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if !c.IsRunning() {
|
|
return nil, fmt.Errorf("cannot join IPC of a non running container: %s", containerID)
|
|
}
|
|
if c.IsRestarting() {
|
|
return nil, errContainerIsRestarting(container.ID)
|
|
}
|
|
return c, nil
|
|
}
|
|
|
|
func (daemon *Daemon) getPidContainer(container *container.Container) (*container.Container, error) {
|
|
containerID := container.HostConfig.PidMode.Container()
|
|
c, err := daemon.GetContainer(containerID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if !c.IsRunning() {
|
|
return nil, fmt.Errorf("cannot join PID of a non running container: %s", containerID)
|
|
}
|
|
if c.IsRestarting() {
|
|
return nil, errContainerIsRestarting(container.ID)
|
|
}
|
|
return c, nil
|
|
}
|
|
|
|
func (daemon *Daemon) setupIpcDirs(c *container.Container) error {
|
|
var err error
|
|
|
|
c.ShmPath, err = c.ShmResourcePath()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if c.HostConfig.IpcMode.IsContainer() {
|
|
ic, err := daemon.getIpcContainer(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
c.ShmPath = ic.ShmPath
|
|
} else if c.HostConfig.IpcMode.IsHost() {
|
|
if _, err := os.Stat("/dev/shm"); err != nil {
|
|
return fmt.Errorf("/dev/shm is not mounted, but must be for --ipc=host")
|
|
}
|
|
c.ShmPath = "/dev/shm"
|
|
} else {
|
|
rootUID, rootGID := daemon.GetRemappedUIDGID()
|
|
if !c.HasMountFor("/dev/shm") {
|
|
shmPath, err := c.ShmResourcePath()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := idtools.MkdirAllAs(shmPath, 0700, rootUID, rootGID); err != nil {
|
|
return err
|
|
}
|
|
|
|
shmSize := container.DefaultSHMSize
|
|
if c.HostConfig.ShmSize != 0 {
|
|
shmSize = c.HostConfig.ShmSize
|
|
}
|
|
shmproperty := "mode=1777,size=" + strconv.FormatInt(shmSize, 10)
|
|
if err := syscall.Mount("shm", shmPath, "tmpfs", uintptr(syscall.MS_NOEXEC|syscall.MS_NOSUID|syscall.MS_NODEV), label.FormatMountLabel(shmproperty, c.GetMountLabel())); err != nil {
|
|
return fmt.Errorf("mounting shm tmpfs: %s", err)
|
|
}
|
|
if err := os.Chown(shmPath, rootUID, rootGID); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (daemon *Daemon) mountVolumes(container *container.Container) error {
|
|
mounts, err := daemon.setupMounts(container)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, m := range mounts {
|
|
dest, err := container.GetResourcePath(m.Destination)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
var stat os.FileInfo
|
|
stat, err = os.Stat(m.Source)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err = fileutils.CreateIfNotExists(dest, stat.IsDir()); err != nil {
|
|
return err
|
|
}
|
|
|
|
opts := "rbind,ro"
|
|
if m.Writable {
|
|
opts = "rbind,rw"
|
|
}
|
|
|
|
if err := mount.Mount(m.Source, dest, "bind", opts); err != nil {
|
|
return err
|
|
}
|
|
|
|
// mountVolumes() seems to be called for temporary mounts
|
|
// outside the container. Soon these will be unmounted with
|
|
// lazy unmount option and given we have mounted the rbind,
|
|
// all the submounts will propagate if these are shared. If
|
|
// daemon is running in host namespace and has / as shared
|
|
// then these unmounts will propagate and unmount original
|
|
// mount as well. So make all these mounts rprivate.
|
|
// Do not use propagation property of volume as that should
|
|
// apply only when mounting happen inside the container.
|
|
if err := mount.MakeRPrivate(dest); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func killProcessDirectly(container *container.Container) error {
|
|
if _, err := container.WaitStop(10 * time.Second); err != nil {
|
|
// Ensure that we don't kill ourselves
|
|
if pid := container.GetPID(); pid != 0 {
|
|
logrus.Infof("Container %s failed to exit within 10 seconds of kill - trying direct SIGKILL", stringid.TruncateID(container.ID))
|
|
if err := syscall.Kill(pid, 9); err != nil {
|
|
if err != syscall.ESRCH {
|
|
return err
|
|
}
|
|
e := errNoSuchProcess{pid, 9}
|
|
logrus.Debug(e)
|
|
return e
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func specDevice(d *configs.Device) specs.Device {
|
|
return specs.Device{
|
|
Type: string(d.Type),
|
|
Path: d.Path,
|
|
Major: d.Major,
|
|
Minor: d.Minor,
|
|
FileMode: fmPtr(int64(d.FileMode)),
|
|
UID: u32Ptr(int64(d.Uid)),
|
|
GID: u32Ptr(int64(d.Gid)),
|
|
}
|
|
}
|
|
|
|
func specDeviceCgroup(d *configs.Device) specs.DeviceCgroup {
|
|
t := string(d.Type)
|
|
return specs.DeviceCgroup{
|
|
Allow: true,
|
|
Type: &t,
|
|
Major: &d.Major,
|
|
Minor: &d.Minor,
|
|
Access: &d.Permissions,
|
|
}
|
|
}
|
|
|
|
func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []specs.Device, devPermissions []specs.DeviceCgroup, err error) {
|
|
resolvedPathOnHost := deviceMapping.PathOnHost
|
|
|
|
// check if it is a symbolic link
|
|
if src, e := os.Lstat(deviceMapping.PathOnHost); e == nil && src.Mode()&os.ModeSymlink == os.ModeSymlink {
|
|
if linkedPathOnHost, e := filepath.EvalSymlinks(deviceMapping.PathOnHost); e == nil {
|
|
resolvedPathOnHost = linkedPathOnHost
|
|
}
|
|
}
|
|
|
|
device, err := devices.DeviceFromPath(resolvedPathOnHost, deviceMapping.CgroupPermissions)
|
|
// if there was no error, return the device
|
|
if err == nil {
|
|
device.Path = deviceMapping.PathInContainer
|
|
return append(devs, specDevice(device)), append(devPermissions, specDeviceCgroup(device)), nil
|
|
}
|
|
|
|
// if the device is not a device node
|
|
// try to see if it's a directory holding many devices
|
|
if err == devices.ErrNotADevice {
|
|
|
|
// check if it is a directory
|
|
if src, e := os.Stat(resolvedPathOnHost); e == nil && src.IsDir() {
|
|
|
|
// mount the internal devices recursively
|
|
filepath.Walk(resolvedPathOnHost, func(dpath string, f os.FileInfo, e error) error {
|
|
childDevice, e := devices.DeviceFromPath(dpath, deviceMapping.CgroupPermissions)
|
|
if e != nil {
|
|
// ignore the device
|
|
return nil
|
|
}
|
|
|
|
// add the device to userSpecified devices
|
|
childDevice.Path = strings.Replace(dpath, resolvedPathOnHost, deviceMapping.PathInContainer, 1)
|
|
devs = append(devs, specDevice(childDevice))
|
|
devPermissions = append(devPermissions, specDeviceCgroup(childDevice))
|
|
|
|
return nil
|
|
})
|
|
}
|
|
}
|
|
|
|
if len(devs) > 0 {
|
|
return devs, devPermissions, nil
|
|
}
|
|
|
|
return devs, devPermissions, fmt.Errorf("error gathering device information while adding custom device %q: %s", deviceMapping.PathOnHost, err)
|
|
}
|
|
|
|
func detachMounted(path string) error {
|
|
return syscall.Unmount(path, syscall.MNT_DETACH)
|
|
}
|
|
|
|
func isLinkable(child *container.Container) bool {
|
|
// A container is linkable only if it belongs to the default network
|
|
_, ok := child.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
|
|
return ok
|
|
}
|
|
|
|
func errRemovalContainer(containerID string) error {
|
|
return fmt.Errorf("Container %s is marked for removal and cannot be connected or disconnected to the network", containerID)
|
|
}
|
|
|
|
func enableIPOnPredefinedNetwork() bool {
|
|
return false
|
|
}
|