Merge pull request #16468 from crosbymichael/bump_libcontainer

Bump libcontainer to 5765dcd086eb0584c0e2eaff9a3ac97b467a98e6
This commit is contained in:
Jess Frazelle 2015-09-24 16:53:28 -07:00
commit b2b76894a1
26 changed files with 397 additions and 1636 deletions

View file

@ -183,3 +183,7 @@ func (container *Container) unmountIpcMounts() error {
func (container *Container) ipcMounts() []execdriver.Mount {
return nil
}
// getDefaultRouteMtu performs no route lookup in this variant: it always
// returns -1 together with errSystemNotSupported.
func getDefaultRouteMtu() (int, error) {
return -1, errSystemNotSupported
}

View file

@ -52,7 +52,6 @@ import (
"github.com/docker/docker/volume/local"
"github.com/docker/docker/volume/store"
"github.com/docker/libnetwork"
"github.com/opencontainers/runc/libcontainer/netlink"
)
var (
@ -1076,20 +1075,6 @@ func setDefaultMtu(config *Config) {
var errNoDefaultRoute = errors.New("no default route was found")
// getDefaultRouteMtu reports the MTU of the interface attached to the first
// route that is flagged as default and carries a non-nil interface.
//
// It returns the error from netlink.NetworkGetRoutes unchanged when the route
// listing fails, and errNoDefaultRoute when no route qualifies.
func getDefaultRouteMtu() (int, error) {
	routes, err := netlink.NetworkGetRoutes()
	if err != nil {
		return 0, err
	}
	for _, route := range routes {
		// Skip anything that is not a default route with a known interface.
		if !route.Default || route.Iface == nil {
			continue
		}
		return route.Iface.MTU, nil
	}
	return 0, errNoDefaultRoute
}
// verifyContainerSettings performs validation of the hostconfig and config
// structures.
func (daemon *Daemon) verifyContainerSettings(ctx context.Context, hostConfig *runconfig.HostConfig, config *runconfig.Config) ([]string, error) {

View file

@ -28,6 +28,7 @@ import (
"github.com/docker/libnetwork/netlabel"
"github.com/docker/libnetwork/options"
"github.com/opencontainers/runc/libcontainer/label"
"github.com/vishvananda/netlink"
)
const (
@ -554,3 +555,22 @@ func (daemon *Daemon) newBaseContainer(id string) Container {
VolumesRW: make(map[string]bool),
}
}
// getDefaultRouteMtu reports the MTU of the interface behind the default
// route.
//
// Routes are listed with netlink.RouteList(nil, 0); a route whose Dst is nil
// is treated as the default route. If the interface for such a route cannot
// be resolved by index, the route is skipped and the search continues. The
// listing error is returned unchanged, and errNoDefaultRoute is returned when
// no usable default route is found.
func getDefaultRouteMtu() (int, error) {
	routes, err := netlink.RouteList(nil, 0)
	if err != nil {
		return 0, err
	}
	for _, route := range routes {
		// Only the default route is of interest; it is the one with a nil Dst.
		if route.Dst != nil {
			continue
		}
		iface, lookupErr := net.InterfaceByIndex(route.LinkIndex)
		if lookupErr != nil {
			// The interface could not be resolved; try the next candidate.
			continue
		}
		return iface.MTU, nil
	}
	return 0, errNoDefaultRoute
}

View file

@ -65,7 +65,13 @@ func (d *Driver) createContainer(ctx context.Context, c *execdriver.Command, hoo
return nil, err
}
}
// add CAP_ prefix to all caps for new libcontainer update to match
// the spec format.
for i, s := range container.Capabilities {
if !strings.HasPrefix(s, "CAP_") {
container.Capabilities[i] = fmt.Sprintf("CAP_%s", s)
}
}
container.AdditionalGroups = c.GroupAdd
if c.AppArmorProfile != "" {

View file

@ -6,6 +6,7 @@ import (
"fmt"
"os"
"os/exec"
"strings"
"syscall"
"github.com/docker/docker/context"
@ -36,6 +37,13 @@ func (d *Driver) Exec(ctx context.Context, c *execdriver.Command, processConfig
if processConfig.Privileged {
p.Capabilities = execdriver.GetAllCapabilities()
}
// add CAP_ prefix to all caps for new libcontainer update to match
// the spec format.
for i, s := range p.Capabilities {
if !strings.HasPrefix(s, "CAP_") {
p.Capabilities[i] = fmt.Sprintf("CAP_%s", s)
}
}
config := active.Config()
if err := setupPipes(&config, processConfig, p, pipes); err != nil {

View file

@ -119,6 +119,5 @@ func TweakCapabilities(basics, adds, drops []string) ([]string, error) {
newCaps = append(newCaps, strings.ToUpper(cap))
}
}
return newCaps, nil
}

View file

@ -42,7 +42,7 @@ clone git github.com/endophage/gotuf 9bcdad0308e34a49f38448b8ad436ad8860825ce
clone git github.com/jfrazelle/go 6e461eb70cb4187b41a84e9a567d7137bdbe0f16
clone git github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c
clone git github.com/opencontainers/runc v0.0.4 # libcontainer
clone git github.com/opencontainers/runc aac9179bbadbf958054ce97ab368ac178140e5da # libcontainer
# libcontainer deps (see src/github.com/docker/libcontainer/update-vendor.sh)
clone git github.com/coreos/go-systemd v3
clone git github.com/godbus/dbus v2

View file

@ -32,20 +32,20 @@ struct describing how the container is to be created. A sample would look simil
config := &configs.Config{
Rootfs: rootfs,
Capabilities: []string{
"CHOWN",
"DAC_OVERRIDE",
"FSETID",
"FOWNER",
"MKNOD",
"NET_RAW",
"SETGID",
"SETUID",
"SETFCAP",
"SETPCAP",
"NET_BIND_SERVICE",
"SYS_CHROOT",
"KILL",
"AUDIT_WRITE",
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Namespaces: configs.Namespaces([]configs.Namespace{
{Type: configs.NEWNS},

View file

@ -12,44 +12,44 @@ import (
const allCapabilityTypes = capability.CAPS | capability.BOUNDS
var capabilityList = map[string]capability.Cap{
"SETPCAP": capability.CAP_SETPCAP,
"SYS_MODULE": capability.CAP_SYS_MODULE,
"SYS_RAWIO": capability.CAP_SYS_RAWIO,
"SYS_PACCT": capability.CAP_SYS_PACCT,
"SYS_ADMIN": capability.CAP_SYS_ADMIN,
"SYS_NICE": capability.CAP_SYS_NICE,
"SYS_RESOURCE": capability.CAP_SYS_RESOURCE,
"SYS_TIME": capability.CAP_SYS_TIME,
"SYS_TTY_CONFIG": capability.CAP_SYS_TTY_CONFIG,
"MKNOD": capability.CAP_MKNOD,
"AUDIT_WRITE": capability.CAP_AUDIT_WRITE,
"AUDIT_CONTROL": capability.CAP_AUDIT_CONTROL,
"MAC_OVERRIDE": capability.CAP_MAC_OVERRIDE,
"MAC_ADMIN": capability.CAP_MAC_ADMIN,
"NET_ADMIN": capability.CAP_NET_ADMIN,
"SYSLOG": capability.CAP_SYSLOG,
"CHOWN": capability.CAP_CHOWN,
"NET_RAW": capability.CAP_NET_RAW,
"DAC_OVERRIDE": capability.CAP_DAC_OVERRIDE,
"FOWNER": capability.CAP_FOWNER,
"DAC_READ_SEARCH": capability.CAP_DAC_READ_SEARCH,
"FSETID": capability.CAP_FSETID,
"KILL": capability.CAP_KILL,
"SETGID": capability.CAP_SETGID,
"SETUID": capability.CAP_SETUID,
"LINUX_IMMUTABLE": capability.CAP_LINUX_IMMUTABLE,
"NET_BIND_SERVICE": capability.CAP_NET_BIND_SERVICE,
"NET_BROADCAST": capability.CAP_NET_BROADCAST,
"IPC_LOCK": capability.CAP_IPC_LOCK,
"IPC_OWNER": capability.CAP_IPC_OWNER,
"SYS_CHROOT": capability.CAP_SYS_CHROOT,
"SYS_PTRACE": capability.CAP_SYS_PTRACE,
"SYS_BOOT": capability.CAP_SYS_BOOT,
"LEASE": capability.CAP_LEASE,
"SETFCAP": capability.CAP_SETFCAP,
"WAKE_ALARM": capability.CAP_WAKE_ALARM,
"BLOCK_SUSPEND": capability.CAP_BLOCK_SUSPEND,
"AUDIT_READ": capability.CAP_AUDIT_READ,
"CAP_SETPCAP": capability.CAP_SETPCAP,
"CAP_SYS_MODULE": capability.CAP_SYS_MODULE,
"CAP_SYS_RAWIO": capability.CAP_SYS_RAWIO,
"CAP_SYS_PACCT": capability.CAP_SYS_PACCT,
"CAP_SYS_ADMIN": capability.CAP_SYS_ADMIN,
"CAP_SYS_NICE": capability.CAP_SYS_NICE,
"CAP_SYS_RESOURCE": capability.CAP_SYS_RESOURCE,
"CAP_SYS_TIME": capability.CAP_SYS_TIME,
"CAP_SYS_TTY_CONFIG": capability.CAP_SYS_TTY_CONFIG,
"CAP_MKNOD": capability.CAP_MKNOD,
"CAP_AUDIT_WRITE": capability.CAP_AUDIT_WRITE,
"CAP_AUDIT_CONTROL": capability.CAP_AUDIT_CONTROL,
"CAP_MAC_OVERRIDE": capability.CAP_MAC_OVERRIDE,
"CAP_MAC_ADMIN": capability.CAP_MAC_ADMIN,
"CAP_NET_ADMIN": capability.CAP_NET_ADMIN,
"CAP_SYSLOG": capability.CAP_SYSLOG,
"CAP_CHOWN": capability.CAP_CHOWN,
"CAP_NET_RAW": capability.CAP_NET_RAW,
"CAP_DAC_OVERRIDE": capability.CAP_DAC_OVERRIDE,
"CAP_FOWNER": capability.CAP_FOWNER,
"CAP_DAC_READ_SEARCH": capability.CAP_DAC_READ_SEARCH,
"CAP_FSETID": capability.CAP_FSETID,
"CAP_KILL": capability.CAP_KILL,
"CAP_SETGID": capability.CAP_SETGID,
"CAP_SETUID": capability.CAP_SETUID,
"CAP_LINUX_IMMUTABLE": capability.CAP_LINUX_IMMUTABLE,
"CAP_NET_BIND_SERVICE": capability.CAP_NET_BIND_SERVICE,
"CAP_NET_BROADCAST": capability.CAP_NET_BROADCAST,
"CAP_IPC_LOCK": capability.CAP_IPC_LOCK,
"CAP_IPC_OWNER": capability.CAP_IPC_OWNER,
"CAP_SYS_CHROOT": capability.CAP_SYS_CHROOT,
"CAP_SYS_PTRACE": capability.CAP_SYS_PTRACE,
"CAP_SYS_BOOT": capability.CAP_SYS_BOOT,
"CAP_LEASE": capability.CAP_LEASE,
"CAP_SETFCAP": capability.CAP_SETFCAP,
"CAP_WAKE_ALARM": capability.CAP_WAKE_ALARM,
"CAP_BLOCK_SUSPEND": capability.CAP_BLOCK_SUSPEND,
"CAP_AUDIT_READ": capability.CAP_AUDIT_READ,
}
func newCapWhitelist(caps []string) (*whitelist, error) {

View file

@ -5,7 +5,6 @@ package cgroups
import (
"bufio"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
@ -105,12 +104,12 @@ type Mount struct {
Subsystems []string
}
func (m Mount) GetThisCgroupDir() (string, error) {
func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
if len(m.Subsystems) == 0 {
return "", fmt.Errorf("no subsystem for mount")
}
return GetThisCgroupDir(m.Subsystems[0])
return getControllerPath(m.Subsystems[0], cgroups)
}
func GetCgroupMounts() ([]Mount, error) {
@ -176,23 +175,22 @@ func GetAllSubsystems() ([]string, error) {
// Returns the relative path to the cgroup docker is running in.
func GetThisCgroupDir(subsystem string) (string, error) {
f, err := os.Open("/proc/self/cgroup")
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return "", err
}
defer f.Close()
return ParseCgroupFile(subsystem, f)
return getControllerPath(subsystem, cgroups)
}
func GetInitCgroupDir(subsystem string) (string, error) {
f, err := os.Open("/proc/1/cgroup")
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
if err != nil {
return "", err
}
defer f.Close()
return ParseCgroupFile(subsystem, f)
return getControllerPath(subsystem, cgroups)
}
func ReadProcsFile(dir string) ([]int, error) {
@ -219,23 +217,40 @@ func ReadProcsFile(dir string) ([]int, error) {
return out, nil
}
func ParseCgroupFile(subsystem string, r io.Reader) (string, error) {
s := bufio.NewScanner(r)
func ParseCgroupFile(path string) (map[string]string, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
s := bufio.NewScanner(f)
cgroups := make(map[string]string)
for s.Scan() {
if err := s.Err(); err != nil {
return "", err
return nil, err
}
text := s.Text()
parts := strings.Split(text, ":")
for _, subs := range strings.Split(parts[1], ",") {
if subs == subsystem || subs == cgroupNamePrefix+subsystem {
return parts[2], nil
}
cgroups[subs] = parts[2]
}
}
return cgroups, nil
}
// getControllerPath returns the path recorded for subsystem in cgroups.
//
// The plain subsystem name is tried first, then the name with
// cgroupNamePrefix prepended. When neither key is present the result is an
// empty string and NewNotFoundError(subsystem).
func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
	candidates := [...]string{subsystem, cgroupNamePrefix + subsystem}
	for _, key := range candidates {
		if dir, found := cgroups[key]; found {
			return dir, nil
		}
	}
	return "", NewNotFoundError(subsystem)
}

View file

@ -20,8 +20,12 @@ type IDMap struct {
}
// Seccomp represents syscall restrictions.
// By default, only the native architecture of the kernel is allowed to be used
// for syscalls. Additional architectures can be added by specifying them in
// Architectures.
type Seccomp struct {
// DefaultAction is serialized under the "default_action" JSON key.
// NOTE(review): presumably the action taken for syscalls not listed in
// Syscalls — confirm against the Action type.
DefaultAction Action `json:"default_action"`
// Architectures names the extra architectures to allow in addition to the
// kernel's native one; serialized under "architectures".
Architectures []string `json:"architectures"`
// Syscalls is serialized under the "syscalls" JSON key.
// NOTE(review): presumably the per-syscall rules — confirm against Syscall.
Syscalls []*Syscall `json:"syscalls"`
}
@ -169,6 +173,9 @@ type Config struct {
// Hooks are a collection of actions to perform at various container lifecycle events.
// Hooks are not able to be marshaled to json but they are also not needed to.
Hooks *Hooks `json:"-"`
// Version is the version of opencontainer specification that is supported.
Version string `json:"version"`
}
type Hooks struct {
@ -182,9 +189,10 @@ type Hooks struct {
// HookState is the payload provided to a hook on execution.
type HookState struct {
ID string `json:"id"`
Pid int `json:"pid"`
Root string `json:"root"`
Version string `json:"version"`
ID string `json:"id"`
Pid int `json:"pid"`
Root string `json:"root"`
}
type Hook interface {

View file

@ -13,6 +13,9 @@ type Mount struct {
// Mount flags.
Flags int `json:"flags"`
// Propagation Flags
PropagationFlags []int `json:"propagation_flags"`
// Mount data applied to the mount.
Data string `json:"data"`

View file

@ -75,7 +75,7 @@ func (c *linuxConsole) Close() error {
// mount initializes the console inside the rootfs mounting with the specified mount label
// and applying the correct ownership of the console.
func (c *linuxConsole) mount(rootfs, mountLabel string, uid, gid int) error {
func (c *linuxConsole) mount(rootfs, mountLabel string) error {
oldMask := syscall.Umask(0000)
defer syscall.Umask(oldMask)
if err := label.SetFileLabel(c.slavePath, mountLabel); err != nil {

View file

@ -250,8 +250,9 @@ func (c *linuxContainer) Destroy() error {
c.initProcess = nil
if c.config.Hooks != nil {
s := configs.HookState{
ID: c.id,
Root: c.config.Rootfs,
Version: c.config.Version,
ID: c.id,
Root: c.config.Rootfs,
}
for _, hook := range c.config.Hooks.Poststop {
if err := hook.Run(s); err != nil {
@ -423,7 +424,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
return err
}
err = c.criuSwrk(nil, req, criuOpts)
err = c.criuSwrk(nil, req, criuOpts, false)
if err != nil {
return err
}
@ -516,6 +517,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
FileLocks: proto.Bool(criuOpts.FileLocks),
},
}
for _, m := range c.config.Mounts {
switch m.Device {
case "bind":
@ -573,14 +575,36 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
}
}
err = c.criuSwrk(process, req, criuOpts)
err = c.criuSwrk(process, req, criuOpts, true)
if err != nil {
return err
}
return nil
}
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts) error {
// criuApplyCgroups hands pid to the container's cgroup manager and then
// records the resulting cgroup layout on the CRIU request.
//
// After cgroupManager.Apply(pid) succeeds, /proc/<pid>/cgroup is parsed and
// one CgroupRoot (controller name plus path) is appended to req.Opts.CgRoot
// for every entry found. Errors from Apply or from parsing are returned
// unchanged.
func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
	if err := c.cgroupManager.Apply(pid); err != nil {
		return err
	}

	procCgroup := fmt.Sprintf("/proc/%d/cgroup", pid)
	parsed, err := cgroups.ParseCgroupFile(procCgroup)
	if err != nil {
		return err
	}

	// Distinct loop names keep the receiver c visible inside the loop.
	for ctrl, ctrlPath := range parsed {
		req.Opts.CgRoot = append(req.Opts.CgRoot, &criurpc.CgroupRoot{
			Ctrl: proto.String(ctrl),
			Path: proto.String(ctrlPath),
		})
	}

	return nil
}
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool) error {
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
if err != nil {
return err
@ -614,6 +638,13 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
}
}()
if applyCgroups {
err := c.criuApplyCgroups(cmd.Process.Pid, req)
if err != nil {
return err
}
}
var extFds []string
if process != nil {
extFds, err = getPipeFds(cmd.Process.Pid)

View file

@ -6,6 +6,7 @@ import (
"encoding/json"
"fmt"
"io/ioutil"
"net"
"os"
"strconv"
"strings"
@ -14,10 +15,10 @@ import (
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/netlink"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/user"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/vishvananda/netlink"
)
type initType string
@ -186,7 +187,11 @@ func setupUser(config *initConfig) error {
return err
}
}
// Before we change to the container's user, make sure that the process's STDIO
// is correctly owned by the user that we are switching to.
if err := fixStdioPermissions(execUser); err != nil {
return err
}
suppGroups := append(execUser.Sgids, addGroups...)
if err := syscall.Setgroups(suppGroups); err != nil {
return err
@ -207,6 +212,34 @@ func setupUser(config *initConfig) error {
return nil
}
// fixStdioPermissions changes the owner of the process's stdin, stderr and
// stdout descriptors to u.Uid/u.Gid. The original author's note: the STDIO of
// PID 1 is created outside of the container, so its ownership has to be
// localized to the user the container switches to.
//
// A descriptor whose Rdev equals that of /dev/null is left untouched. The
// first failing Stat, Fstat or Fchown aborts the function and its error is
// returned as-is.
func fixStdioPermissions(u *user.ExecUser) error {
	var devNull syscall.Stat_t
	if err := syscall.Stat("/dev/null", &devNull); err != nil {
		return err
	}

	// Descriptors are collected up front, in stdin, stderr, stdout order.
	stdio := [...]uintptr{os.Stdin.Fd(), os.Stderr.Fd(), os.Stdout.Fd()}
	for i := range stdio {
		fd := int(stdio[i])

		var st syscall.Stat_t
		if err := syscall.Fstat(fd, &st); err != nil {
			return err
		}
		if st.Rdev == devNull.Rdev {
			// This descriptor is /dev/null itself; do not chown it.
			continue
		}
		if err := syscall.Fchown(fd, u.Uid, u.Gid); err != nil {
			return err
		}
	}
	return nil
}
// setupNetwork sets up and initializes any network interface inside the container.
func setupNetwork(config *initConfig) error {
for _, config := range config.Networks {
@ -223,7 +256,30 @@ func setupNetwork(config *initConfig) error {
func setupRoute(config *configs.Config) error {
for _, config := range config.Routes {
if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil {
_, dst, err := net.ParseCIDR(config.Destination)
if err != nil {
return err
}
src := net.ParseIP(config.Source)
if src == nil {
return fmt.Errorf("Invalid source for route: %s", config.Source)
}
gw := net.ParseIP(config.Gateway)
if gw == nil {
return fmt.Errorf("Invalid gateway for route: %s", config.Gateway)
}
l, err := netlink.LinkByName(config.InterfaceName)
if err != nil {
return err
}
route := &netlink.Route{
Scope: netlink.SCOPE_UNIVERSE,
Dst: dst,
Src: src,
Gw: gw,
LinkIndex: l.Attrs().Index,
}
if err := netlink.RouteAdd(route); err != nil {
return err
}
}

View file

@ -1,2 +0,0 @@
Michael Crosby <michael@crosbymichael.com> (@crosbymichael)
Guillaume J. Charmes <guillaume@docker.com> (@creack)

View file

@ -1,31 +0,0 @@
// Package netlink provides access to low-level Netlink sockets and messages.
//
// Actual implementations are in:
// netlink_linux.go
// netlink_darwin.go
package netlink
import (
"errors"
"net"
)
var (
ErrWrongSockType = errors.New("Wrong socket type")
ErrShortResponse = errors.New("Got short response from netlink")
ErrInterfaceExists = errors.New("Network interface already exists")
)
// A Route is a subnet associated with the interface to reach it.
type Route struct {
*net.IPNet
Iface *net.Interface
Default bool
}
// An IfAddr defines IP network settings for a given network interface
type IfAddr struct {
Iface *net.Interface
IP net.IP
IPNet *net.IPNet
}

View file

@ -1,7 +0,0 @@
// +build arm ppc64 ppc64le
package netlink
// ifrDataByte converts b to uint8. This variant is compiled only for arm,
// ppc64 and ppc64le, per the file's build tag.
func ifrDataByte(b byte) uint8 {
return uint8(b)
}

View file

@ -1,7 +0,0 @@
// +build !arm,!ppc64,!ppc64le
package netlink
// ifrDataByte converts b to int8. This variant is compiled for every
// architecture except arm, ppc64 and ppc64le, per the file's build tag.
func ifrDataByte(b byte) int8 {
return int8(b)
}

View file

@ -1,88 +0,0 @@
// +build !linux
package netlink
import (
"errors"
"net"
)
var (
ErrNotImplemented = errors.New("not implemented")
)
// NetworkGetRoutes is unavailable in non-Linux builds; it always returns a
// nil slice and ErrNotImplemented.
func NetworkGetRoutes() ([]Route, error) {
return nil, ErrNotImplemented
}
// NetworkLinkAdd is unavailable in non-Linux builds; it always returns
// ErrNotImplemented.
func NetworkLinkAdd(name string, linkType string) error {
return ErrNotImplemented
}
// NetworkLinkDel is unavailable in non-Linux builds; it always returns
// ErrNotImplemented.
func NetworkLinkDel(name string) error {
return ErrNotImplemented
}
// NetworkLinkUp is unavailable in non-Linux builds; it always returns
// ErrNotImplemented.
func NetworkLinkUp(iface *net.Interface) error {
return ErrNotImplemented
}
// NetworkLinkAddIp is unavailable in non-Linux builds; it always returns
// ErrNotImplemented.
func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error {
return ErrNotImplemented
}
// NetworkLinkDelIp is unavailable in non-Linux builds; it always returns
// ErrNotImplemented.
func NetworkLinkDelIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error {
return ErrNotImplemented
}
// AddRoute is unavailable in non-Linux builds; it always returns
// ErrNotImplemented.
func AddRoute(destination, source, gateway, device string) error {
return ErrNotImplemented
}
// AddDefaultGw is unavailable in non-Linux builds; it always returns
// ErrNotImplemented.
func AddDefaultGw(ip, device string) error {
return ErrNotImplemented
}
// NetworkSetMTU is unavailable in non-Linux builds; it always returns
// ErrNotImplemented.
func NetworkSetMTU(iface *net.Interface, mtu int) error {
return ErrNotImplemented
}
// NetworkSetTxQueueLen is unavailable in non-Linux builds; it always returns
// ErrNotImplemented.
func NetworkSetTxQueueLen(iface *net.Interface, txQueueLen int) error {
return ErrNotImplemented
}
// NetworkCreateVethPair is unavailable in non-Linux builds; it always returns
// ErrNotImplemented.
func NetworkCreateVethPair(name1, name2 string, txQueueLen int) error {
return ErrNotImplemented
}
// NetworkChangeName is unavailable in non-Linux builds; it always returns
// ErrNotImplemented.
func NetworkChangeName(iface *net.Interface, newName string) error {
return ErrNotImplemented
}
// NetworkSetNsFd is unavailable in non-Linux builds; it always returns
// ErrNotImplemented.
func NetworkSetNsFd(iface *net.Interface, fd int) error {
return ErrNotImplemented
}
// NetworkSetNsPid is unavailable in non-Linux builds; it always returns
// ErrNotImplemented.
func NetworkSetNsPid(iface *net.Interface, nspid int) error {
return ErrNotImplemented
}
// NetworkSetMaster is unavailable in non-Linux builds; it always returns
// ErrNotImplemented.
func NetworkSetMaster(iface, master *net.Interface) error {
return ErrNotImplemented
}
// NetworkLinkDown is unavailable in non-Linux builds; it always returns
// ErrNotImplemented.
func NetworkLinkDown(iface *net.Interface) error {
return ErrNotImplemented
}
// CreateBridge is unavailable in non-Linux builds; it always returns
// ErrNotImplemented.
func CreateBridge(name string, setMacAddr bool) error {
return ErrNotImplemented
}
// DeleteBridge is unavailable in non-Linux builds; it always returns
// ErrNotImplemented.
func DeleteBridge(name string) error {
return ErrNotImplemented
}
// AddToBridge is unavailable in non-Linux builds; it always returns
// ErrNotImplemented.
func AddToBridge(iface, master *net.Interface) error {
return ErrNotImplemented
}

View file

@ -11,8 +11,8 @@ import (
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/netlink"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/vishvananda/netlink"
)
var strategies = map[string]networkStrategy{
@ -93,11 +93,7 @@ func (l *loopback) create(n *network, nspid int) error {
}
func (l *loopback) initialize(config *network) error {
iface, err := net.InterfaceByName("lo")
if err != nil {
return err
}
return netlink.NetworkLinkUp(iface)
return netlink.LinkSetUp(&netlink.Device{netlink.LinkAttrs{Name: "lo"}})
}
func (l *loopback) attach(n *configs.Network) (err error) {
@ -115,42 +111,36 @@ type veth struct {
}
func (v *veth) detach(n *configs.Network) (err error) {
bridge, err := net.InterfaceByName(n.Bridge)
if err != nil {
return err
}
host, err := net.InterfaceByName(n.HostInterfaceName)
if err != nil {
return err
}
if err := netlink.DelFromBridge(host, bridge); err != nil {
return err
}
return nil
return netlink.LinkSetMaster(&netlink.Device{netlink.LinkAttrs{Name: n.HostInterfaceName}}, nil)
}
// attach a container network interface to an external network
func (v *veth) attach(n *configs.Network) (err error) {
bridge, err := net.InterfaceByName(n.Bridge)
brl, err := netlink.LinkByName(n.Bridge)
if err != nil {
return err
}
host, err := net.InterfaceByName(n.HostInterfaceName)
br, ok := brl.(*netlink.Bridge)
if !ok {
return fmt.Errorf("Wrong device type %T", brl)
}
host, err := netlink.LinkByName(n.HostInterfaceName)
if err != nil {
return err
}
if err := netlink.AddToBridge(host, bridge); err != nil {
if err := netlink.LinkSetMaster(host, br); err != nil {
return err
}
if err := netlink.NetworkSetMTU(host, n.Mtu); err != nil {
if err := netlink.LinkSetMTU(host, n.Mtu); err != nil {
return err
}
if n.HairpinMode {
if err := netlink.SetHairpinMode(host, true); err != nil {
if err := netlink.LinkSetHairpin(host, true); err != nil {
return err
}
}
if err := netlink.NetworkLinkUp(host); err != nil {
if err := netlink.LinkSetUp(host); err != nil {
return err
}
@ -163,26 +153,32 @@ func (v *veth) create(n *network, nspid int) (err error) {
return err
}
n.TempVethPeerName = tmpName
defer func() {
if err != nil {
netlink.NetworkLinkDel(n.HostInterfaceName)
netlink.NetworkLinkDel(n.TempVethPeerName)
}
}()
if n.Bridge == "" {
return fmt.Errorf("bridge is not specified")
}
if err := netlink.NetworkCreateVethPair(n.HostInterfaceName, n.TempVethPeerName, n.TxQueueLen); err != nil {
veth := &netlink.Veth{
LinkAttrs: netlink.LinkAttrs{
Name: n.HostInterfaceName,
TxQLen: n.TxQueueLen,
},
PeerName: n.TempVethPeerName,
}
if err := netlink.LinkAdd(veth); err != nil {
return err
}
defer func() {
if err != nil {
netlink.LinkDel(veth)
}
}()
if err := v.attach(&n.Network); err != nil {
return err
}
child, err := net.InterfaceByName(n.TempVethPeerName)
child, err := netlink.LinkByName(n.TempVethPeerName)
if err != nil {
return err
}
return netlink.NetworkSetNsPid(child, nspid)
return netlink.LinkSetNsPid(child, nspid)
}
func (v *veth) generateTempPeerName() (string, error) {
@ -194,53 +190,68 @@ func (v *veth) initialize(config *network) error {
if peer == "" {
return fmt.Errorf("peer is not specified")
}
child, err := net.InterfaceByName(peer)
child, err := netlink.LinkByName(peer)
if err != nil {
return err
}
if err := netlink.NetworkLinkDown(child); err != nil {
if err := netlink.LinkSetDown(child); err != nil {
return err
}
if err := netlink.NetworkChangeName(child, config.Name); err != nil {
if err := netlink.LinkSetName(child, config.Name); err != nil {
return err
}
// get the interface again after we changed the name as the index also changes.
if child, err = net.InterfaceByName(config.Name); err != nil {
if child, err = netlink.LinkByName(config.Name); err != nil {
return err
}
if config.MacAddress != "" {
if err := netlink.NetworkSetMacAddress(child, config.MacAddress); err != nil {
mac, err := net.ParseMAC(config.MacAddress)
if err != nil {
return err
}
if err := netlink.LinkSetHardwareAddr(child, mac); err != nil {
return err
}
}
ip, ipNet, err := net.ParseCIDR(config.Address)
ip, err := netlink.ParseAddr(config.Address)
if err != nil {
return err
}
if err := netlink.NetworkLinkAddIp(child, ip, ipNet); err != nil {
if err := netlink.AddrAdd(child, ip); err != nil {
return err
}
if config.IPv6Address != "" {
if ip, ipNet, err = net.ParseCIDR(config.IPv6Address); err != nil {
ip6, err := netlink.ParseAddr(config.IPv6Address)
if err != nil {
return err
}
if err := netlink.NetworkLinkAddIp(child, ip, ipNet); err != nil {
if err := netlink.AddrAdd(child, ip6); err != nil {
return err
}
}
if err := netlink.NetworkSetMTU(child, config.Mtu); err != nil {
if err := netlink.LinkSetMTU(child, config.Mtu); err != nil {
return err
}
if err := netlink.NetworkLinkUp(child); err != nil {
if err := netlink.LinkSetUp(child); err != nil {
return err
}
if config.Gateway != "" {
if err := netlink.AddDefaultGw(config.Gateway, config.Name); err != nil {
gw := net.ParseIP(config.Gateway)
if err := netlink.RouteAdd(&netlink.Route{
Scope: netlink.SCOPE_UNIVERSE,
LinkIndex: child.Attrs().Index,
Gw: gw,
}); err != nil {
return err
}
}
if config.IPv6Gateway != "" {
if err := netlink.AddDefaultGw(config.IPv6Gateway, config.Name); err != nil {
gw := net.ParseIP(config.IPv6Gateway)
if err := netlink.RouteAdd(&netlink.Route{
Scope: netlink.SCOPE_UNIVERSE,
LinkIndex: child.Attrs().Index,
Gw: gw,
}); err != nil {
return err
}
}

View file

@ -65,11 +65,11 @@ static int clone_parent(jmp_buf * env)
void nsexec()
{
char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt" };
char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt", "user" };
const int num = sizeof(namespaces) / sizeof(char *);
jmp_buf env;
char buf[PATH_MAX], *val;
int i, tfd, child, len, pipenum, consolefd = -1;
int i, tfd, self_tfd, child, len, pipenum, consolefd = -1;
pid_t pid;
char *console;
@ -114,17 +114,30 @@ void nsexec()
exit(1);
}
self_tfd = open("/proc/self/ns", O_DIRECTORY | O_RDONLY);
if (self_tfd == -1) {
pr_perror("Failed to open /proc/self/ns");
exit(1);
}
for (i = 0; i < num; i++) {
struct stat st;
struct stat self_st;
int fd;
/* Symlinks on all namespaces exist for dead processes, but they can't be opened */
if (fstatat(tfd, namespaces[i], &st, AT_SYMLINK_NOFOLLOW) == -1) {
if (fstatat(tfd, namespaces[i], &st, 0) == -1) {
// Ignore nonexistent namespaces.
if (errno == ENOENT)
continue;
}
/* Skip namespaces we're already part of */
if (fstatat(self_tfd, namespaces[i], &self_st, 0) != -1 &&
st.st_ino == self_st.st_ino) {
continue;
}
fd = openat(tfd, namespaces[i], O_RDONLY);
if (fd == -1) {
pr_perror("Failed to open ns file %s for ns %s", buf,
@ -139,6 +152,9 @@ void nsexec()
close(fd);
}
close(self_tfd);
close(tfd);
if (setjmp(env) == 1) {
// Child

View file

@ -203,9 +203,10 @@ func (p *initProcess) start() (err error) {
}()
if p.config.Config.Hooks != nil {
s := configs.HookState{
ID: p.container.id,
Pid: p.pid(),
Root: p.config.Config.Rootfs,
Version: p.container.config.Version,
ID: p.container.id,
Pid: p.pid(),
Root: p.config.Config.Rootfs,
}
for _, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {

View file

@ -68,7 +68,7 @@ func setupRootfs(config *configs.Config, console *linuxConsole) (err error) {
return newSystemError(err)
}
if !setupDev {
if err := reOpenDevNull(config.Rootfs); err != nil {
if err := reOpenDevNull(); err != nil {
return newSystemError(err)
}
}
@ -96,7 +96,6 @@ func mountCmd(cmd configs.Command) error {
func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
var (
dest = m.Destination
data = label.FormatMountLabel(m.Data, mountLabel)
)
if !strings.HasPrefix(dest, rootfs) {
dest = filepath.Join(rootfs, dest)
@ -107,12 +106,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), "")
return mountPropagate(m, rootfs, mountLabel)
case "mqueue":
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), ""); err != nil {
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
return err
}
return label.SetFileLabel(dest, mountLabel)
@ -123,7 +122,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
return err
}
}
if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil {
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
return err
}
if stat != nil {
@ -136,12 +135,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data)
return mountPropagate(m, rootfs, mountLabel)
case "securityfs":
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data)
return mountPropagate(m, rootfs, mountLabel)
case "bind":
stat, err := os.Stat(m.Source)
if err != nil {
@ -159,16 +158,17 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
if err := checkMountDestination(rootfs, dest); err != nil {
return err
}
// update the mount with the correct dest after symlinks are resolved.
m.Destination = dest
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
return err
}
if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil {
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
return err
}
if m.Flags&syscall.MS_RDONLY != 0 {
if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags|syscall.MS_REMOUNT), ""); err != nil {
return err
}
// A bind mount won't change mount options; we need a remount to make the mount options take effect.
if err := remount(m, rootfs); err != nil {
return err
}
if m.Relabel != "" {
if err := label.Validate(m.Relabel); err != nil {
@ -179,11 +179,6 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
return err
}
}
if m.Flags&syscall.MS_PRIVATE != 0 {
if err := syscall.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
return err
}
}
case "cgroup":
binds, err := getCgroupMounts(m)
if err != nil {
@ -197,11 +192,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
}
}
tmpfs := &configs.Mount{
Source: "tmpfs",
Device: "tmpfs",
Destination: m.Destination,
Flags: defaultMountFlags,
Data: "mode=755",
Source: "tmpfs",
Device: "tmpfs",
Destination: m.Destination,
Flags: defaultMountFlags,
Data: "mode=755",
PropagationFlags: m.PropagationFlags,
}
if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil {
return err
@ -236,8 +232,11 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
}
if m.Flags&syscall.MS_RDONLY != 0 {
// remount cgroup root as readonly
rootfsCgroup := filepath.Join(rootfs, m.Destination)
if err := syscall.Mount("", rootfsCgroup, "", defaultMountFlags|syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil {
mcgrouproot := &configs.Mount{
Destination: m.Destination,
Flags: defaultMountFlags | syscall.MS_RDONLY,
}
if err := remount(mcgrouproot, rootfs); err != nil {
return err
}
}
@ -253,10 +252,15 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
return nil, err
}
cgroupPaths, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return nil, err
}
var binds []*configs.Mount
for _, mm := range mounts {
dir, err := mm.GetThisCgroupDir()
dir, err := mm.GetThisCgroupDir(cgroupPaths)
if err != nil {
return nil, err
}
@ -265,10 +269,11 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
return nil, err
}
binds = append(binds, &configs.Mount{
Device: "bind",
Source: filepath.Join(mm.Mountpoint, relDir),
Destination: filepath.Join(m.Destination, strings.Join(mm.Subsystems, ",")),
Flags: syscall.MS_BIND | syscall.MS_REC | m.Flags,
Device: "bind",
Source: filepath.Join(mm.Mountpoint, relDir),
Destination: filepath.Join(m.Destination, strings.Join(mm.Subsystems, ",")),
Flags: syscall.MS_BIND | syscall.MS_REC | m.Flags,
PropagationFlags: m.PropagationFlags,
})
}
@ -325,7 +330,7 @@ func setupDevSymlinks(rootfs string) error {
// this method will make them point to `/dev/null` in this container's rootfs. This
// needs to be called after we chroot/pivot into the container's rootfs so that any
// symlinks are resolved locally.
func reOpenDevNull(rootfs string) error {
func reOpenDevNull() error {
var stat, devNullStat syscall.Stat_t
file, err := os.Open("/dev/null")
if err != nil {
@ -430,7 +435,7 @@ func setupPtmx(config *configs.Config, console *linuxConsole) error {
return fmt.Errorf("symlink dev ptmx %s", err)
}
if console != nil {
return console.mount(config.Rootfs, config.MountLabel, 0, 0)
return console.mount(config.Rootfs, config.MountLabel)
}
return nil
}
@ -529,3 +534,40 @@ func writeSystemProperty(key, value string) error {
keyPath := strings.Replace(key, ".", "/", -1)
return ioutil.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0644)
}
// remount re-issues the mount described by m with MS_REMOUNT or-ed into its
// flags and an empty data string.
//
// m.Destination is used as the target; it is joined onto rootfs unless it
// already starts with rootfs. The error from syscall.Mount, if any, is
// returned unchanged.
func remount(m *configs.Mount, rootfs string) error {
	target := m.Destination
	if !strings.HasPrefix(target, rootfs) {
		target = filepath.Join(rootfs, target)
	}

	flags := uintptr(m.Flags | syscall.MS_REMOUNT)
	return syscall.Mount(m.Source, target, m.Device, flags, "")
}
// mountPropagate performs the mount described by m and then applies each of
// m.PropagationFlags to the same target with a follow-up mount call.
//
// The mount data is m.Data passed through label.FormatMountLabel with
// mountLabel. m.Destination is used as the target; it is joined onto rootfs
// unless it already starts with rootfs. The first failing mount call aborts
// the sequence and its error is returned unchanged.
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
	labeledData := label.FormatMountLabel(m.Data, mountLabel)

	target := m.Destination
	if !strings.HasPrefix(target, rootfs) {
		target = filepath.Join(rootfs, target)
	}

	if err := syscall.Mount(m.Source, target, m.Device, uintptr(m.Flags), labeledData); err != nil {
		return err
	}

	// Each propagation flag is applied with its own mount call on the target.
	for i := range m.PropagationFlags {
		propagation := uintptr(m.PropagationFlags[i])
		if err := syscall.Mount("", target, "", propagation, ""); err != nil {
			return err
		}
	}
	return nil
}

View file

@ -37,6 +37,18 @@ func InitSeccomp(config *configs.Seccomp) error {
return fmt.Errorf("error creating filter: %s", err)
}
// Add extra architectures
for _, arch := range config.Architectures {
scmpArch, err := libseccomp.GetArchFromString(arch)
if err != nil {
return err
}
if err := filter.AddArch(scmpArch); err != nil {
return err
}
}
// Unset no new privs bit
if err := filter.SetNoNewPrivsBit(false); err != nil {
return fmt.Errorf("error setting no new privileges: %s", err)