Merge pull request #11208 from LK4D4/new_libcontainer_api

New libcontainer api
This commit is contained in:
Jessie Frazelle 2015-03-10 07:50:19 -07:00
commit 2fb89b2e2c
169 changed files with 9274 additions and 5645 deletions

View file

@ -14,6 +14,8 @@ import (
"syscall" "syscall"
"time" "time"
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/devices" "github.com/docker/libcontainer/devices"
"github.com/docker/libcontainer/label" "github.com/docker/libcontainer/label"
@ -259,18 +261,18 @@ func populateCommand(c *Container, env []string) error {
pid.HostPid = c.hostConfig.PidMode.IsHost() pid.HostPid = c.hostConfig.PidMode.IsHost()
// Build lists of devices allowed and created within the container. // Build lists of devices allowed and created within the container.
userSpecifiedDevices := make([]*devices.Device, len(c.hostConfig.Devices)) userSpecifiedDevices := make([]*configs.Device, len(c.hostConfig.Devices))
for i, deviceMapping := range c.hostConfig.Devices { for i, deviceMapping := range c.hostConfig.Devices {
device, err := devices.GetDevice(deviceMapping.PathOnHost, deviceMapping.CgroupPermissions) device, err := devices.DeviceFromPath(deviceMapping.PathOnHost, deviceMapping.CgroupPermissions)
if err != nil { if err != nil {
return fmt.Errorf("error gathering device information while adding custom device %q: %s", deviceMapping.PathOnHost, err) return fmt.Errorf("error gathering device information while adding custom device %q: %s", deviceMapping.PathOnHost, err)
} }
device.Path = deviceMapping.PathInContainer device.Path = deviceMapping.PathInContainer
userSpecifiedDevices[i] = device userSpecifiedDevices[i] = device
} }
allowedDevices := append(devices.DefaultAllowedDevices, userSpecifiedDevices...) allowedDevices := append(configs.DefaultAllowedDevices, userSpecifiedDevices...)
autoCreatedDevices := append(devices.DefaultAutoCreatedDevices, userSpecifiedDevices...) autoCreatedDevices := append(configs.DefaultAutoCreatedDevices, userSpecifiedDevices...)
// TODO: this can be removed after lxc-conf is fully deprecated // TODO: this can be removed after lxc-conf is fully deprecated
lxcConfig, err := mergeLxcConfIntoOptions(c.hostConfig) lxcConfig, err := mergeLxcConfIntoOptions(c.hostConfig)
@ -972,7 +974,7 @@ func (container *Container) Exposes(p nat.Port) bool {
return exists return exists
} }
func (container *Container) GetPtyMaster() (*os.File, error) { func (container *Container) GetPtyMaster() (libcontainer.Console, error) {
ttyConsole, ok := container.command.ProcessConfig.Terminal.(execdriver.TtyTerminal) ttyConsole, ok := container.command.ProcessConfig.Terminal.(execdriver.TtyTerminal)
if !ok { if !ok {
return nil, ErrNoTTY return nil, ErrNoTTY

View file

@ -1,17 +1,22 @@
package execdriver package execdriver
import ( import (
"encoding/json"
"errors" "errors"
"io" "io"
"io/ioutil"
"os" "os"
"os/exec" "os/exec"
"path/filepath"
"strconv"
"strings" "strings"
"time" "time"
"github.com/docker/docker/daemon/execdriver/native/template" "github.com/docker/docker/daemon/execdriver/native/template"
"github.com/docker/docker/pkg/ulimit" "github.com/docker/docker/pkg/ulimit"
"github.com/docker/libcontainer" "github.com/docker/libcontainer"
"github.com/docker/libcontainer/devices" "github.com/docker/libcontainer/cgroups/fs"
"github.com/docker/libcontainer/configs"
) )
// Context is a generic key value pair that allows // Context is a generic key value pair that allows
@ -42,7 +47,7 @@ type Terminal interface {
} }
type TtyTerminal interface { type TtyTerminal interface {
Master() *os.File Master() libcontainer.Console
} }
// ExitStatus provides exit reasons for a container. // ExitStatus provides exit reasons for a container.
@ -109,7 +114,7 @@ type Resources struct {
} }
type ResourceStats struct { type ResourceStats struct {
*libcontainer.ContainerStats *libcontainer.Stats
Read time.Time `json:"read"` Read time.Time `json:"read"`
MemoryLimit int64 `json:"memory_limit"` MemoryLimit int64 `json:"memory_limit"`
SystemUsage uint64 `json:"system_usage"` SystemUsage uint64 `json:"system_usage"`
@ -149,8 +154,8 @@ type Command struct {
Pid *Pid `json:"pid"` Pid *Pid `json:"pid"`
Resources *Resources `json:"resources"` Resources *Resources `json:"resources"`
Mounts []Mount `json:"mounts"` Mounts []Mount `json:"mounts"`
AllowedDevices []*devices.Device `json:"allowed_devices"` AllowedDevices []*configs.Device `json:"allowed_devices"`
AutoCreatedDevices []*devices.Device `json:"autocreated_devices"` AutoCreatedDevices []*configs.Device `json:"autocreated_devices"`
CapAdd []string `json:"cap_add"` CapAdd []string `json:"cap_add"`
CapDrop []string `json:"cap_drop"` CapDrop []string `json:"cap_drop"`
ContainerPid int `json:"container_pid"` // the pid for the process inside a container ContainerPid int `json:"container_pid"` // the pid for the process inside a container
@ -161,23 +166,19 @@ type Command struct {
AppArmorProfile string `json:"apparmor_profile"` AppArmorProfile string `json:"apparmor_profile"`
} }
func InitContainer(c *Command) *libcontainer.Config { func InitContainer(c *Command) *configs.Config {
container := template.New() container := template.New()
container.Hostname = getEnv("HOSTNAME", c.ProcessConfig.Env) container.Hostname = getEnv("HOSTNAME", c.ProcessConfig.Env)
container.Tty = c.ProcessConfig.Tty
container.User = c.ProcessConfig.User
container.WorkingDir = c.WorkingDir
container.Env = c.ProcessConfig.Env
container.Cgroups.Name = c.ID container.Cgroups.Name = c.ID
container.Cgroups.AllowedDevices = c.AllowedDevices container.Cgroups.AllowedDevices = c.AllowedDevices
container.MountConfig.DeviceNodes = c.AutoCreatedDevices container.Readonlyfs = c.ReadonlyRootfs
container.RootFs = c.Rootfs container.Devices = c.AutoCreatedDevices
container.MountConfig.ReadonlyFs = c.ReadonlyRootfs container.Rootfs = c.Rootfs
container.Readonlyfs = c.ReadonlyRootfs
// check to see if we are running in ramdisk to disable pivot root // check to see if we are running in ramdisk to disable pivot root
container.MountConfig.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != "" container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
container.RestrictSys = true
return container return container
} }
@ -191,7 +192,7 @@ func getEnv(key string, env []string) string {
return "" return ""
} }
func SetupCgroups(container *libcontainer.Config, c *Command) error { func SetupCgroups(container *configs.Config, c *Command) error {
if c.Resources != nil { if c.Resources != nil {
container.Cgroups.CpuShares = c.Resources.CpuShares container.Cgroups.CpuShares = c.Resources.CpuShares
container.Cgroups.Memory = c.Resources.Memory container.Cgroups.Memory = c.Resources.Memory
@ -203,28 +204,98 @@ func SetupCgroups(container *libcontainer.Config, c *Command) error {
return nil return nil
} }
func Stats(stateFile string, containerMemoryLimit int64, machineMemory int64) (*ResourceStats, error) { // Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo.
state, err := libcontainer.GetState(stateFile) func getNetworkInterfaceStats(interfaceName string) (*libcontainer.NetworkInterface, error) {
if err != nil { out := &libcontainer.NetworkInterface{Name: interfaceName}
if os.IsNotExist(err) { // This can happen if the network runtime information is missing - possible if the
return nil, ErrNotRunning // container was created by an old version of libcontainer.
if interfaceName == "" {
return out, nil
}
type netStatsPair struct {
// Where to write the output.
Out *uint64
// The network stats file to read.
File string
}
// Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container.
netStats := []netStatsPair{
{Out: &out.RxBytes, File: "tx_bytes"},
{Out: &out.RxPackets, File: "tx_packets"},
{Out: &out.RxErrors, File: "tx_errors"},
{Out: &out.RxDropped, File: "tx_dropped"},
{Out: &out.TxBytes, File: "rx_bytes"},
{Out: &out.TxPackets, File: "rx_packets"},
{Out: &out.TxErrors, File: "rx_errors"},
{Out: &out.TxDropped, File: "rx_dropped"},
}
for _, netStat := range netStats {
data, err := readSysfsNetworkStats(interfaceName, netStat.File)
if err != nil {
return nil, err
} }
*(netStat.Out) = data
}
return out, nil
}
// readSysfsNetworkStats returns the value of a single counter file located at
// /sys/class/net/<ethInterface>/statistics/<statsFile>.
//
// The file content is trimmed of surrounding whitespace and parsed as a
// base-10 unsigned 64-bit integer. A read failure yields (0, err); a parse
// failure yields whatever strconv.ParseUint reports.
func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) {
	statPath := filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile)
	raw, readErr := ioutil.ReadFile(statPath)
	if readErr != nil {
		return 0, readErr
	}
	text := strings.TrimSpace(string(raw))
	return strconv.ParseUint(text, 10, 64)
}
func Stats(containerDir string, containerMemoryLimit int64, machineMemory int64) (*ResourceStats, error) {
f, err := os.Open(filepath.Join(containerDir, "state.json"))
if err != nil {
return nil, err
}
defer f.Close()
type network struct {
Type string
HostInterfaceName string
}
state := struct {
CgroupPaths map[string]string `json:"cgroup_paths"`
Networks []network
}{}
if err := json.NewDecoder(f).Decode(&state); err != nil {
return nil, err return nil, err
} }
now := time.Now() now := time.Now()
stats, err := libcontainer.GetStats(nil, state)
mgr := fs.Manager{Paths: state.CgroupPaths}
cstats, err := mgr.GetStats()
if err != nil { if err != nil {
return nil, err return nil, err
} }
stats := &libcontainer.Stats{CgroupStats: cstats}
// if the container does not have any memory limit specified set the // if the container does not have any memory limit specified set the
// limit to the machines memory // limit to the machines memory
memoryLimit := containerMemoryLimit memoryLimit := containerMemoryLimit
if memoryLimit == 0 { if memoryLimit == 0 {
memoryLimit = machineMemory memoryLimit = machineMemory
} }
for _, iface := range state.Networks {
switch iface.Type {
case "veth":
istats, err := getNetworkInterfaceStats(iface.HostInterfaceName)
if err != nil {
return nil, err
}
stats.Interfaces = append(stats.Interfaces, istats)
}
}
return &ResourceStats{ return &ResourceStats{
Read: now, Stats: stats,
ContainerStats: stats, Read: now,
MemoryLimit: memoryLimit, MemoryLimit: memoryLimit,
}, nil }, nil
} }

View file

@ -23,7 +23,9 @@ import (
"github.com/docker/docker/utils" "github.com/docker/docker/utils"
"github.com/docker/libcontainer" "github.com/docker/libcontainer"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/mount/nodes" "github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/system"
"github.com/docker/libcontainer/user"
"github.com/kr/pty" "github.com/kr/pty"
) )
@ -42,7 +44,7 @@ type driver struct {
} }
type activeContainer struct { type activeContainer struct {
container *libcontainer.Config container *configs.Config
cmd *exec.Cmd cmd *exec.Cmd
} }
@ -190,7 +192,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
c.ProcessConfig.Path = aname c.ProcessConfig.Path = aname
c.ProcessConfig.Args = append([]string{name}, arg...) c.ProcessConfig.Args = append([]string{name}, arg...)
if err := nodes.CreateDeviceNodes(c.Rootfs, c.AutoCreatedDevices); err != nil { if err := createDeviceNodes(c.Rootfs, c.AutoCreatedDevices); err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err return execdriver.ExitStatus{ExitCode: -1}, err
} }
@ -231,11 +233,17 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
} }
state := &libcontainer.State{ state := &libcontainer.State{
InitPid: pid, InitProcessPid: pid,
CgroupPaths: cgroupPaths, CgroupPaths: cgroupPaths,
} }
if err := libcontainer.SaveState(dataPath, state); err != nil { f, err := os.Create(filepath.Join(dataPath, "state.json"))
if err != nil {
return terminate(err)
}
defer f.Close()
if err := json.NewEncoder(f).Encode(state); err != nil {
return terminate(err) return terminate(err)
} }
@ -245,8 +253,9 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
log.Debugf("Invoking startCallback") log.Debugf("Invoking startCallback")
startCallback(&c.ProcessConfig, pid) startCallback(&c.ProcessConfig, pid)
} }
oomKill := false oomKill := false
oomKillNotification, err := libcontainer.NotifyOnOOM(state) oomKillNotification, err := notifyOnOOM(cgroupPaths)
if err == nil { if err == nil {
_, oomKill = <-oomKillNotification _, oomKill = <-oomKillNotification
log.Debugf("oomKill error %s waitErr %s", oomKill, waitErr) log.Debugf("oomKill error %s waitErr %s", oomKill, waitErr)
@ -265,9 +274,57 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
return execdriver.ExitStatus{ExitCode: exitCode, OOMKilled: oomKill}, waitErr return execdriver.ExitStatus{ExitCode: exitCode, OOMKilled: oomKill}, waitErr
} }
// notifyOnOOM registers an eventfd against the memory.oom_control file found
// in the "memory" entry of paths and returns a channel that receives a value
// each time the eventfd is signalled. (Copied from libcontainer.)
//
// An error is returned when paths has no "memory" entry, when
// memory.oom_control cannot be opened, when the eventfd cannot be created, or
// when the registration cannot be written to cgroup.event_control. On any of
// those failures every descriptor opened so far is closed.
//
// The returned channel is closed, and both descriptors are released, once
// reading the eventfd fails or cgroup.event_control no longer exists.
func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) {
	memDir := paths["memory"]
	if memDir == "" {
		return nil, fmt.Errorf("There is no path for %q in state", "memory")
	}

	oomFile, err := os.Open(filepath.Join(memDir, "memory.oom_control"))
	if err != nil {
		return nil, err
	}

	rawFd, _, errno := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0)
	if errno != 0 {
		oomFile.Close()
		return nil, errno
	}
	evFile := os.NewFile(rawFd, "eventfd")

	// The registration written to cgroup.event_control is
	// "<eventfd descriptor> <oom_control descriptor>".
	controlPath := filepath.Join(memDir, "cgroup.event_control")
	registration := []byte(fmt.Sprintf("%d %d", evFile.Fd(), oomFile.Fd()))
	if err := ioutil.WriteFile(controlPath, registration, 0700); err != nil {
		evFile.Close()
		oomFile.Close()
		return nil, err
	}

	notify := make(chan struct{})
	go func() {
		// Deferred calls run last-in first-out: the channel is closed first,
		// then the eventfd, then the oom_control file.
		defer oomFile.Close()
		defer evFile.Close()
		defer close(notify)

		counter := make([]byte, 8)
		for {
			if _, readErr := evFile.Read(counter); readErr != nil {
				return
			}
			// When a cgroup is destroyed, an event is sent to eventfd.
			// So if the control path is gone, return instead of notifying.
			if _, statErr := os.Lstat(controlPath); os.IsNotExist(statErr) {
				return
			}
			notify <- struct{}{}
		}
	}()
	return notify, nil
}
// createContainer populates and configures the container type with the // createContainer populates and configures the container type with the
// data provided by the execdriver.Command // data provided by the execdriver.Command
func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, error) { func (d *driver) createContainer(c *execdriver.Command) (*configs.Config, error) {
container := execdriver.InitContainer(c) container := execdriver.InitContainer(c)
if err := execdriver.SetupCgroups(container, c); err != nil { if err := execdriver.SetupCgroups(container, c); err != nil {
return nil, err return nil, err
@ -297,6 +354,87 @@ func cgroupPaths(containerId string) (map[string]string, error) {
return paths, nil return paths, nil
} }
// createDeviceNodes creates each device in nodesToCreate beneath rootfs by
// calling createDeviceNode, stopping at and returning the first error.
// (Copied from the old libcontainer nodes.go.)
//
// The process umask is set to 0 for the duration of the call and restored to
// its previous value on return.
func createDeviceNodes(rootfs string, nodesToCreate []*configs.Device) error {
	// The inner Umask(0000) runs immediately and yields the previous mask,
	// which the deferred outer call puts back when the function exits.
	defer syscall.Umask(syscall.Umask(0000))

	for i := range nodesToCreate {
		if err := createDeviceNode(rootfs, nodesToCreate[i]); err != nil {
			return err
		}
	}
	return nil
}
// createDeviceNode creates the device node described by node inside the
// container root filesystem rootfs.
//
// The parent directory of the destination is created (mode 0755) if needed.
// node.Type must be 'c' (character device) or 'b' (block device); any other
// value is rejected with an error. An already existing node is not treated as
// a failure of mknod. After creation the node is chowned to node.Uid:node.Gid.
func createDeviceNode(rootfs string, node *configs.Device) error {
	var (
		dest   = filepath.Join(rootfs, node.Path)
		parent = filepath.Dir(dest)
	)

	if err := os.MkdirAll(parent, 0755); err != nil {
		return err
	}

	// Combine the permission bits with the file-type bit expected by mknod.
	fileMode := node.FileMode
	switch node.Type {
	case 'c':
		fileMode |= syscall.S_IFCHR
	case 'b':
		fileMode |= syscall.S_IFBLK
	default:
		return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
	}

	if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil && !os.IsExist(err) {
		return fmt.Errorf("mknod %s %s", node.Path, err)
	}

	// Include the underlying error so a chown failure can be diagnosed,
	// matching the mknod message above.
	if err := syscall.Chown(dest, int(node.Uid), int(node.Gid)); err != nil {
		return fmt.Errorf("chown %s to %d:%d %s", node.Path, node.Uid, node.Gid, err)
	}

	return nil
}
// setupUser resolves userSpec and switches the current process to the
// resulting gid and uid (in that order). It is a copy of the libcontainer
// routine, which is now private there.
//
// The lookup falls back to the current uid/gid and a home directory of "/"
// for anything userSpec does not determine. If the HOME environment variable
// is empty it is set to the resolved user's home directory.
//
// NOTE(review): only Setgid and Setuid are called here; supplementary groups
// are not modified by this function - confirm that is intended.
func setupUser(userSpec string) error {
	// Defaults used when userSpec leaves a field unspecified.
	fallback := user.ExecUser{
		Uid:  syscall.Getuid(),
		Gid:  syscall.Getgid(),
		Home: "/",
	}

	passwdFile, err := user.GetPasswdPath()
	if err != nil {
		return err
	}

	groupFile, err := user.GetGroupPath()
	if err != nil {
		return err
	}

	resolved, err := user.GetExecUserPath(userSpec, &fallback, passwdFile, groupFile)
	if err != nil {
		return err
	}

	// The gid is changed before the uid.
	if err := system.Setgid(resolved.Gid); err != nil {
		return err
	}
	if err := system.Setuid(resolved.Uid); err != nil {
		return err
	}

	// Keep an existing HOME; otherwise derive it from the resolved user.
	if os.Getenv("HOME") != "" {
		return nil
	}
	return os.Setenv("HOME", resolved.Home)
}
/// Return the exit code of the process /// Return the exit code of the process
// if the process has not exited -1 will be returned // if the process has not exited -1 will be returned
func getExitCode(c *execdriver.Command) int { func getExitCode(c *execdriver.Command) int {

View file

@ -3,8 +3,6 @@ package lxc
import ( import (
"fmt" "fmt"
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/namespaces"
"github.com/docker/libcontainer/utils" "github.com/docker/libcontainer/utils"
) )
@ -12,9 +10,7 @@ func finalizeNamespace(args *InitArgs) error {
if err := utils.CloseExecFrom(3); err != nil { if err := utils.CloseExecFrom(3); err != nil {
return err return err
} }
if err := namespaces.SetupUser(&libcontainer.Config{ if err := setupUser(args.User); err != nil {
User: args.User,
}); err != nil {
return fmt.Errorf("setup user %s", err) return fmt.Errorf("setup user %s", err)
} }
if err := setupWorkingDirectory(args); err != nil { if err := setupWorkingDirectory(args); err != nil {

View file

@ -11,7 +11,6 @@ import (
nativeTemplate "github.com/docker/docker/daemon/execdriver/native/template" nativeTemplate "github.com/docker/docker/daemon/execdriver/native/template"
"github.com/docker/docker/utils" "github.com/docker/docker/utils"
"github.com/docker/libcontainer/label" "github.com/docker/libcontainer/label"
"github.com/docker/libcontainer/security/capabilities"
) )
const LxcTemplate = ` const LxcTemplate = `
@ -169,7 +168,7 @@ func keepCapabilities(adds []string, drops []string) ([]string, error) {
var newCaps []string var newCaps []string
for _, cap := range caps { for _, cap := range caps {
log.Debugf("cap %s\n", cap) log.Debugf("cap %s\n", cap)
realCap := capabilities.GetCapability(cap) realCap := execdriver.GetCapability(cap)
numCap := fmt.Sprintf("%d", realCap.Value) numCap := fmt.Sprintf("%d", realCap.Value)
newCaps = append(newCaps, numCap) newCaps = append(newCaps, numCap)
} }
@ -180,13 +179,10 @@ func keepCapabilities(adds []string, drops []string) ([]string, error) {
func dropList(drops []string) ([]string, error) { func dropList(drops []string) ([]string, error) {
if utils.StringsContainsNoCase(drops, "all") { if utils.StringsContainsNoCase(drops, "all") {
var newCaps []string var newCaps []string
for _, cap := range capabilities.GetAllCapabilities() { for _, capName := range execdriver.GetAllCapabilities() {
log.Debugf("drop cap %s\n", cap) cap := execdriver.GetCapability(capName)
realCap := capabilities.GetCapability(cap) log.Debugf("drop cap %s\n", cap.Key)
if realCap == nil { numCap := fmt.Sprintf("%d", cap.Value)
return nil, fmt.Errorf("Invalid capability '%s'", cap)
}
numCap := fmt.Sprintf("%d", realCap.Value)
newCaps = append(newCaps, numCap) newCaps = append(newCaps, numCap)
} }
return newCaps, nil return newCaps, nil

View file

@ -5,11 +5,6 @@ package lxc
import ( import (
"bufio" "bufio"
"fmt" "fmt"
"github.com/docker/docker/daemon/execdriver"
nativeTemplate "github.com/docker/docker/daemon/execdriver/native/template"
"github.com/docker/libcontainer/devices"
"github.com/docker/libcontainer/security/capabilities"
"github.com/syndtr/gocapability/capability"
"io/ioutil" "io/ioutil"
"math/rand" "math/rand"
"os" "os"
@ -17,6 +12,11 @@ import (
"strings" "strings"
"testing" "testing"
"time" "time"
"github.com/docker/docker/daemon/execdriver"
nativeTemplate "github.com/docker/docker/daemon/execdriver/native/template"
"github.com/docker/libcontainer/configs"
"github.com/syndtr/gocapability/capability"
) )
func TestLXCConfig(t *testing.T) { func TestLXCConfig(t *testing.T) {
@ -53,7 +53,7 @@ func TestLXCConfig(t *testing.T) {
Mtu: 1500, Mtu: 1500,
Interface: nil, Interface: nil,
}, },
AllowedDevices: make([]*devices.Device, 0), AllowedDevices: make([]*configs.Device, 0),
ProcessConfig: execdriver.ProcessConfig{}, ProcessConfig: execdriver.ProcessConfig{},
} }
p, err := driver.generateLXCConfig(command) p, err := driver.generateLXCConfig(command)
@ -295,7 +295,7 @@ func TestCustomLxcConfigMisc(t *testing.T) {
grepFile(t, p, "lxc.cgroup.cpuset.cpus = 0,1") grepFile(t, p, "lxc.cgroup.cpuset.cpus = 0,1")
container := nativeTemplate.New() container := nativeTemplate.New()
for _, cap := range container.Capabilities { for _, cap := range container.Capabilities {
realCap := capabilities.GetCapability(cap) realCap := execdriver.GetCapability(cap)
numCap := fmt.Sprintf("%d", realCap.Value) numCap := fmt.Sprintf("%d", realCap.Value)
if cap != "MKNOD" && cap != "KILL" { if cap != "MKNOD" && cap != "KILL" {
grepFile(t, p, fmt.Sprintf("lxc.cap.keep = %s", numCap)) grepFile(t, p, fmt.Sprintf("lxc.cap.keep = %s", numCap))
@ -359,7 +359,7 @@ func TestCustomLxcConfigMiscOverride(t *testing.T) {
grepFile(t, p, "lxc.cgroup.cpuset.cpus = 0,1") grepFile(t, p, "lxc.cgroup.cpuset.cpus = 0,1")
container := nativeTemplate.New() container := nativeTemplate.New()
for _, cap := range container.Capabilities { for _, cap := range container.Capabilities {
realCap := capabilities.GetCapability(cap) realCap := execdriver.GetCapability(cap)
numCap := fmt.Sprintf("%d", realCap.Value) numCap := fmt.Sprintf("%d", realCap.Value)
if cap != "MKNOD" && cap != "KILL" { if cap != "MKNOD" && cap != "KILL" {
grepFile(t, p, fmt.Sprintf("lxc.cap.keep = %s", numCap)) grepFile(t, p, fmt.Sprintf("lxc.cap.keep = %s", numCap))

View file

@ -3,21 +3,24 @@
package native package native
import ( import (
"errors"
"fmt" "fmt"
"os/exec" "net"
"path/filepath" "path/filepath"
"strings"
"syscall"
"github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/daemon/execdriver"
"github.com/docker/libcontainer" "github.com/docker/docker/pkg/symlink"
"github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/apparmor"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/devices" "github.com/docker/libcontainer/devices"
"github.com/docker/libcontainer/mount" "github.com/docker/libcontainer/utils"
"github.com/docker/libcontainer/security/capabilities"
) )
// createContainer populates and configures the container type with the // createContainer populates and configures the container type with the
// data provided by the execdriver.Command // data provided by the execdriver.Command
func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, error) { func (d *driver) createContainer(c *execdriver.Command) (*configs.Config, error) {
container := execdriver.InitContainer(c) container := execdriver.InitContainer(c)
if err := d.createIpc(container, c); err != nil { if err := d.createIpc(container, c); err != nil {
@ -33,6 +36,13 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e
} }
if c.ProcessConfig.Privileged { if c.ProcessConfig.Privileged {
// clear readonly for /sys
for i := range container.Mounts {
if container.Mounts[i].Destination == "/sys" {
container.Mounts[i].Flags &= ^syscall.MS_RDONLY
}
}
container.ReadonlyPaths = nil
if err := d.setPrivileged(container); err != nil { if err := d.setPrivileged(container); err != nil {
return nil, err return nil, err
} }
@ -57,43 +67,52 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e
if err := d.setupLabels(container, c); err != nil { if err := d.setupLabels(container, c); err != nil {
return nil, err return nil, err
} }
d.setupRlimits(container, c) d.setupRlimits(container, c)
cmds := make(map[string]*exec.Cmd)
d.Lock()
for k, v := range d.activeContainers {
cmds[k] = v.cmd
}
d.Unlock()
return container, nil return container, nil
} }
func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Command) error { func generateIfaceName() (string, error) {
for i := 0; i < 10; i++ {
name, err := utils.GenerateRandomName("veth", 7)
if err != nil {
continue
}
if _, err := net.InterfaceByName(name); err != nil {
if strings.Contains(err.Error(), "no such") {
return name, nil
}
return "", err
}
}
return "", errors.New("Failed to find name for new interface")
}
func (d *driver) createNetwork(container *configs.Config, c *execdriver.Command) error {
if c.Network.HostNetworking { if c.Network.HostNetworking {
container.Namespaces.Remove(libcontainer.NEWNET) container.Namespaces.Remove(configs.NEWNET)
return nil return nil
} }
container.Networks = []*libcontainer.Network{ container.Networks = []*configs.Network{
{ {
Mtu: c.Network.Mtu, Type: "loopback",
Address: fmt.Sprintf("%s/%d", "127.0.0.1", 0),
Gateway: "localhost",
Type: "loopback",
}, },
} }
iName, err := generateIfaceName()
if err != nil {
return err
}
if c.Network.Interface != nil { if c.Network.Interface != nil {
vethNetwork := libcontainer.Network{ vethNetwork := configs.Network{
Mtu: c.Network.Mtu, Name: "eth0",
Address: fmt.Sprintf("%s/%d", c.Network.Interface.IPAddress, c.Network.Interface.IPPrefixLen), HostInterfaceName: iName,
MacAddress: c.Network.Interface.MacAddress, Mtu: c.Network.Mtu,
Gateway: c.Network.Interface.Gateway, Address: fmt.Sprintf("%s/%d", c.Network.Interface.IPAddress, c.Network.Interface.IPPrefixLen),
Type: "veth", MacAddress: c.Network.Interface.MacAddress,
Bridge: c.Network.Interface.Bridge, Gateway: c.Network.Interface.Gateway,
VethPrefix: "veth", Type: "veth",
Bridge: c.Network.Interface.Bridge,
} }
if c.Network.Interface.GlobalIPv6Address != "" { if c.Network.Interface.GlobalIPv6Address != "" {
vethNetwork.IPv6Address = fmt.Sprintf("%s/%d", c.Network.Interface.GlobalIPv6Address, c.Network.Interface.GlobalIPv6PrefixLen) vethNetwork.IPv6Address = fmt.Sprintf("%s/%d", c.Network.Interface.GlobalIPv6Address, c.Network.Interface.GlobalIPv6PrefixLen)
@ -107,21 +126,24 @@ func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Com
active := d.activeContainers[c.Network.ContainerID] active := d.activeContainers[c.Network.ContainerID]
d.Unlock() d.Unlock()
if active == nil || active.cmd.Process == nil { if active == nil {
return fmt.Errorf("%s is not a valid running container to join", c.Network.ContainerID) return fmt.Errorf("%s is not a valid running container to join", c.Network.ContainerID)
} }
cmd := active.cmd
nspath := filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "net") state, err := active.State()
container.Namespaces.Add(libcontainer.NEWNET, nspath) if err != nil {
return err
}
container.Namespaces.Add(configs.NEWNET, state.NamespacePaths[configs.NEWNET])
} }
return nil return nil
} }
func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command) error { func (d *driver) createIpc(container *configs.Config, c *execdriver.Command) error {
if c.Ipc.HostIpc { if c.Ipc.HostIpc {
container.Namespaces.Remove(libcontainer.NEWIPC) container.Namespaces.Remove(configs.NEWIPC)
return nil return nil
} }
@ -130,37 +152,38 @@ func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command
active := d.activeContainers[c.Ipc.ContainerID] active := d.activeContainers[c.Ipc.ContainerID]
d.Unlock() d.Unlock()
if active == nil || active.cmd.Process == nil { if active == nil {
return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID) return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID)
} }
cmd := active.cmd
container.Namespaces.Add(libcontainer.NEWIPC, filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "ipc")) state, err := active.State()
if err != nil {
return err
}
container.Namespaces.Add(configs.NEWIPC, state.NamespacePaths[configs.NEWIPC])
} }
return nil return nil
} }
func (d *driver) createPid(container *libcontainer.Config, c *execdriver.Command) error { func (d *driver) createPid(container *configs.Config, c *execdriver.Command) error {
if c.Pid.HostPid { if c.Pid.HostPid {
container.Namespaces.Remove(libcontainer.NEWPID) container.Namespaces.Remove(configs.NEWPID)
return nil return nil
} }
return nil return nil
} }
func (d *driver) setPrivileged(container *libcontainer.Config) (err error) { func (d *driver) setPrivileged(container *configs.Config) (err error) {
container.Capabilities = capabilities.GetAllCapabilities() container.Capabilities = execdriver.GetAllCapabilities()
container.Cgroups.AllowAllDevices = true container.Cgroups.AllowAllDevices = true
hostDeviceNodes, err := devices.GetHostDeviceNodes() hostDevices, err := devices.HostDevices()
if err != nil { if err != nil {
return err return err
} }
container.MountConfig.DeviceNodes = hostDeviceNodes container.Devices = hostDevices
container.RestrictSys = false
if apparmor.IsEnabled() { if apparmor.IsEnabled() {
container.AppArmorProfile = "unconfined" container.AppArmorProfile = "unconfined"
@ -169,39 +192,52 @@ func (d *driver) setPrivileged(container *libcontainer.Config) (err error) {
return nil return nil
} }
func (d *driver) setCapabilities(container *libcontainer.Config, c *execdriver.Command) (err error) { func (d *driver) setCapabilities(container *configs.Config, c *execdriver.Command) (err error) {
container.Capabilities, err = execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop) container.Capabilities, err = execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop)
return err return err
} }
func (d *driver) setupRlimits(container *libcontainer.Config, c *execdriver.Command) { func (d *driver) setupRlimits(container *configs.Config, c *execdriver.Command) {
if c.Resources == nil { if c.Resources == nil {
return return
} }
for _, rlimit := range c.Resources.Rlimits { for _, rlimit := range c.Resources.Rlimits {
container.Rlimits = append(container.Rlimits, libcontainer.Rlimit((*rlimit))) container.Rlimits = append(container.Rlimits, configs.Rlimit{
} Type: rlimit.Type,
} Hard: rlimit.Hard,
Soft: rlimit.Soft,
func (d *driver) setupMounts(container *libcontainer.Config, c *execdriver.Command) error {
for _, m := range c.Mounts {
container.MountConfig.Mounts = append(container.MountConfig.Mounts, &mount.Mount{
Type: "bind",
Source: m.Source,
Destination: m.Destination,
Writable: m.Writable,
Private: m.Private,
Slave: m.Slave,
}) })
} }
}
func (d *driver) setupMounts(container *configs.Config, c *execdriver.Command) error {
for _, m := range c.Mounts {
dest, err := symlink.FollowSymlinkInScope(filepath.Join(c.Rootfs, m.Destination), c.Rootfs)
if err != nil {
return err
}
flags := syscall.MS_BIND | syscall.MS_REC
if !m.Writable {
flags |= syscall.MS_RDONLY
}
if m.Slave {
flags |= syscall.MS_SLAVE
}
container.Mounts = append(container.Mounts, &configs.Mount{
Source: m.Source,
Destination: dest,
Device: "bind",
Flags: flags,
})
}
return nil return nil
} }
func (d *driver) setupLabels(container *libcontainer.Config, c *execdriver.Command) error { func (d *driver) setupLabels(container *configs.Config, c *execdriver.Command) error {
container.ProcessLabel = c.ProcessLabel container.ProcessLabel = c.ProcessLabel
container.MountConfig.MountLabel = c.MountLabel container.MountLabel = c.MountLabel
return nil return nil
} }

View file

@ -4,28 +4,28 @@ package native
import ( import (
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"io" "io"
"io/ioutil" "io/ioutil"
"os" "os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"strings"
"sync" "sync"
"syscall" "syscall"
"time"
log "github.com/Sirupsen/logrus" log "github.com/Sirupsen/logrus"
"github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/daemon/execdriver"
"github.com/docker/docker/pkg/reexec"
sysinfo "github.com/docker/docker/pkg/system" sysinfo "github.com/docker/docker/pkg/system"
"github.com/docker/docker/pkg/term" "github.com/docker/docker/pkg/term"
"github.com/docker/libcontainer" "github.com/docker/libcontainer"
"github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/apparmor"
"github.com/docker/libcontainer/cgroups/fs"
"github.com/docker/libcontainer/cgroups/systemd" "github.com/docker/libcontainer/cgroups/systemd"
consolepkg "github.com/docker/libcontainer/console" "github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/namespaces"
_ "github.com/docker/libcontainer/namespaces/nsenter"
"github.com/docker/libcontainer/system" "github.com/docker/libcontainer/system"
"github.com/docker/libcontainer/utils"
) )
const ( const (
@ -33,16 +33,12 @@ const (
Version = "0.2" Version = "0.2"
) )
type activeContainer struct {
container *libcontainer.Config
cmd *exec.Cmd
}
type driver struct { type driver struct {
root string root string
initPath string initPath string
activeContainers map[string]*activeContainer activeContainers map[string]libcontainer.Container
machineMemory int64 machineMemory int64
factory libcontainer.Factory
sync.Mutex sync.Mutex
} }
@ -59,11 +55,22 @@ func NewDriver(root, initPath string) (*driver, error) {
if err := apparmor.InstallDefaultProfile(); err != nil { if err := apparmor.InstallDefaultProfile(); err != nil {
return nil, err return nil, err
} }
cgm := libcontainer.Cgroupfs
if systemd.UseSystemd() {
cgm = libcontainer.SystemdCgroups
}
f, err := libcontainer.New(root, cgm, libcontainer.InitPath(reexec.Self(), DriverName))
if err != nil {
return nil, err
}
return &driver{ return &driver{
root: root, root: root,
initPath: initPath, initPath: initPath,
activeContainers: make(map[string]*activeContainer), activeContainers: make(map[string]libcontainer.Container),
machineMemory: meminfo.MemTotal, machineMemory: meminfo.MemTotal,
factory: f,
}, nil }, nil
} }
@ -81,101 +88,141 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
var term execdriver.Terminal var term execdriver.Terminal
p := &libcontainer.Process{
Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
Env: c.ProcessConfig.Env,
Cwd: c.WorkingDir,
User: c.ProcessConfig.User,
}
if c.ProcessConfig.Tty { if c.ProcessConfig.Tty {
term, err = NewTtyConsole(&c.ProcessConfig, pipes) rootuid, err := container.HostUID()
if err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
cons, err := p.NewConsole(rootuid)
if err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
term, err = NewTtyConsole(cons, pipes, rootuid)
} else { } else {
term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes) p.Stdout = pipes.Stdout
p.Stderr = pipes.Stderr
r, w, err := os.Pipe()
if err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
if pipes.Stdin != nil {
go func() {
io.Copy(w, pipes.Stdin)
w.Close()
}()
p.Stdin = r
}
term = &execdriver.StdConsole{}
} }
if err != nil { if err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err return execdriver.ExitStatus{ExitCode: -1}, err
} }
c.ProcessConfig.Terminal = term c.ProcessConfig.Terminal = term
cont, err := d.factory.Create(c.ID, container)
if err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
d.Lock() d.Lock()
d.activeContainers[c.ID] = &activeContainer{ d.activeContainers[c.ID] = cont
container: container,
cmd: &c.ProcessConfig.Cmd,
}
d.Unlock() d.Unlock()
defer func() {
var ( cont.Destroy()
dataPath = filepath.Join(d.root, c.ID) d.cleanContainer(c.ID)
args = append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...)
)
if err := d.createContainerRoot(c.ID); err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
defer d.cleanContainer(c.ID)
if err := d.writeContainerFile(container, c.ID); err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
execOutputChan := make(chan execOutput, 1)
waitForStart := make(chan struct{})
go func() {
exitCode, err := namespaces.Exec(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, dataPath, args, func(container *libcontainer.Config, console, dataPath, init string, child *os.File, args []string) *exec.Cmd {
c.ProcessConfig.Path = d.initPath
c.ProcessConfig.Args = append([]string{
DriverName,
"-console", console,
"-pipe", "3",
"-root", filepath.Join(d.root, c.ID),
"--",
}, args...)
// set this to nil so that when we set the clone flags anything else is reset
c.ProcessConfig.SysProcAttr = &syscall.SysProcAttr{
Cloneflags: uintptr(namespaces.GetNamespaceFlags(container.Namespaces)),
}
c.ProcessConfig.ExtraFiles = []*os.File{child}
c.ProcessConfig.Env = container.Env
c.ProcessConfig.Dir = container.RootFs
return &c.ProcessConfig.Cmd
}, func() {
close(waitForStart)
if startCallback != nil {
c.ContainerPid = c.ProcessConfig.Process.Pid
startCallback(&c.ProcessConfig, c.ContainerPid)
}
})
execOutputChan <- execOutput{exitCode, err}
}() }()
select { if err := cont.Start(p); err != nil {
case execOutput := <-execOutputChan: return execdriver.ExitStatus{ExitCode: -1}, err
return execdriver.ExitStatus{ExitCode: execOutput.exitCode}, execOutput.err
case <-waitForStart:
break
} }
oomKill := false if startCallback != nil {
state, err := libcontainer.GetState(filepath.Join(d.root, c.ID)) pid, err := p.Pid()
if err == nil { if err != nil {
oomKillNotification, err := libcontainer.NotifyOnOOM(state) p.Signal(os.Kill)
if err == nil { p.Wait()
_, oomKill = <-oomKillNotification return execdriver.ExitStatus{ExitCode: -1}, err
} else {
log.Warnf("WARNING: Your kernel does not support OOM notifications: %s", err)
} }
} else { startCallback(&c.ProcessConfig, pid)
log.Warnf("Failed to get container state, oom notify will not work: %s", err)
} }
// wait for the container to exit.
execOutput := <-execOutputChan
return execdriver.ExitStatus{ExitCode: execOutput.exitCode, OOMKilled: oomKill}, execOutput.err oomKillNotification, err := cont.NotifyOOM()
if err != nil {
oomKillNotification = nil
log.Warnf("WARNING: Your kernel does not support OOM notifications: %s", err)
}
waitF := p.Wait
if nss := cont.Config().Namespaces; nss.Contains(configs.NEWPID) {
// We need this hack to track processes with inherited fds,
// because cmd.Wait() waits for all streams to be copied.
waitF = waitInPIDHost(p, cont)
}
ps, err := waitF()
if err != nil {
if err, ok := err.(*exec.ExitError); !ok {
return execdriver.ExitStatus{ExitCode: -1}, err
} else {
ps = err.ProcessState
}
}
cont.Destroy()
_, oomKill := <-oomKillNotification
return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
} }
func (d *driver) Kill(p *execdriver.Command, sig int) error { func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) {
if p.ProcessConfig.Process == nil { return func() (*os.ProcessState, error) {
return errors.New("exec: not started") pid, err := p.Pid()
if err != nil {
return nil, err
}
process, err := os.FindProcess(pid)
s, err := process.Wait()
if err != nil {
if err, ok := err.(*exec.ExitError); !ok {
return s, err
} else {
s = err.ProcessState
}
}
processes, err := c.Processes()
if err != nil {
return s, err
}
for _, pid := range processes {
process, err := os.FindProcess(pid)
if err != nil {
log.Errorf("Failed to kill process: %d", pid)
continue
}
process.Kill()
}
p.Wait()
return s, err
} }
return syscall.Kill(p.ProcessConfig.Process.Pid, syscall.Signal(sig)) }
func (d *driver) Kill(c *execdriver.Command, sig int) error {
active := d.activeContainers[c.ID]
if active == nil {
return fmt.Errorf("active container for %s does not exist", c.ID)
}
state, err := active.State()
if err != nil {
return err
}
return syscall.Kill(state.InitProcessPid, syscall.Signal(sig))
} }
func (d *driver) Pause(c *execdriver.Command) error { func (d *driver) Pause(c *execdriver.Command) error {
@ -183,11 +230,7 @@ func (d *driver) Pause(c *execdriver.Command) error {
if active == nil { if active == nil {
return fmt.Errorf("active container for %s does not exist", c.ID) return fmt.Errorf("active container for %s does not exist", c.ID)
} }
active.container.Cgroups.Freezer = "FROZEN" return active.Pause()
if systemd.UseSystemd() {
return systemd.Freeze(active.container.Cgroups, active.container.Cgroups.Freezer)
}
return fs.Freeze(active.container.Cgroups, active.container.Cgroups.Freezer)
} }
func (d *driver) Unpause(c *execdriver.Command) error { func (d *driver) Unpause(c *execdriver.Command) error {
@ -195,44 +238,31 @@ func (d *driver) Unpause(c *execdriver.Command) error {
if active == nil { if active == nil {
return fmt.Errorf("active container for %s does not exist", c.ID) return fmt.Errorf("active container for %s does not exist", c.ID)
} }
active.container.Cgroups.Freezer = "THAWED" return active.Resume()
if systemd.UseSystemd() {
return systemd.Freeze(active.container.Cgroups, active.container.Cgroups.Freezer)
}
return fs.Freeze(active.container.Cgroups, active.container.Cgroups.Freezer)
} }
func (d *driver) Terminate(p *execdriver.Command) error { func (d *driver) Terminate(c *execdriver.Command) error {
// lets check the start time for the process // lets check the start time for the process
state, err := libcontainer.GetState(filepath.Join(d.root, p.ID)) active := d.activeContainers[c.ID]
if err != nil { if active == nil {
if !os.IsNotExist(err) { return fmt.Errorf("active container for %s does not exist", c.ID)
return err
}
// TODO: Remove this part for version 1.2.0
// This is added only to ensure smooth upgrades from pre 1.1.0 to 1.1.0
data, err := ioutil.ReadFile(filepath.Join(d.root, p.ID, "start"))
if err != nil {
// if we don't have the data on disk then we can assume the process is gone
// because this is only removed after we know the process has stopped
if os.IsNotExist(err) {
return nil
}
return err
}
state = &libcontainer.State{InitStartTime: string(data)}
} }
state, err := active.State()
if err != nil {
return err
}
pid := state.InitProcessPid
currentStartTime, err := system.GetProcessStartTime(p.ProcessConfig.Process.Pid) currentStartTime, err := system.GetProcessStartTime(pid)
if err != nil { if err != nil {
return err return err
} }
if state.InitStartTime == currentStartTime { if state.InitProcessStartTime == currentStartTime {
err = syscall.Kill(p.ProcessConfig.Process.Pid, 9) err = syscall.Kill(pid, 9)
syscall.Wait4(p.ProcessConfig.Process.Pid, nil, 0, nil) syscall.Wait4(pid, nil, 0, nil)
} }
d.cleanContainer(p.ID) d.cleanContainer(c.ID)
return err return err
@ -257,15 +287,10 @@ func (d *driver) GetPidsForContainer(id string) ([]int, error) {
if active == nil { if active == nil {
return nil, fmt.Errorf("active container for %s does not exist", id) return nil, fmt.Errorf("active container for %s does not exist", id)
} }
c := active.container.Cgroups return active.Processes()
if systemd.UseSystemd() {
return systemd.GetPids(c)
}
return fs.GetPids(c)
} }
func (d *driver) writeContainerFile(container *libcontainer.Config, id string) error { func (d *driver) writeContainerFile(container *configs.Config, id string) error {
data, err := json.Marshal(container) data, err := json.Marshal(container)
if err != nil { if err != nil {
return err return err
@ -289,42 +314,61 @@ func (d *driver) Clean(id string) error {
} }
func (d *driver) Stats(id string) (*execdriver.ResourceStats, error) { func (d *driver) Stats(id string) (*execdriver.ResourceStats, error) {
return execdriver.Stats(filepath.Join(d.root, id), d.activeContainers[id].container.Cgroups.Memory, d.machineMemory) c := d.activeContainers[id]
} now := time.Now()
stats, err := c.Stats()
type TtyConsole struct {
MasterPty *os.File
}
func NewTtyConsole(processConfig *execdriver.ProcessConfig, pipes *execdriver.Pipes) (*TtyConsole, error) {
ptyMaster, console, err := consolepkg.CreateMasterAndConsole()
if err != nil { if err != nil {
return nil, err return nil, err
} }
memoryLimit := c.Config().Cgroups.Memory
// if the container does not have any memory limit specified set the
// limit to the machines memory
if memoryLimit == 0 {
memoryLimit = d.machineMemory
}
return &execdriver.ResourceStats{
Stats: stats,
Read: now,
MemoryLimit: memoryLimit,
}, nil
}
// getEnv returns the value of the first entry in env whose name equals key.
//
// Each entry is expected to have the form "NAME=value". Only the first "="
// separates the name from the value, so a value may itself contain "=".
// An entry without any "=" is treated as a name with an empty value. If no
// entry matches key, the empty string is returned.
func getEnv(key string, env []string) string {
	for _, pair := range env {
		// A limit of 2 keeps any further "=" characters inside the value
		// instead of splitting the value into extra pieces.
		parts := strings.SplitN(pair, "=", 2)
		if parts[0] != key {
			continue
		}
		if len(parts) < 2 {
			// Bare "NAME" entry: there is no value, and indexing parts[1]
			// would panic.
			return ""
		}
		return parts[1]
	}
	return ""
}
type TtyConsole struct {
console libcontainer.Console
}
func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes, rootuid int) (*TtyConsole, error) {
tty := &TtyConsole{ tty := &TtyConsole{
MasterPty: ptyMaster, console: console,
} }
if err := tty.AttachPipes(&processConfig.Cmd, pipes); err != nil { if err := tty.AttachPipes(pipes); err != nil {
tty.Close() tty.Close()
return nil, err return nil, err
} }
processConfig.Console = console
return tty, nil return tty, nil
} }
func (t *TtyConsole) Master() *os.File { func (t *TtyConsole) Master() libcontainer.Console {
return t.MasterPty return t.console
} }
func (t *TtyConsole) Resize(h, w int) error { func (t *TtyConsole) Resize(h, w int) error {
return term.SetWinsize(t.MasterPty.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)}) return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
} }
func (t *TtyConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) error { func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error {
go func() { go func() {
if wb, ok := pipes.Stdout.(interface { if wb, ok := pipes.Stdout.(interface {
CloseWriters() error CloseWriters() error
@ -332,12 +376,12 @@ func (t *TtyConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) err
defer wb.CloseWriters() defer wb.CloseWriters()
} }
io.Copy(pipes.Stdout, t.MasterPty) io.Copy(pipes.Stdout, t.console)
}() }()
if pipes.Stdin != nil { if pipes.Stdin != nil {
go func() { go func() {
io.Copy(t.MasterPty, pipes.Stdin) io.Copy(t.console, pipes.Stdin)
pipes.Stdin.Close() pipes.Stdin.Close()
}() }()
@ -347,5 +391,5 @@ func (t *TtyConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) err
} }
func (t *TtyConsole) Close() error { func (t *TtyConsole) Close() error {
return t.MasterPty.Close() return t.console.Close()
} }

View file

@ -4,67 +4,77 @@ package native
import ( import (
"fmt" "fmt"
"log"
"os" "os"
"os/exec" "os/exec"
"path/filepath" "syscall"
"runtime"
"github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/daemon/execdriver"
"github.com/docker/docker/pkg/reexec"
"github.com/docker/libcontainer" "github.com/docker/libcontainer"
"github.com/docker/libcontainer/namespaces" _ "github.com/docker/libcontainer/nsenter"
"github.com/docker/libcontainer/utils"
) )
const execCommandName = "nsenter-exec"
func init() {
reexec.Register(execCommandName, nsenterExec)
}
func nsenterExec() {
runtime.LockOSThread()
// User args are passed after '--' in the command line.
userArgs := findUserArgs()
config, err := loadConfigFromFd()
if err != nil {
log.Fatalf("docker-exec: unable to receive config from sync pipe: %s", err)
}
if err := namespaces.FinalizeSetns(config, userArgs); err != nil {
log.Fatalf("docker-exec: failed to exec: %s", err)
}
}
// TODO(vishh): Add support for running in priviledged mode and running as a different user. // TODO(vishh): Add support for running in priviledged mode and running as a different user.
func (d *driver) Exec(c *execdriver.Command, processConfig *execdriver.ProcessConfig, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) { func (d *driver) Exec(c *execdriver.Command, processConfig *execdriver.ProcessConfig, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
active := d.activeContainers[c.ID] active := d.activeContainers[c.ID]
if active == nil { if active == nil {
return -1, fmt.Errorf("No active container exists with ID %s", c.ID) return -1, fmt.Errorf("No active container exists with ID %s", c.ID)
} }
state, err := libcontainer.GetState(filepath.Join(d.root, c.ID))
if err != nil {
return -1, fmt.Errorf("State unavailable for container with ID %s. The container may have been cleaned up already. Error: %s", c.ID, err)
}
var term execdriver.Terminal var term execdriver.Terminal
var err error
p := &libcontainer.Process{
Args: append([]string{processConfig.Entrypoint}, processConfig.Arguments...),
Env: c.ProcessConfig.Env,
Cwd: c.WorkingDir,
User: c.ProcessConfig.User,
}
if processConfig.Tty { if processConfig.Tty {
term, err = NewTtyConsole(processConfig, pipes) config := active.Config()
rootuid, err := config.HostUID()
if err != nil {
return -1, err
}
cons, err := p.NewConsole(rootuid)
if err != nil {
return -1, err
}
term, err = NewTtyConsole(cons, pipes, rootuid)
} else { } else {
term, err = execdriver.NewStdConsole(processConfig, pipes) p.Stdout = pipes.Stdout
p.Stderr = pipes.Stderr
p.Stdin = pipes.Stdin
term = &execdriver.StdConsole{}
}
if err != nil {
return -1, err
} }
processConfig.Terminal = term processConfig.Terminal = term
args := append([]string{processConfig.Entrypoint}, processConfig.Arguments...) if err := active.Start(p); err != nil {
return -1, err
}
return namespaces.ExecIn(active.container, state, args, os.Args[0], "exec", processConfig.Stdin, processConfig.Stdout, processConfig.Stderr, processConfig.Console, if startCallback != nil {
func(cmd *exec.Cmd) { pid, err := p.Pid()
if startCallback != nil { if err != nil {
startCallback(&c.ProcessConfig, cmd.Process.Pid) p.Signal(os.Kill)
} p.Wait()
}) return -1, err
}
startCallback(&c.ProcessConfig, pid)
}
ps, err := p.Wait()
if err != nil {
exitErr, ok := err.(*exec.ExitError)
if !ok {
return -1, err
}
ps = exitErr.ProcessState
}
return utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), nil
} }

View file

@ -2,13 +2,6 @@
package native package native
import (
"os"
"path/filepath"
"github.com/docker/libcontainer"
)
type info struct { type info struct {
ID string ID string
driver *driver driver *driver
@ -18,13 +11,6 @@ type info struct {
// pid file for a container. If the file exists then the // pid file for a container. If the file exists then the
// container is currently running // container is currently running
func (i *info) IsRunning() bool { func (i *info) IsRunning() bool {
if _, err := libcontainer.GetState(filepath.Join(i.driver.root, i.ID)); err == nil { _, ok := i.driver.activeContainers[i.ID]
return true return ok
}
// TODO: Remove this part for version 1.2.0
// This is added only to ensure smooth upgrades from pre 1.1.0 to 1.1.0
if _, err := os.Stat(filepath.Join(i.driver.root, i.ID, "pid")); err == nil {
return true
}
return false
} }

View file

@ -3,55 +3,40 @@
package native package native
import ( import (
"encoding/json"
"flag"
"fmt" "fmt"
"os" "os"
"path/filepath"
"runtime" "runtime"
"github.com/docker/docker/pkg/reexec" "github.com/docker/docker/pkg/reexec"
"github.com/docker/libcontainer" "github.com/docker/libcontainer"
"github.com/docker/libcontainer/namespaces"
) )
func init() { func init() {
reexec.Register(DriverName, initializer) reexec.Register(DriverName, initializer)
} }
// fatal reports err on standard error and terminates the process with exit
// status 1. When err is a libcontainer.Error its Detail method is asked to
// write to os.Stderr; any other error is printed with fmt.Fprintln.
func fatal(err error) {
	detailed, isLibcontainerErr := err.(libcontainer.Error)
	if isLibcontainerErr {
		detailed.Detail(os.Stderr)
	} else {
		fmt.Fprintln(os.Stderr, err)
	}
	// Both paths end the process with the same exit status.
	os.Exit(1)
}
func initializer() { func initializer() {
runtime.GOMAXPROCS(1)
runtime.LockOSThread() runtime.LockOSThread()
factory, err := libcontainer.New("")
var (
pipe = flag.Int("pipe", 0, "sync pipe fd")
console = flag.String("console", "", "console (pty slave) path")
root = flag.String("root", ".", "root path for configuration files")
)
flag.Parse()
var container *libcontainer.Config
f, err := os.Open(filepath.Join(*root, "container.json"))
if err != nil { if err != nil {
writeError(err) fatal(err)
}
if err := factory.StartInitialization(3); err != nil {
fatal(err)
} }
if err := json.NewDecoder(f).Decode(&container); err != nil { panic("unreachable")
f.Close()
writeError(err)
}
f.Close()
rootfs, err := os.Getwd()
if err != nil {
writeError(err)
}
if err := namespaces.Init(container, rootfs, *console, os.NewFile(uintptr(*pipe), "child"), flag.Args()); err != nil {
writeError(err)
}
panic("Unreachable")
} }
func writeError(err error) { func writeError(err error) {

View file

@ -1,14 +1,17 @@
package template package template
import ( import (
"github.com/docker/libcontainer" "syscall"
"github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/apparmor"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs"
) )
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
// New returns the docker default configuration for libcontainer // New returns the docker default configuration for libcontainer
func New() *libcontainer.Config { func New() *configs.Config {
container := &libcontainer.Config{ container := &configs.Config{
Capabilities: []string{ Capabilities: []string{
"CHOWN", "CHOWN",
"DAC_OVERRIDE", "DAC_OVERRIDE",
@ -25,18 +28,51 @@ func New() *libcontainer.Config {
"KILL", "KILL",
"AUDIT_WRITE", "AUDIT_WRITE",
}, },
Namespaces: libcontainer.Namespaces([]libcontainer.Namespace{ Namespaces: configs.Namespaces([]configs.Namespace{
{Type: "NEWNS"}, {Type: "NEWNS"},
{Type: "NEWUTS"}, {Type: "NEWUTS"},
{Type: "NEWIPC"}, {Type: "NEWIPC"},
{Type: "NEWPID"}, {Type: "NEWPID"},
{Type: "NEWNET"}, {Type: "NEWNET"},
}), }),
Cgroups: &cgroups.Cgroup{ Cgroups: &configs.Cgroup{
Parent: "docker", Parent: "docker",
AllowAllDevices: false, AllowAllDevices: false,
}, },
MountConfig: &libcontainer.MountConfig{}, Mounts: []*configs.Mount{
{
Device: "tmpfs",
Source: "shm",
Destination: "/dev/shm",
Data: "mode=1777,size=65536k",
Flags: defaultMountFlags,
},
{
Source: "mqueue",
Destination: "/dev/mqueue",
Device: "mqueue",
Flags: defaultMountFlags,
},
{
Source: "sysfs",
Destination: "/sys",
Device: "sysfs",
Flags: defaultMountFlags | syscall.MS_RDONLY,
},
},
MaskPaths: []string{
"/proc/kcore",
},
ReadonlyPaths: []string{
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
},
Rlimits: []configs.Rlimit{
{
Type: syscall.RLIMIT_NOFILE,
Hard: 1024,
Soft: 1024,
},
},
} }
if apparmor.IsEnabled() { if apparmor.IsEnabled() {

View file

@ -2,28 +2,21 @@
package native package native
import ( //func findUserArgs() []string {
"encoding/json" //for i, a := range os.Args {
"os" //if a == "--" {
//return os.Args[i+1:]
//}
//}
//return []string{}
//}
"github.com/docker/libcontainer" //// loadConfigFromFd loads a container's config from the sync pipe that is provided by
) //// fd 3 when running a process
//func loadConfigFromFd() (*configs.Config, error) {
func findUserArgs() []string { //var config *libcontainer.Config
for i, a := range os.Args { //if err := json.NewDecoder(os.NewFile(3, "child")).Decode(&config); err != nil {
if a == "--" { //return nil, err
return os.Args[i+1:] //}
} //return config, nil
} //}
return []string{}
}
// loadConfigFromFd loads a container's config from the sync pipe that is provided by
// fd 3 when running a process
func loadConfigFromFd() (*libcontainer.Config, error) {
var config *libcontainer.Config
if err := json.NewDecoder(os.NewFile(3, "child")).Decode(&config); err != nil {
return nil, err
}
return config, nil
}

View file

@ -5,13 +5,83 @@ import (
"strings" "strings"
"github.com/docker/docker/utils" "github.com/docker/docker/utils"
"github.com/docker/libcontainer/security/capabilities" "github.com/syndtr/gocapability/capability"
) )
// capabilityList is the table of capabilities known to this package. Each
// entry pairs a capability name written without the "CAP_" prefix (Key) with
// the matching constant from the gocapability package (Value).
// GetCapability and GetAllCapabilities both read from this table.
var capabilityList = Capabilities{
{Key: "SETPCAP", Value: capability.CAP_SETPCAP},
{Key: "SYS_MODULE", Value: capability.CAP_SYS_MODULE},
{Key: "SYS_RAWIO", Value: capability.CAP_SYS_RAWIO},
{Key: "SYS_PACCT", Value: capability.CAP_SYS_PACCT},
{Key: "SYS_ADMIN", Value: capability.CAP_SYS_ADMIN},
{Key: "SYS_NICE", Value: capability.CAP_SYS_NICE},
{Key: "SYS_RESOURCE", Value: capability.CAP_SYS_RESOURCE},
{Key: "SYS_TIME", Value: capability.CAP_SYS_TIME},
{Key: "SYS_TTY_CONFIG", Value: capability.CAP_SYS_TTY_CONFIG},
{Key: "MKNOD", Value: capability.CAP_MKNOD},
{Key: "AUDIT_WRITE", Value: capability.CAP_AUDIT_WRITE},
{Key: "AUDIT_CONTROL", Value: capability.CAP_AUDIT_CONTROL},
{Key: "MAC_OVERRIDE", Value: capability.CAP_MAC_OVERRIDE},
{Key: "MAC_ADMIN", Value: capability.CAP_MAC_ADMIN},
{Key: "NET_ADMIN", Value: capability.CAP_NET_ADMIN},
{Key: "SYSLOG", Value: capability.CAP_SYSLOG},
{Key: "CHOWN", Value: capability.CAP_CHOWN},
{Key: "NET_RAW", Value: capability.CAP_NET_RAW},
{Key: "DAC_OVERRIDE", Value: capability.CAP_DAC_OVERRIDE},
{Key: "FOWNER", Value: capability.CAP_FOWNER},
{Key: "DAC_READ_SEARCH", Value: capability.CAP_DAC_READ_SEARCH},
{Key: "FSETID", Value: capability.CAP_FSETID},
{Key: "KILL", Value: capability.CAP_KILL},
{Key: "SETGID", Value: capability.CAP_SETGID},
{Key: "SETUID", Value: capability.CAP_SETUID},
{Key: "LINUX_IMMUTABLE", Value: capability.CAP_LINUX_IMMUTABLE},
{Key: "NET_BIND_SERVICE", Value: capability.CAP_NET_BIND_SERVICE},
{Key: "NET_BROADCAST", Value: capability.CAP_NET_BROADCAST},
{Key: "IPC_LOCK", Value: capability.CAP_IPC_LOCK},
{Key: "IPC_OWNER", Value: capability.CAP_IPC_OWNER},
{Key: "SYS_CHROOT", Value: capability.CAP_SYS_CHROOT},
{Key: "SYS_PTRACE", Value: capability.CAP_SYS_PTRACE},
{Key: "SYS_BOOT", Value: capability.CAP_SYS_BOOT},
{Key: "LEASE", Value: capability.CAP_LEASE},
{Key: "SETFCAP", Value: capability.CAP_SETFCAP},
{Key: "WAKE_ALARM", Value: capability.CAP_WAKE_ALARM},
{Key: "BLOCK_SUSPEND", Value: capability.CAP_BLOCK_SUSPEND},
}
// CapabilityMapping associates a capability's short name (Key) with its
// gocapability constant (Value). Both fields are omitted from JSON output
// when they hold their zero value.
type CapabilityMapping struct {
	Key   string         `json:"key,omitempty"`
	Value capability.Cap `json:"value,omitempty"`
}

// Capabilities is a list of pointers to CapabilityMapping entries.
type Capabilities []*CapabilityMapping
// String returns the mapping's Key, i.e. the capability's short name.
func (c *CapabilityMapping) String() string {
	name := c.Key
	return name
}
// GetCapability searches capabilityList for an entry whose Key equals key.
// It returns a pointer to a copy of the first match, so callers cannot
// modify the shared table through the result, or nil when nothing matches.
func GetCapability(key string) *CapabilityMapping {
	for i := range capabilityList {
		entry := capabilityList[i]
		if entry.Key != key {
			continue
		}
		found := *entry
		return &found
	}
	return nil
}
// GetAllCapabilities returns the names of every entry in capabilityList,
// in table order, as produced by each entry's String method.
func GetAllCapabilities() []string {
	names := make([]string, 0, len(capabilityList))
	for _, entry := range capabilityList {
		names = append(names, entry.String())
	}
	return names
}
func TweakCapabilities(basics, adds, drops []string) ([]string, error) { func TweakCapabilities(basics, adds, drops []string) ([]string, error) {
var ( var (
newCaps []string newCaps []string
allCaps = capabilities.GetAllCapabilities() allCaps = GetAllCapabilities()
) )
// look for invalid cap in the drop list // look for invalid cap in the drop list
@ -26,7 +96,7 @@ func TweakCapabilities(basics, adds, drops []string) ([]string, error) {
// handle --cap-add=all // handle --cap-add=all
if utils.StringsContainsNoCase(adds, "all") { if utils.StringsContainsNoCase(adds, "all") {
basics = capabilities.GetAllCapabilities() basics = allCaps
} }
if !utils.StringsContainsNoCase(drops, "all") { if !utils.StringsContainsNoCase(drops, "all") {

View file

@ -18,7 +18,7 @@ func (daemon *Daemon) ContainerStats(job *engine.Job) engine.Status {
enc := json.NewEncoder(job.Stdout) enc := json.NewEncoder(job.Stdout)
for v := range updates { for v := range updates {
update := v.(*execdriver.ResourceStats) update := v.(*execdriver.ResourceStats)
ss := convertToAPITypes(update.ContainerStats) ss := convertToAPITypes(update.Stats)
ss.MemoryStats.Limit = uint64(update.MemoryLimit) ss.MemoryStats.Limit = uint64(update.MemoryLimit)
ss.Read = update.Read ss.Read = update.Read
ss.CpuStats.SystemUsage = update.SystemUsage ss.CpuStats.SystemUsage = update.SystemUsage
@ -31,20 +31,21 @@ func (daemon *Daemon) ContainerStats(job *engine.Job) engine.Status {
return engine.StatusOK return engine.StatusOK
} }
// convertToAPITypes converts the libcontainer.ContainerStats to the api specific // convertToAPITypes converts the libcontainer.Stats to the api specific
// structs. This is done to preserve API compatibility and versioning. // structs. This is done to preserve API compatibility and versioning.
func convertToAPITypes(ls *libcontainer.ContainerStats) *types.Stats { func convertToAPITypes(ls *libcontainer.Stats) *types.Stats {
s := &types.Stats{} s := &types.Stats{}
if ls.NetworkStats != nil { if ls.Interfaces != nil {
s.Network = types.Network{ s.Network = types.Network{}
RxBytes: ls.NetworkStats.RxBytes, for _, iface := range ls.Interfaces {
RxPackets: ls.NetworkStats.RxPackets, s.Network.RxBytes += iface.RxBytes
RxErrors: ls.NetworkStats.RxErrors, s.Network.RxPackets += iface.RxPackets
RxDropped: ls.NetworkStats.RxDropped, s.Network.RxErrors += iface.RxErrors
TxBytes: ls.NetworkStats.TxBytes, s.Network.RxDropped += iface.RxDropped
TxPackets: ls.NetworkStats.TxPackets, s.Network.TxBytes += iface.TxBytes
TxErrors: ls.NetworkStats.TxErrors, s.Network.TxPackets += iface.TxPackets
TxDropped: ls.NetworkStats.TxDropped, s.Network.TxErrors += iface.TxErrors
s.Network.TxDropped += iface.TxDropped
} }
} }
cs := ls.CgroupStats cs := ls.CgroupStats

View file

@ -60,7 +60,7 @@ func TestExecInteractiveStdinClose(t *testing.T) {
out, err := cmd.CombinedOutput() out, err := cmd.CombinedOutput()
if err != nil { if err != nil {
t.Fatal(err, out) t.Fatal(err, string(out))
} }
if string(out) == "" { if string(out) == "" {
@ -538,7 +538,6 @@ func TestRunExecDir(t *testing.T) {
id := strings.TrimSpace(out) id := strings.TrimSpace(out)
execDir := filepath.Join(execDriverPath, id) execDir := filepath.Join(execDriverPath, id)
stateFile := filepath.Join(execDir, "state.json") stateFile := filepath.Join(execDir, "state.json")
contFile := filepath.Join(execDir, "container.json")
{ {
fi, err := os.Stat(execDir) fi, err := os.Stat(execDir)
@ -552,10 +551,6 @@ func TestRunExecDir(t *testing.T) {
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
fi, err = os.Stat(contFile)
if err != nil {
t.Fatal(err)
}
} }
stopCmd := exec.Command(dockerBinary, "stop", id) stopCmd := exec.Command(dockerBinary, "stop", id)
@ -564,23 +559,12 @@ func TestRunExecDir(t *testing.T) {
t.Fatal(err, out) t.Fatal(err, out)
} }
{ {
fi, err := os.Stat(execDir) _, err := os.Stat(execDir)
if err != nil { if err == nil {
t.Fatal(err) t.Fatal(err)
} }
if !fi.IsDir() {
t.Fatalf("%q must be a directory", execDir)
}
fi, err = os.Stat(stateFile)
if err == nil { if err == nil {
t.Fatalf("Statefile %q is exists for stopped container!", stateFile) t.Fatalf("Exec directory %q exists for removed container!", execDir)
}
if !os.IsNotExist(err) {
t.Fatalf("Error should be about non-existing, got %s", err)
}
fi, err = os.Stat(contFile)
if err == nil {
t.Fatalf("Container file %q is exists for stopped container!", contFile)
} }
if !os.IsNotExist(err) { if !os.IsNotExist(err) {
t.Fatalf("Error should be about non-existing, got %s", err) t.Fatalf("Error should be about non-existing, got %s", err)
@ -603,10 +587,6 @@ func TestRunExecDir(t *testing.T) {
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
fi, err = os.Stat(contFile)
if err != nil {
t.Fatal(err)
}
} }
rmCmd := exec.Command(dockerBinary, "rm", "-f", id) rmCmd := exec.Command(dockerBinary, "rm", "-f", id)
out, _, err = runCommandWithOutput(rmCmd) out, _, err = runCommandWithOutput(rmCmd)

View file

@ -68,8 +68,7 @@ if [ "$1" = '--go' ]; then
mv tmp-tar src/code.google.com/p/go/src/pkg/archive/tar mv tmp-tar src/code.google.com/p/go/src/pkg/archive/tar
fi fi
# this commit is from docker_1.5 branch in libcontainer, pls delete that branch when you'll update libcontainer again clone git github.com/docker/libcontainer dd3cb8822352fd4acc0b8b426bd86e47e98f6853
clone git github.com/docker/libcontainer 2d3b5af7486f1a4e80a5ed91859d309b4eebf80c
# see src/github.com/docker/libcontainer/update-vendor.sh which is the "source of truth" for libcontainer deps (just like this file) # see src/github.com/docker/libcontainer/update-vendor.sh which is the "source of truth" for libcontainer deps (just like this file)
rm -rf src/github.com/docker/libcontainer/vendor rm -rf src/github.com/docker/libcontainer/vendor
eval "$(grep '^clone ' src/github.com/docker/libcontainer/update-vendor.sh | grep -v 'github.com/codegangsta/cli')" eval "$(grep '^clone ' src/github.com/docker/libcontainer/update-vendor.sh | grep -v 'github.com/codegangsta/cli')"

View file

@ -1,9 +0,0 @@
image: dockercore/libcontainer
script:
# Setup the DockerInDocker environment.
- /dind
- sed -i 's!docker/docker!docker/libcontainer!' /go/src/github.com/docker/docker/hack/make/.validate
- bash /go/src/github.com/docker/docker/hack/make/validate-dco
- bash /go/src/github.com/docker/docker/hack/make/validate-gofmt
- export GOPATH="$GOPATH:/go:$(pwd)/vendor" # Drone mucks with our GOPATH
- make direct-test

View file

@ -0,0 +1 @@
nsinit/nsinit

View file

@ -3,4 +3,5 @@ Rohit Jnagal <jnagal@google.com> (@rjnagal)
Victor Marmol <vmarmol@google.com> (@vmarmol) Victor Marmol <vmarmol@google.com> (@vmarmol)
Mrunal Patel <mpatel@redhat.com> (@mrunalp) Mrunal Patel <mpatel@redhat.com> (@mrunalp)
Alexandr Morozov <lk4d4@docker.com> (@LK4D4) Alexandr Morozov <lk4d4@docker.com> (@LK4D4)
Daniel, Dao Quang Minh <dqminh89@gmail.com> (@dqminh)
update-vendor.sh: Tianon Gravi <admwiggin@gmail.com> (@tianon) update-vendor.sh: Tianon Gravi <admwiggin@gmail.com> (@tianon)

View file

@ -22,3 +22,10 @@ direct-build:
direct-install: direct-install:
go install -v $(GO_PACKAGES) go install -v $(GO_PACKAGES)
local:
go test -v
validate:
hack/validate.sh

View file

@ -1,5 +1,5 @@
libcontainer libcontainer
Copyright 2012-2014 Docker, Inc. Copyright 2012-2015 Docker, Inc.
This product includes software developed at Docker, Inc. (http://www.docker.com). This product includes software developed at Docker, Inc. (http://www.docker.com).

View file

@ -8,7 +8,7 @@ In the design and development of libcontainer we try to follow these principles:
* Less code is better. * Less code is better.
* Fewer components are better. Do you really need to add one more class? * Fewer components are better. Do you really need to add one more class?
* 50 lines of straightforward, readable code is better than 10 lines of magic that nobody can understand. * 50 lines of straightforward, readable code is better than 10 lines of magic that nobody can understand.
* Don't do later what you can do now. "//FIXME: refactor" is not acceptable in new code. * Don't do later what you can do now. "//TODO: refactor" is not acceptable in new code.
* When hesitating between two options, choose the one that is easier to reverse. * When hesitating between two options, choose the one that is easier to reverse.
* "No" is temporary; "Yes" is forever. If you're not sure about a new feature, say no. You can change your mind later. * "No" is temporary; "Yes" is forever. If you're not sure about a new feature, say no. You can change your mind later.
* Containers must be portable to the greatest possible number of machines. Be suspicious of any change which makes machines less interchangeable. * Containers must be portable to the greatest possible number of machines. Be suspicious of any change which makes machines less interchangeable.

View file

@ -1,48 +1,169 @@
## libcontainer - reference implementation for containers [![Build Status](https://ci.dockerproject.com/github.com/docker/libcontainer/status.svg?branch=master)](https://ci.dockerproject.com/github.com/docker/libcontainer) ## libcontainer - reference implementation for containers [![Build Status](https://jenkins.dockerproject.com/buildStatus/icon?job=Libcontainer Master)](https://jenkins.dockerproject.com/job/Libcontainer%20Master/)
### Note on API changes: Libcontainer provides a native Go implementation for creating containers
with namespaces, cgroups, capabilities, and filesystem access controls.
Please bear with us while we work on making the libcontainer API stable and something that we can support long term. We are currently discussing the API with the community, therefore, if you currently depend on libcontainer please pin your dependency at a specific tag or commit id. Please join the discussion and help shape the API. It allows you to manage the lifecycle of the container performing additional operations
after the container is created.
#### Background
libcontainer specifies configuration options for what a container is. It provides a native Go implementation for using Linux namespaces with no external dependencies. libcontainer provides many convenience functions for working with namespaces, networking, and management.
#### Container #### Container
A container is a self contained execution environment that shares the kernel of the host system and which is (optionally) isolated from other containers in the system. A container is a self contained execution environment that shares the kernel of the
host system and which is (optionally) isolated from other containers in the system.
libcontainer may be used to execute a process in a container. If a user tries to run a new process inside an existing container, the new process is added to the processes executing in the container. #### Using libcontainer
To create a container you first have to initialize an instance of a factory
that will handle the creation and initialization for a container.
Because containers are spawned in a two-step process you will need to provide
arguments to a binary that will be executed as the init process for the container.
To use the current binary that is spawning the containers and acting as the parent
you can use `os.Args[0]` and we have a command called `init` set up.
```go
root, err := libcontainer.New("/var/lib/container", libcontainer.InitArgs(os.Args[0], "init"))
if err != nil {
log.Fatal(err)
}
```
Once you have an instance of the factory created we can create a configuration
struct describing how the container is to be created. A sample would look similar to this:
```go
config := &configs.Config{
Rootfs: rootfs,
Capabilities: []string{
"CHOWN",
"DAC_OVERRIDE",
"FSETID",
"FOWNER",
"MKNOD",
"NET_RAW",
"SETGID",
"SETUID",
"SETFCAP",
"SETPCAP",
"NET_BIND_SERVICE",
"SYS_CHROOT",
"KILL",
"AUDIT_WRITE",
},
Namespaces: configs.Namespaces([]configs.Namespace{
{Type: configs.NEWNS},
{Type: configs.NEWUTS},
{Type: configs.NEWIPC},
{Type: configs.NEWPID},
{Type: configs.NEWNET},
}),
Cgroups: &configs.Cgroup{
Name: "test-container",
Parent: "system",
AllowAllDevices: false,
AllowedDevices: configs.DefaultAllowedDevices,
},
Devices: configs.DefaultAutoCreatedDevices,
Hostname: "testing",
Networks: []*configs.Network{
{
Type: "loopback",
Address: "127.0.0.1/0",
Gateway: "localhost",
},
},
Rlimits: []configs.Rlimit{
{
Type: syscall.RLIMIT_NOFILE,
Hard: uint64(1024),
Soft: uint64(1024),
},
},
}
```
Once you have the configuration populated you can create a container:
```go
container, err := root.Create("container-id", config)
```
To spawn bash as the initial process inside the container and have the
process's pid returned in order to wait, signal, or kill the process:
```go
process := &libcontainer.Process{
Args: []string{"/bin/bash"},
Env: []string{"PATH=/bin"},
User: "daemon",
Stdin: os.Stdin,
Stdout: os.Stdout,
Stderr: os.Stderr,
}
err := container.Start(process)
if err != nil {
log.Fatal(err)
}
// wait for the process to finish.
status, err := process.Wait()
if err != nil {
log.Fatal(err)
}
// destroy the container.
container.Destroy()
```
Additional ways to interact with a running container are:
```go
// return all the pids for all processes running inside the container.
processes, err := container.Processes()
// get detailed cpu, memory, io, and network statistics for the container and
// its processes.
stats, err := container.Stats()
#### Root file system // pause all processes inside the container.
container.Pause()
A container runs with a directory known as its *root file system*, or *rootfs*, mounted as the file system root. The rootfs is usually a full system tree. // resume all paused processes.
container.Resume()
```
#### Configuration
A container is initially configured by supplying configuration data when the container is created.
#### nsinit #### nsinit
`nsinit` is a cli application which demonstrates the use of libcontainer. It is able to spawn new containers or join existing containers, based on the current directory. `nsinit` is a cli application which demonstrates the use of libcontainer.
It is able to spawn new containers or join existing containers. A root
filesystem must be provided for use along with a container configuration file.
To use `nsinit`, cd into a Linux rootfs and copy a `container.json` file into the directory with your specified configuration. Environment, networking, and different capabilities for the container are specified in this file. The configuration is used for each process executed inside the container. To use `nsinit`, cd into a Linux rootfs and copy a `container.json` file into
the directory with your specified configuration. Environment, networking,
and different capabilities for the container are specified in this file.
The configuration is used for each process executed inside the container.
See the `sample_configs` folder for examples of what the container configuration should look like. See the `sample_configs` folder for examples of what the container configuration should look like.
To execute `/bin/bash` in the current directory as a container just run the following **as root**: To execute `/bin/bash` in the current directory as a container just run the following **as root**:
```bash ```bash
nsinit exec /bin/bash nsinit exec --tty /bin/bash
``` ```
If you wish to spawn another process inside the container while your current bash session is running, run the same command again to get another bash shell (or change the command). If the original process (PID 1) dies, all other processes spawned inside the container will be killed and the namespace will be removed. If you wish to spawn another process inside the container while your
current bash session is running, run the same command again to
get another bash shell (or change the command). If the original
process (PID 1) dies, all other processes spawned inside the container
will be killed and the namespace will be removed.
You can identify if a process is running in a container by looking to see if `state.json` is in the root of the directory. You can identify if a process is running in a container by
looking to see if `state.json` is in the root of the directory.
You may also specify an alternate root place where the `container.json` file is read and where the `state.json` file will be saved. You may also specify an alternate root place where
the `container.json` file is read and where the `state.json` file will be saved.
#### Future #### Future
See the [roadmap](ROADMAP.md). See the [roadmap](ROADMAP.md).

View file

@ -1,21 +0,0 @@
/*
Temporary API endpoint for libcontainer while the full API is finalized (api.go).
*/
package libcontainer
import (
"github.com/docker/libcontainer/cgroups/fs"
"github.com/docker/libcontainer/network"
)
// TODO(vmarmol): Complete Stats() in final libcontainer API and move users to that.
// DEPRECATED: The below portions are only to be used during the transition to the official API.
// Returns all available stats for the given container.
func GetStats(container *Config, state *State) (stats *ContainerStats, err error) {
stats = &ContainerStats{}
if stats.CgroupStats, err = fs.GetStats(state.CgroupPaths); err != nil {
return stats, err
}
stats.NetworkStats, err = network.GetStats(&state.NetworkState)
return stats, err
}

View file

@ -24,7 +24,6 @@ func ApplyProfile(name string) error {
if name == "" { if name == "" {
return nil return nil
} }
cName := C.CString(name) cName := C.CString(name)
defer C.free(unsafe.Pointer(cName)) defer C.free(unsafe.Pointer(cName))

View file

@ -0,0 +1,91 @@
// +build linux
package libcontainer
import (
"fmt"
"os"
"github.com/syndtr/gocapability/capability"
)
const allCapabilityTypes = capability.CAPS | capability.BOUNDS
// capabilityList maps a capability name written without the "CAP_" prefix
// (for example "CHOWN") to the matching constant of the gocapability package.
// newCapWhitelist consults it to translate the string names it is given and
// rejects any name that is not a key of this map.
var capabilityList = map[string]capability.Cap{
"SETPCAP": capability.CAP_SETPCAP,
"SYS_MODULE": capability.CAP_SYS_MODULE,
"SYS_RAWIO": capability.CAP_SYS_RAWIO,
"SYS_PACCT": capability.CAP_SYS_PACCT,
"SYS_ADMIN": capability.CAP_SYS_ADMIN,
"SYS_NICE": capability.CAP_SYS_NICE,
"SYS_RESOURCE": capability.CAP_SYS_RESOURCE,
"SYS_TIME": capability.CAP_SYS_TIME,
"SYS_TTY_CONFIG": capability.CAP_SYS_TTY_CONFIG,
"MKNOD": capability.CAP_MKNOD,
"AUDIT_WRITE": capability.CAP_AUDIT_WRITE,
"AUDIT_CONTROL": capability.CAP_AUDIT_CONTROL,
"MAC_OVERRIDE": capability.CAP_MAC_OVERRIDE,
"MAC_ADMIN": capability.CAP_MAC_ADMIN,
"NET_ADMIN": capability.CAP_NET_ADMIN,
"SYSLOG": capability.CAP_SYSLOG,
"CHOWN": capability.CAP_CHOWN,
"NET_RAW": capability.CAP_NET_RAW,
"DAC_OVERRIDE": capability.CAP_DAC_OVERRIDE,
"FOWNER": capability.CAP_FOWNER,
"DAC_READ_SEARCH": capability.CAP_DAC_READ_SEARCH,
"FSETID": capability.CAP_FSETID,
"KILL": capability.CAP_KILL,
"SETGID": capability.CAP_SETGID,
"SETUID": capability.CAP_SETUID,
"LINUX_IMMUTABLE": capability.CAP_LINUX_IMMUTABLE,
"NET_BIND_SERVICE": capability.CAP_NET_BIND_SERVICE,
"NET_BROADCAST": capability.CAP_NET_BROADCAST,
"IPC_LOCK": capability.CAP_IPC_LOCK,
"IPC_OWNER": capability.CAP_IPC_OWNER,
"SYS_CHROOT": capability.CAP_SYS_CHROOT,
"SYS_PTRACE": capability.CAP_SYS_PTRACE,
"SYS_BOOT": capability.CAP_SYS_BOOT,
"LEASE": capability.CAP_LEASE,
"SETFCAP": capability.CAP_SETFCAP,
"WAKE_ALARM": capability.CAP_WAKE_ALARM,
"BLOCK_SUSPEND": capability.CAP_BLOCK_SUSPEND,
"AUDIT_READ": capability.CAP_AUDIT_READ,
}
// newCapWhitelist builds a whitelist from the given capability names.
//
// Every name must be a key of capabilityList; the first name that is not
// aborts the call with an "unknown capability" error. Only after all names
// have been resolved is the capability handle for the current process
// (os.Getpid) obtained; a failure there is returned unchanged. The resolved
// capabilities are stored in the same order as caps.
func newCapWhitelist(caps []string) (*whitelist, error) {
	keep := make([]capability.Cap, 0, len(caps))
	for i := range caps {
		resolved, known := capabilityList[caps[i]]
		if !known {
			return nil, fmt.Errorf("unknown capability %q", caps[i])
		}
		keep = append(keep, resolved)
	}

	current, err := capability.NewPid(os.Getpid())
	if err != nil {
		return nil, err
	}

	wl := new(whitelist)
	wl.keep = keep
	wl.pid = current
	return wl, nil
}
// whitelist pairs a capability handle with the set of capabilities that
// should remain once drop or dropBoundingSet has been applied.
type whitelist struct {
// pid is the capability handle on which Clear, Set and Apply are invoked.
pid capability.Capabilities
// keep lists the capabilities that are re-added after the sets are cleared.
keep []capability.Cap
}
// dropBoundingSet restricts the capability bounding set to the whitelist:
// the bounding set is cleared, the whitelisted capabilities are added back,
// and the result is applied. Any error from Apply is returned as is.
func (w *whitelist) dropBoundingSet() error {
	state := w.pid
	state.Clear(capability.BOUNDS)
	state.Set(capability.BOUNDS, w.keep...)
	if err := state.Apply(capability.BOUNDS); err != nil {
		return err
	}
	return nil
}
// drop restricts every capability set covered by allCapabilityTypes to the
// whitelist: those sets are cleared, the whitelisted capabilities are added
// back, and the result is applied. Any error from Apply is returned as is.
func (w *whitelist) drop() error {
	state := w.pid
	state.Clear(allCapabilityTypes)
	state.Set(allCapabilityTypes, w.keep...)
	if err := state.Apply(allCapabilityTypes); err != nil {
		return err
	}
	return nil
}

View file

@ -3,16 +3,38 @@ package cgroups
import ( import (
"fmt" "fmt"
"github.com/docker/libcontainer/devices" "github.com/docker/libcontainer/configs"
) )
type FreezerState string type Manager interface {
// Apply cgroup configuration to the process with the specified pid
Apply(pid int) error
const ( // Returns the PIDs inside the cgroup set
Undefined FreezerState = "" GetPids() ([]int, error)
Frozen FreezerState = "FROZEN"
Thawed FreezerState = "THAWED" // Returns statistics for the cgroup set
) GetStats() (*Stats, error)
// Toggles the freezer cgroup according to the specified state
Freeze(state configs.FreezerState) error
// Destroys the cgroup set
Destroy() error
// NewCgroupManager() and LoadCgroupManager() require following attributes:
// Paths map[string]string
// Cgroups *cgroups.Cgroup
// Paths maps cgroup subsystem to path at which it is mounted.
// Cgroups specifies specific cgroup settings for the various subsystems
// Returns cgroup paths to save in a state file and to be able to
// restore the object later.
GetPaths() map[string]string
// Set the cgroup as configured.
Set(container *configs.Config) error
}
type NotFoundError struct { type NotFoundError struct {
Subsystem string Subsystem string
@ -32,25 +54,6 @@ func IsNotFound(err error) bool {
if err == nil { if err == nil {
return false return false
} }
_, ok := err.(*NotFoundError) _, ok := err.(*NotFoundError)
return ok return ok
} }
type Cgroup struct {
Name string `json:"name,omitempty"`
Parent string `json:"parent,omitempty"` // name of parent cgroup or slice
AllowAllDevices bool `json:"allow_all_devices,omitempty"` // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
AllowedDevices []*devices.Device `json:"allowed_devices,omitempty"`
Memory int64 `json:"memory,omitempty"` // Memory limit (in bytes)
MemoryReservation int64 `json:"memory_reservation,omitempty"` // Memory reservation or soft_limit (in bytes)
MemorySwap int64 `json:"memory_swap,omitempty"` // Total memory usage (memory + swap); set `-1' to disable swap
CpuShares int64 `json:"cpu_shares,omitempty"` // CPU shares (relative weight vs. other containers)
CpuQuota int64 `json:"cpu_quota,omitempty"` // CPU hardcap limit (in usecs). Allowed cpu time in a given period.
CpuPeriod int64 `json:"cpu_period,omitempty"` // CPU period to be used for hardcapping (in usecs). 0 to use system default.
CpusetCpus string `json:"cpuset_cpus,omitempty"` // CPU to use
CpusetMems string `json:"cpuset_mems,omitempty"` // MEM to use
Freezer FreezerState `json:"freezer,omitempty"` // set the freeze value for the process
Slice string `json:"slice,omitempty"` // Parent slice to use for systemd
}

View file

@ -1,13 +1,14 @@
package fs package fs
import ( import (
"fmt"
"io/ioutil" "io/ioutil"
"os" "os"
"path/filepath" "path/filepath"
"strconv" "strconv"
"sync"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
) )
var ( var (
@ -24,43 +25,65 @@ var (
CgroupProcesses = "cgroup.procs" CgroupProcesses = "cgroup.procs"
) )
// The absolute path to the root of the cgroup hierarchies.
var cgroupRoot string
// TODO(vmarmol): Report error here, we'll probably need to wait for the new API.
func init() {
// we can pick any subsystem to find the root
cpuRoot, err := cgroups.FindCgroupMountpoint("cpu")
if err != nil {
return
}
cgroupRoot = filepath.Dir(cpuRoot)
if _, err := os.Stat(cgroupRoot); err != nil {
return
}
}
type subsystem interface { type subsystem interface {
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'. // Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
GetStats(path string, stats *cgroups.Stats) error GetStats(path string, stats *cgroups.Stats) error
// Removes the cgroup represented by 'data'. // Removes the cgroup represented by 'data'.
Remove(*data) error Remove(*data) error
// Creates and joins the cgroup represented by data. // Creates and joins the cgroup represented by data.
Set(*data) error Apply(*data) error
// Set the cgroup represented by cgroup.
Set(path string, cgroup *configs.Cgroup) error
}
// Manager is the filesystem-based cgroup manager. Its methods operate on the
// cgroup configuration in Cgroups and on the subsystem paths in Paths.
type Manager struct {
// Cgroups is the cgroup configuration used by Apply, Freeze and GetPids.
Cgroups *configs.Cgroup
// Paths maps a subsystem name to its cgroup path. Apply overwrites it with
// the paths it resolved; GetStats, Set and Destroy read it.
Paths map[string]string
}
// The absolute path to the root of the cgroup hierarchies.
var cgroupRootLock sync.Mutex
var cgroupRoot string
// Gets the cgroupRoot.
func getCgroupRoot() (string, error) {
cgroupRootLock.Lock()
defer cgroupRootLock.Unlock()
if cgroupRoot != "" {
return cgroupRoot, nil
}
// we can pick any subsystem to find the root
cpuRoot, err := cgroups.FindCgroupMountpoint("cpu")
if err != nil {
return "", err
}
root := filepath.Dir(cpuRoot)
if _, err := os.Stat(root); err != nil {
return "", err
}
cgroupRoot = root
return cgroupRoot, nil
} }
type data struct { type data struct {
root string root string
cgroup string cgroup string
c *cgroups.Cgroup c *configs.Cgroup
pid int pid int
} }
func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) { func (m *Manager) Apply(pid int) error {
d, err := getCgroupData(c, pid) if m.Cgroups == nil {
return nil
}
d, err := getCgroupData(m.Cgroups, pid)
if err != nil { if err != nil {
return nil, err return err
} }
paths := make(map[string]string) paths := make(map[string]string)
@ -70,10 +93,10 @@ func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) {
} }
}() }()
for name, sys := range subsystems { for name, sys := range subsystems {
if err := sys.Set(d); err != nil { if err := sys.Apply(d); err != nil {
return nil, err return err
} }
// FIXME: Apply should, ideally, be reentrant or be broken up into a separate // TODO: Apply should, ideally, be reentrant or be broken up into a separate
// create and join phase so that the cgroup hierarchy for a container can be // create and join phase so that the cgroup hierarchy for a container can be
// created then join consists of writing the process pids to cgroup.procs // created then join consists of writing the process pids to cgroup.procs
p, err := d.path(name) p, err := d.path(name)
@ -81,16 +104,26 @@ func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) {
if cgroups.IsNotFound(err) { if cgroups.IsNotFound(err) {
continue continue
} }
return nil, err return err
} }
paths[name] = p paths[name] = p
} }
return paths, nil m.Paths = paths
return nil
}
// Destroy removes the cgroup paths recorded in m.Paths by delegating to
// cgroups.RemovePaths and returns whatever error that call reports.
func (m *Manager) Destroy() error {
return cgroups.RemovePaths(m.Paths)
}
func (m *Manager) GetPaths() map[string]string {
return m.Paths
} }
// Symmetrical public function to update device based cgroups. Also available // Symmetrical public function to update device based cgroups. Also available
// in the systemd implementation. // in the systemd implementation.
func ApplyDevices(c *cgroups.Cgroup, pid int) error { func ApplyDevices(c *configs.Cgroup, pid int) error {
d, err := getCgroupData(c, pid) d, err := getCgroupData(c, pid)
if err != nil { if err != nil {
return err return err
@ -98,12 +131,12 @@ func ApplyDevices(c *cgroups.Cgroup, pid int) error {
devices := subsystems["devices"] devices := subsystems["devices"]
return devices.Set(d) return devices.Apply(d)
} }
func GetStats(systemPaths map[string]string) (*cgroups.Stats, error) { func (m *Manager) GetStats() (*cgroups.Stats, error) {
stats := cgroups.NewStats() stats := cgroups.NewStats()
for name, path := range systemPaths { for name, path := range m.Paths {
sys, ok := subsystems[name] sys, ok := subsystems[name]
if !ok || !cgroups.PathExists(path) { if !ok || !cgroups.PathExists(path) {
continue continue
@ -116,29 +149,48 @@ func GetStats(systemPaths map[string]string) (*cgroups.Stats, error) {
return stats, nil return stats, nil
} }
// Set pushes the cgroup settings in container.Cgroups to each subsystem
// recorded in m.Paths. Entries whose subsystem name is not registered in
// subsystems, or whose path does not exist, are skipped. The first error
// returned by a subsystem's Set stops the iteration and is returned.
func (m *Manager) Set(container *configs.Config) error {
	for subsysName, dir := range m.Paths {
		handler, known := subsystems[subsysName]
		if known && cgroups.PathExists(dir) {
			if err := handler.Set(dir, container.Cgroups); err != nil {
				return err
			}
		}
	}
	return nil
}
// Freeze toggles the container's freezer cgroup depending on the state // Freeze toggles the container's freezer cgroup depending on the state
// provided // provided
func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error { func (m *Manager) Freeze(state configs.FreezerState) error {
d, err := getCgroupData(c, 0) d, err := getCgroupData(m.Cgroups, 0)
if err != nil { if err != nil {
return err return err
} }
prevState := c.Freezer dir, err := d.path("freezer")
c.Freezer = state if err != nil {
return err
}
prevState := m.Cgroups.Freezer
m.Cgroups.Freezer = state
freezer := subsystems["freezer"] freezer := subsystems["freezer"]
err = freezer.Set(d) err = freezer.Set(dir, m.Cgroups)
if err != nil { if err != nil {
c.Freezer = prevState m.Cgroups.Freezer = prevState
return err return err
} }
return nil return nil
} }
func GetPids(c *cgroups.Cgroup) ([]int, error) { func (m *Manager) GetPids() ([]int, error) {
d, err := getCgroupData(c, 0) d, err := getCgroupData(m.Cgroups, 0)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -151,9 +203,10 @@ func GetPids(c *cgroups.Cgroup) ([]int, error) {
return cgroups.ReadProcsFile(dir) return cgroups.ReadProcsFile(dir)
} }
func getCgroupData(c *cgroups.Cgroup, pid int) (*data, error) { func getCgroupData(c *configs.Cgroup, pid int) (*data, error) {
if cgroupRoot == "" { root, err := getCgroupRoot()
return nil, fmt.Errorf("failed to find the cgroup root") if err != nil {
return nil, err
} }
cgroup := c.Name cgroup := c.Name
@ -162,7 +215,7 @@ func getCgroupData(c *cgroups.Cgroup, pid int) (*data, error) {
} }
return &data{ return &data{
root: cgroupRoot, root: root,
cgroup: cgroup, cgroup: cgroup,
c: c, c: c,
pid: pid, pid: pid,

View file

@ -9,17 +9,32 @@ import (
"strings" "strings"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
) )
type BlkioGroup struct { type BlkioGroup struct {
} }
func (s *BlkioGroup) Set(d *data) error { func (s *BlkioGroup) Apply(d *data) error {
// we just want to join this group even though we don't set anything dir, err := d.join("blkio")
if _, err := d.join("blkio"); err != nil && !cgroups.IsNotFound(err) { if err != nil && !cgroups.IsNotFound(err) {
return err return err
} }
if err := s.Set(dir, d.c); err != nil {
return err
}
return nil
}
func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error {
if cgroup.BlkioWeight != 0 {
if err := writeFile(path, "blkio.weight", strconv.FormatInt(cgroup.BlkioWeight, 10)); err != nil {
return err
}
}
return nil return nil
} }

View file

@ -1,6 +1,7 @@
package fs package fs
import ( import (
"strconv"
"testing" "testing"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
@ -72,6 +73,35 @@ func appendBlkioStatEntry(blkioStatEntries *[]cgroups.BlkioStatEntry, major, min
*blkioStatEntries = append(*blkioStatEntries, cgroups.BlkioStatEntry{Major: major, Minor: minor, Value: value, Op: op}) *blkioStatEntries = append(*blkioStatEntries, cgroups.BlkioStatEntry{Major: major, Minor: minor, Value: value, Op: op})
} }
// TestBlkioSetWeight seeds blkio.weight with an initial value, calls
// BlkioGroup.Set with a different BlkioWeight and verifies that the file now
// holds the new weight.
func TestBlkioSetWeight(t *testing.T) {
	const (
		initialWeight = 100
		updatedWeight = 200
	)

	util := NewCgroupTestUtil("blkio", t)
	defer util.cleanup()

	seed := map[string]string{"blkio.weight": strconv.Itoa(initialWeight)}
	util.writeFileContents(seed)
	util.CgroupData.c.BlkioWeight = updatedWeight

	group := new(BlkioGroup)
	if err := group.Set(util.CgroupPath, util.CgroupData.c); err != nil {
		t.Fatal(err)
	}

	got, err := getCgroupParamUint(util.CgroupPath, "blkio.weight")
	switch {
	case err != nil:
		t.Fatalf("Failed to parse blkio.weight - %s", err)
	case got != updatedWeight:
		t.Fatal("Got the wrong value, set blkio.weight failed.")
	}
}
func TestBlkioStats(t *testing.T) { func TestBlkioStats(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t) helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup() defer helper.cleanup()

View file

@ -7,33 +7,44 @@ import (
"strconv" "strconv"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
) )
type CpuGroup struct { type CpuGroup struct {
} }
func (s *CpuGroup) Set(d *data) error { func (s *CpuGroup) Apply(d *data) error {
// We always want to join the cpu group, to allow fair cpu scheduling // We always want to join the cpu group, to allow fair cpu scheduling
// on a container basis // on a container basis
dir, err := d.join("cpu") dir, err := d.join("cpu")
if err != nil { if err != nil {
return err return err
} }
if d.c.CpuShares != 0 {
if err := writeFile(dir, "cpu.shares", strconv.FormatInt(d.c.CpuShares, 10)); err != nil { if err := s.Set(dir, d.c); err != nil {
return err
}
return nil
}
func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
if cgroup.CpuShares != 0 {
if err := writeFile(path, "cpu.shares", strconv.FormatInt(cgroup.CpuShares, 10)); err != nil {
return err return err
} }
} }
if d.c.CpuPeriod != 0 { if cgroup.CpuPeriod != 0 {
if err := writeFile(dir, "cpu.cfs_period_us", strconv.FormatInt(d.c.CpuPeriod, 10)); err != nil { if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(cgroup.CpuPeriod, 10)); err != nil {
return err return err
} }
} }
if d.c.CpuQuota != 0 { if cgroup.CpuQuota != 0 {
if err := writeFile(dir, "cpu.cfs_quota_us", strconv.FormatInt(d.c.CpuQuota, 10)); err != nil { if err := writeFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.CpuQuota, 10)); err != nil {
return err return err
} }
} }
return nil return nil
} }

View file

@ -2,11 +2,81 @@ package fs
import ( import (
"fmt" "fmt"
"strconv"
"testing" "testing"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
) )
// TestCpuSetShares seeds cpu.shares with an initial value, calls CpuGroup.Set
// with a different CpuShares and verifies that the file now holds the new
// value.
func TestCpuSetShares(t *testing.T) {
	const (
		initialShares = 1024
		updatedShares = 512
	)

	util := NewCgroupTestUtil("cpu", t)
	defer util.cleanup()

	seed := map[string]string{"cpu.shares": strconv.Itoa(initialShares)}
	util.writeFileContents(seed)
	util.CgroupData.c.CpuShares = updatedShares

	group := new(CpuGroup)
	if err := group.Set(util.CgroupPath, util.CgroupData.c); err != nil {
		t.Fatal(err)
	}

	got, err := getCgroupParamUint(util.CgroupPath, "cpu.shares")
	switch {
	case err != nil:
		t.Fatalf("Failed to parse cpu.shares - %s", err)
	case got != updatedShares:
		t.Fatal("Got the wrong value, set cpu.shares failed.")
	}
}
// TestCpuSetBandWidth seeds cpu.cfs_quota_us and cpu.cfs_period_us with
// initial values, calls CpuGroup.Set with a different CpuQuota and CpuPeriod,
// and verifies first the quota file and then the period file.
func TestCpuSetBandWidth(t *testing.T) {
	const (
		initialQuota  = 8000
		updatedQuota  = 5000
		initialPeriod = 10000
		updatedPeriod = 7000
	)

	util := NewCgroupTestUtil("cpu", t)
	defer util.cleanup()

	seed := map[string]string{
		"cpu.cfs_quota_us":  strconv.Itoa(initialQuota),
		"cpu.cfs_period_us": strconv.Itoa(initialPeriod),
	}
	util.writeFileContents(seed)

	cfg := util.CgroupData.c
	cfg.CpuQuota = updatedQuota
	cfg.CpuPeriod = updatedPeriod

	group := new(CpuGroup)
	if err := group.Set(util.CgroupPath, util.CgroupData.c); err != nil {
		t.Fatal(err)
	}

	gotQuota, err := getCgroupParamUint(util.CgroupPath, "cpu.cfs_quota_us")
	switch {
	case err != nil:
		t.Fatalf("Failed to parse cpu.cfs_quota_us - %s", err)
	case gotQuota != updatedQuota:
		t.Fatal("Got the wrong value, set cpu.cfs_quota_us failed.")
	}

	gotPeriod, err := getCgroupParamUint(util.CgroupPath, "cpu.cfs_period_us")
	switch {
	case err != nil:
		t.Fatalf("Failed to parse cpu.cfs_period_us - %s", err)
	case gotPeriod != updatedPeriod:
		t.Fatal("Got the wrong value, set cpu.cfs_period_us failed.")
	}
}
func TestCpuStats(t *testing.T) { func TestCpuStats(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t) helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup() defer helper.cleanup()

View file

@ -8,6 +8,7 @@ import (
"strings" "strings"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/system" "github.com/docker/libcontainer/system"
) )
@ -21,7 +22,7 @@ var clockTicks = uint64(system.GetClockTicks())
type CpuacctGroup struct { type CpuacctGroup struct {
} }
func (s *CpuacctGroup) Set(d *data) error { func (s *CpuacctGroup) Apply(d *data) error {
// we just want to join this group even though we don't set anything // we just want to join this group even though we don't set anything
if _, err := d.join("cpuacct"); err != nil && !cgroups.IsNotFound(err) { if _, err := d.join("cpuacct"); err != nil && !cgroups.IsNotFound(err) {
return err return err
@ -30,6 +31,10 @@ func (s *CpuacctGroup) Set(d *data) error {
return nil return nil
} }
// Set is a no-op for the cpuacct subsystem: it writes nothing for the given
// path and cgroup configuration and always returns nil.
func (s *CpuacctGroup) Set(path string, cgroup *configs.Cgroup) error {
return nil
}
func (s *CpuacctGroup) Remove(d *data) error { func (s *CpuacctGroup) Remove(d *data) error {
return removePath(d.path("cpuacct")) return removePath(d.path("cpuacct"))
} }

View file

@ -8,17 +8,34 @@ import (
"strconv" "strconv"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
) )
type CpusetGroup struct { type CpusetGroup struct {
} }
func (s *CpusetGroup) Set(d *data) error { func (s *CpusetGroup) Apply(d *data) error {
dir, err := d.path("cpuset") dir, err := d.path("cpuset")
if err != nil { if err != nil {
return err return err
} }
return s.SetDir(dir, d.c.CpusetCpus, d.c.CpusetMems, d.pid) return s.ApplyDir(dir, d.c, d.pid)
}
func (s *CpusetGroup) Set(path string, cgroup *configs.Cgroup) error {
if cgroup.CpusetCpus != "" {
if err := writeFile(path, "cpuset.cpus", cgroup.CpusetCpus); err != nil {
return err
}
}
if cgroup.CpusetMems != "" {
if err := writeFile(path, "cpuset.mems", cgroup.CpusetMems); err != nil {
return err
}
}
return nil
} }
func (s *CpusetGroup) Remove(d *data) error { func (s *CpusetGroup) Remove(d *data) error {
@ -29,7 +46,7 @@ func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
return nil return nil
} }
func (s *CpusetGroup) SetDir(dir, cpus string, mems string, pid int) error { func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error {
if err := s.ensureParent(dir); err != nil { if err := s.ensureParent(dir); err != nil {
return err return err
} }
@ -40,17 +57,10 @@ func (s *CpusetGroup) SetDir(dir, cpus string, mems string, pid int) error {
return err return err
} }
// If we don't use --cpuset-xxx, the default value inherit from parent cgroup // the default values inherit from parent cgroup are already set in
// is set in s.ensureParent, otherwise, use the value we set // s.ensureParent, cover these if we have our own
if cpus != "" { if err := s.Set(dir, cgroup); err != nil {
if err := writeFile(dir, "cpuset.cpus", cpus); err != nil { return err
return err
}
}
if mems != "" {
if err := writeFile(dir, "cpuset.mems", mems); err != nil {
return err
}
} }
return nil return nil

View file

@ -0,0 +1,63 @@
package fs
import (
"testing"
)
// TestCpusetSetCpus seeds cpuset.cpus with one value, asks CpusetGroup.Set to
// apply a different one, and checks that the file now holds the new value.
func TestCpusetSetCpus(t *testing.T) {
	const (
		initialCpus = "0"
		wantCpus    = "1-3"
	)

	util := NewCgroupTestUtil("cpuset", t)
	defer util.cleanup()

	seed := map[string]string{"cpuset.cpus": initialCpus}
	util.writeFileContents(seed)

	util.CgroupData.c.CpusetCpus = wantCpus

	var group CpusetGroup
	if err := group.Set(util.CgroupPath, util.CgroupData.c); err != nil {
		t.Fatal(err)
	}

	got, err := getCgroupParamString(util.CgroupPath, "cpuset.cpus")
	if err != nil {
		t.Fatalf("Failed to parse cpuset.cpus - %s", err)
	}
	if got != wantCpus {
		t.Fatal("Got the wrong value, set cpuset.cpus failed.")
	}
}
// TestCpusetSetMems seeds cpuset.mems with one value, asks CpusetGroup.Set to
// apply a different one, and checks that the file now holds the new value.
func TestCpusetSetMems(t *testing.T) {
	const (
		initialMems = "0"
		wantMems    = "1"
	)

	util := NewCgroupTestUtil("cpuset", t)
	defer util.cleanup()

	seed := map[string]string{"cpuset.mems": initialMems}
	util.writeFileContents(seed)

	util.CgroupData.c.CpusetMems = wantMems

	var group CpusetGroup
	if err := group.Set(util.CgroupPath, util.CgroupData.c); err != nil {
		t.Fatal(err)
	}

	got, err := getCgroupParamString(util.CgroupPath, "cpuset.mems")
	if err != nil {
		t.Fatalf("Failed to parse cpuset.mems - %s", err)
	}
	if got != wantMems {
		t.Fatal("Got the wrong value, set cpuset.mems failed.")
	}
}

View file

@ -1,27 +1,39 @@
package fs package fs
import "github.com/docker/libcontainer/cgroups" import (
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
)
type DevicesGroup struct { type DevicesGroup struct {
} }
func (s *DevicesGroup) Set(d *data) error { func (s *DevicesGroup) Apply(d *data) error {
dir, err := d.join("devices") dir, err := d.join("devices")
if err != nil { if err != nil {
return err return err
} }
if !d.c.AllowAllDevices { if err := s.Set(dir, d.c); err != nil {
if err := writeFile(dir, "devices.deny", "a"); err != nil { return err
}
return nil
}
func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
if !cgroup.AllowAllDevices {
if err := writeFile(path, "devices.deny", "a"); err != nil {
return err return err
} }
for _, dev := range d.c.AllowedDevices { for _, dev := range cgroup.AllowedDevices {
if err := writeFile(dir, "devices.allow", dev.GetCgroupAllowString()); err != nil { if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil {
return err return err
} }
} }
} }
return nil return nil
} }

View file

@ -0,0 +1,48 @@
package fs
import (
"testing"
"github.com/docker/libcontainer/configs"
)
var (
// allowedDevices is the device list assigned to Cgroup.AllowedDevices by
// the devices test in this file; it holds a single character-device entry.
allowedDevices = []*configs.Device{
{
Path: "/dev/zero",
Type: 'c',
Major: 1,
Minor: 5,
Permissions: "rwm",
FileMode: 0666,
},
}
// allowedList is the string the test expects to read back from
// devices.allow for the entry above.
allowedList = "c 1:5 rwm"
)
// TestDevicesSetAllow checks that DevicesGroup.Set, with AllowAllDevices
// disabled, records the configured allowed device in devices.allow.
func TestDevicesSetAllow(t *testing.T) {
	helper := NewCgroupTestUtil("devices", t)
	defer helper.cleanup()

	// Seed the file that Set actually writes: the controller file is named
	// "devices.deny" (plural), not "device.deny".
	helper.writeFileContents(map[string]string{
		"devices.deny": "a",
	})

	helper.CgroupData.c.AllowAllDevices = false
	helper.CgroupData.c.AllowedDevices = allowedDevices
	devices := &DevicesGroup{}
	if err := devices.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
		t.Fatal(err)
	}

	// FIXME: this doesn't make sense, the file devices.allow under a real
	// cgroupfs is not allowed to be read. Our test path doesn't have cgroupfs
	// mounted.
	value, err := getCgroupParamString(helper.CgroupPath, "devices.allow")
	if err != nil {
		t.Fatalf("Failed to parse devices.allow - %s", err)
	}

	if value != allowedList {
		t.Fatal("Got the wrong value, set devices.allow failed.")
	}
}

View file

@ -5,37 +5,42 @@ import (
"time" "time"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
) )
type FreezerGroup struct { type FreezerGroup struct {
} }
func (s *FreezerGroup) Set(d *data) error { func (s *FreezerGroup) Apply(d *data) error {
switch d.c.Freezer { dir, err := d.join("freezer")
case cgroups.Frozen, cgroups.Thawed: if err != nil && !cgroups.IsNotFound(err) {
dir, err := d.path("freezer") return err
if err != nil { }
return err
}
if err := writeFile(dir, "freezer.state", string(d.c.Freezer)); err != nil { if err := s.Set(dir, d.c); err != nil {
return err
}
return nil
}
func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
switch cgroup.Freezer {
case configs.Frozen, configs.Thawed:
if err := writeFile(path, "freezer.state", string(cgroup.Freezer)); err != nil {
return err return err
} }
for { for {
state, err := readFile(dir, "freezer.state") state, err := readFile(path, "freezer.state")
if err != nil { if err != nil {
return err return err
} }
if strings.TrimSpace(state) == string(d.c.Freezer) { if strings.TrimSpace(state) == string(cgroup.Freezer) {
break break
} }
time.Sleep(1 * time.Millisecond) time.Sleep(1 * time.Millisecond)
} }
default:
if _, err := d.join("freezer"); err != nil && !cgroups.IsNotFound(err) {
return err
}
} }
return nil return nil

View file

@ -8,12 +8,13 @@ import (
"strconv" "strconv"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
) )
type MemoryGroup struct { type MemoryGroup struct {
} }
func (s *MemoryGroup) Set(d *data) error { func (s *MemoryGroup) Apply(d *data) error {
dir, err := d.join("memory") dir, err := d.join("memory")
// only return an error for memory if it was specified // only return an error for memory if it was specified
if err != nil && (d.c.Memory != 0 || d.c.MemoryReservation != 0 || d.c.MemorySwap != 0) { if err != nil && (d.c.Memory != 0 || d.c.MemoryReservation != 0 || d.c.MemorySwap != 0) {
@ -25,31 +26,42 @@ func (s *MemoryGroup) Set(d *data) error {
} }
}() }()
// Only set values if some config was specified. if err := s.Set(dir, d.c); err != nil {
if d.c.Memory != 0 || d.c.MemoryReservation != 0 || d.c.MemorySwap != 0 { return err
if d.c.Memory != 0 { }
if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(d.c.Memory, 10)); err != nil {
return err return nil
} }
}
if d.c.MemoryReservation != 0 { func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(d.c.MemoryReservation, 10)); err != nil { if cgroup.Memory != 0 {
return err if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Memory, 10)); err != nil {
} return err
}
// By default, MemorySwap is set to twice the size of RAM.
// If you want to omit MemorySwap, set it to '-1'.
if d.c.MemorySwap == 0 {
if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(d.c.Memory*2, 10)); err != nil {
return err
}
}
if d.c.MemorySwap > 0 {
if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(d.c.MemorySwap, 10)); err != nil {
return err
}
} }
} }
if cgroup.MemoryReservation != 0 {
if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.MemoryReservation, 10)); err != nil {
return err
}
}
// By default, MemorySwap is set to twice the size of Memory.
if cgroup.MemorySwap == 0 && cgroup.Memory != 0 {
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Memory*2, 10)); err != nil {
return err
}
}
if cgroup.MemorySwap > 0 {
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.MemorySwap, 10)); err != nil {
return err
}
}
if cgroup.OomKillDisable {
if err := writeFile(path, "memory.oom_control", "1"); err != nil {
return err
}
}
return nil return nil
} }

View file

@ -1,6 +1,7 @@
package fs package fs
import ( import (
"strconv"
"testing" "testing"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
@ -14,6 +15,46 @@ rss 1024`
memoryFailcnt = "100\n" memoryFailcnt = "100\n"
) )
// TestMemorySetMemory seeds the hard and soft memory limit files, applies new
// limits through MemoryGroup.Set, and verifies that both files were rewritten.
func TestMemorySetMemory(t *testing.T) {
	const (
		memoryBefore      = 314572800 // 300M
		memoryAfter       = 524288000 // 500M
		reservationBefore = 209715200 // 200M
		reservationAfter  = 314572800 // 300M
	)

	util := NewCgroupTestUtil("memory", t)
	defer util.cleanup()

	seed := map[string]string{
		"memory.limit_in_bytes":      strconv.Itoa(memoryBefore),
		"memory.soft_limit_in_bytes": strconv.Itoa(reservationBefore),
	}
	util.writeFileContents(seed)

	util.CgroupData.c.Memory = memoryAfter
	util.CgroupData.c.MemoryReservation = reservationAfter

	var group MemoryGroup
	if err := group.Set(util.CgroupPath, util.CgroupData.c); err != nil {
		t.Fatal(err)
	}

	hardLimit, err := getCgroupParamUint(util.CgroupPath, "memory.limit_in_bytes")
	if err != nil {
		t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
	}
	if hardLimit != memoryAfter {
		t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
	}

	softLimit, err := getCgroupParamUint(util.CgroupPath, "memory.soft_limit_in_bytes")
	if err != nil {
		t.Fatalf("Failed to parse memory.soft_limit_in_bytes - %s", err)
	}
	if softLimit != reservationAfter {
		t.Fatal("Got the wrong value, set memory.soft_limit_in_bytes failed.")
	}
}
func TestMemoryStats(t *testing.T) { func TestMemoryStats(t *testing.T) {
helper := NewCgroupTestUtil("memory", t) helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup() defer helper.cleanup()
@ -132,3 +173,30 @@ func TestMemoryStatsBadMaxUsageFile(t *testing.T) {
t.Fatal("Expected failure") t.Fatal("Expected failure")
} }
} }
// TestMemorySetOomControl checks that MemoryGroup.Set writes "1" to
// memory.oom_control when OomKillDisable is set on the cgroup config.
func TestMemorySetOomControl(t *testing.T) {
	helper := NewCgroupTestUtil("memory", t)
	defer helper.cleanup()

	const (
		oomKillEnabled  = 0 // default: oom killer enabled
		oomKillDisabled = 1 // disable oom killer
	)

	// Start from the default value so that a pass proves Set performed the
	// write, rather than merely reading back what the test seeded.
	helper.writeFileContents(map[string]string{
		"memory.oom_control": strconv.Itoa(oomKillEnabled),
	})

	helper.CgroupData.c.OomKillDisable = true
	memory := &MemoryGroup{}
	if err := memory.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
		t.Fatal(err)
	}

	value, err := getCgroupParamUint(helper.CgroupPath, "memory.oom_control")
	if err != nil {
		t.Fatalf("Failed to parse memory.oom_control - %s", err)
	}

	if value != oomKillDisabled {
		t.Fatal("Got the wrong value, set memory.oom_control failed.")
	}
}

View file

@ -2,12 +2,13 @@ package fs
import ( import (
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
) )
type PerfEventGroup struct { type PerfEventGroup struct {
} }
func (s *PerfEventGroup) Set(d *data) error { func (s *PerfEventGroup) Apply(d *data) error {
// we just want to join this group even though we don't set anything // we just want to join this group even though we don't set anything
if _, err := d.join("perf_event"); err != nil && !cgroups.IsNotFound(err) { if _, err := d.join("perf_event"); err != nil && !cgroups.IsNotFound(err) {
return err return err
@ -15,6 +16,10 @@ func (s *PerfEventGroup) Set(d *data) error {
return nil return nil
} }
// Set is a no-op for the perf_event subsystem: it writes nothing under path,
// ignores cgroup, and always returns nil.
func (s *PerfEventGroup) Set(path string, cgroup *configs.Cgroup) error {
return nil
}
func (s *PerfEventGroup) Remove(d *data) error { func (s *PerfEventGroup) Remove(d *data) error {
return removePath(d.path("perf_event")) return removePath(d.path("perf_event"))
} }

View file

@ -10,6 +10,8 @@ import (
"io/ioutil" "io/ioutil"
"os" "os"
"testing" "testing"
"github.com/docker/libcontainer/configs"
) )
type cgroupTestUtil struct { type cgroupTestUtil struct {
@ -26,7 +28,9 @@ type cgroupTestUtil struct {
// Creates a new test util for the specified subsystem // Creates a new test util for the specified subsystem
func NewCgroupTestUtil(subsystem string, t *testing.T) *cgroupTestUtil { func NewCgroupTestUtil(subsystem string, t *testing.T) *cgroupTestUtil {
d := &data{} d := &data{
c: &configs.Cgroup{},
}
tempDir, err := ioutil.TempDir("", fmt.Sprintf("%s_cgroup_test", subsystem)) tempDir, err := ioutil.TempDir("", fmt.Sprintf("%s_cgroup_test", subsystem))
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)

View file

@ -60,3 +60,13 @@ func getCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) {
return parseUint(strings.TrimSpace(string(contents)), 10, 64) return parseUint(strings.TrimSpace(string(contents)), 10, 64)
} }
// getCgroupParamString reads cgroupFile inside cgroupPath and returns its
// contents with leading and trailing whitespace removed. Any read error is
// returned unchanged together with an empty string.
func getCgroupParamString(cgroupPath, cgroupFile string) (string, error) {
	target := filepath.Join(cgroupPath, cgroupFile)
	raw, readErr := ioutil.ReadFile(target)
	if readErr == nil {
		return strings.TrimSpace(string(raw)), nil
	}
	return "", readErr
}

View file

@ -6,24 +6,50 @@ import (
"fmt" "fmt"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
) )
// Manager holds a cgroup configuration and a map of paths. The methods defined
// on it in this file are stubs.
type Manager struct {
// Cgroups is the cgroup configuration this manager refers to.
Cgroups *configs.Cgroup
// NOTE(review): presumably keyed by subsystem name with the cgroup path as
// the value - confirm against the systemd-enabled implementation.
Paths map[string]string
}
func UseSystemd() bool { func UseSystemd() bool {
return false return false
} }
func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) { func (m *Manager) Apply(pid int) error {
return nil, fmt.Errorf("Systemd not supported")
}
func GetPids(c *cgroups.Cgroup) ([]int, error) {
return nil, fmt.Errorf("Systemd not supported")
}
func ApplyDevices(c *cgroups.Cgroup, pid int) error {
return fmt.Errorf("Systemd not supported") return fmt.Errorf("Systemd not supported")
} }
func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error { func (m *Manager) GetPids() ([]int, error) {
return nil, fmt.Errorf("Systemd not supported")
}
// Destroy is a stub: it always returns a "Systemd not supported" error.
func (m *Manager) Destroy() error {
return fmt.Errorf("Systemd not supported")
}
// GetPaths is a stub: it always returns nil, regardless of m.Paths.
func (m *Manager) GetPaths() map[string]string {
return nil
}
// GetStats is a stub: it always returns nil stats and a
// "Systemd not supported" error.
func (m *Manager) GetStats() (*cgroups.Stats, error) {
return nil, fmt.Errorf("Systemd not supported")
}
// Set is a stub: it always returns a "Systemd not supported" error.
//
// The function has a single error result, so exactly one value is returned.
func (m *Manager) Set(container *configs.Config) error {
	return fmt.Errorf("Systemd not supported")
}
// Freeze is a stub: it ignores state and always returns a
// "Systemd not supported" error.
func (m *Manager) Freeze(state configs.FreezerState) error {
return fmt.Errorf("Systemd not supported")
}
// ApplyDevices is a stub: it ignores its arguments and always returns a
// "Systemd not supported" error.
func ApplyDevices(c *configs.Cgroup, pid int) error {
return fmt.Errorf("Systemd not supported")
}
func Freeze(c *configs.Cgroup, state configs.FreezerState) error {
return fmt.Errorf("Systemd not supported") return fmt.Errorf("Systemd not supported")
} }

View file

@ -16,21 +16,38 @@ import (
systemd "github.com/coreos/go-systemd/dbus" systemd "github.com/coreos/go-systemd/dbus"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/cgroups/fs" "github.com/docker/libcontainer/cgroups/fs"
"github.com/docker/libcontainer/configs"
"github.com/godbus/dbus" "github.com/godbus/dbus"
) )
type systemdCgroup struct { type Manager struct {
cgroup *cgroups.Cgroup Cgroups *configs.Cgroup
Paths map[string]string
} }
type subsystem interface { type subsystem interface {
GetStats(string, *cgroups.Stats) error // Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
GetStats(path string, stats *cgroups.Stats) error
// Set the cgroup represented by cgroup.
Set(path string, cgroup *configs.Cgroup) error
}
var subsystems = map[string]subsystem{
"devices": &fs.DevicesGroup{},
"memory": &fs.MemoryGroup{},
"cpu": &fs.CpuGroup{},
"cpuset": &fs.CpusetGroup{},
"cpuacct": &fs.CpuacctGroup{},
"blkio": &fs.BlkioGroup{},
"perf_event": &fs.PerfEventGroup{},
"freezer": &fs.FreezerGroup{},
} }
var ( var (
connLock sync.Mutex connLock sync.Mutex
theConn *systemd.Conn theConn *systemd.Conn
hasStartTransientUnit bool hasStartTransientUnit bool
hasTransientDefaultDependencies bool
) )
func newProp(name string, units interface{}) systemd.Property { func newProp(name string, units interface{}) systemd.Property {
@ -64,6 +81,18 @@ func UseSystemd() bool {
if dbusError, ok := err.(dbus.Error); ok { if dbusError, ok := err.(dbus.Error); ok {
if dbusError.Name == "org.freedesktop.DBus.Error.UnknownMethod" { if dbusError.Name == "org.freedesktop.DBus.Error.UnknownMethod" {
hasStartTransientUnit = false hasStartTransientUnit = false
return hasStartTransientUnit
}
}
}
// Assume StartTransientUnit on a scope allows DefaultDependencies
hasTransientDefaultDependencies = true
ddf := newProp("DefaultDependencies", false)
if _, err := theConn.StartTransientUnit("docker-systemd-test-default-dependencies.scope", "replace", ddf); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if dbusError.Name == "org.freedesktop.DBus.Error.PropertyReadOnly" {
hasTransientDefaultDependencies = false
} }
} }
} }
@ -81,16 +110,14 @@ func getIfaceForUnit(unitName string) string {
return "Unit" return "Unit"
} }
func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) { func (m *Manager) Apply(pid int) error {
var ( var (
c = m.Cgroups
unitName = getUnitName(c) unitName = getUnitName(c)
slice = "system.slice" slice = "system.slice"
properties []systemd.Property properties []systemd.Property
res = &systemdCgroup{}
) )
res.cgroup = c
if c.Slice != "" { if c.Slice != "" {
slice = c.Slice slice = c.Slice
} }
@ -108,6 +135,11 @@ func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) {
newProp("CPUAccounting", true), newProp("CPUAccounting", true),
newProp("BlockIOAccounting", true)) newProp("BlockIOAccounting", true))
if hasTransientDefaultDependencies {
properties = append(properties,
newProp("DefaultDependencies", false))
}
if c.Memory != 0 { if c.Memory != 0 {
properties = append(properties, properties = append(properties,
newProp("MemoryLimit", uint64(c.Memory))) newProp("MemoryLimit", uint64(c.Memory)))
@ -119,20 +151,29 @@ func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) {
newProp("CPUShares", uint64(c.CpuShares))) newProp("CPUShares", uint64(c.CpuShares)))
} }
if _, err := theConn.StartTransientUnit(unitName, "replace", properties...); err != nil { if c.BlkioWeight != 0 {
return nil, err properties = append(properties,
newProp("BlockIOWeight", uint64(c.BlkioWeight)))
} }
if !c.AllowAllDevices { if _, err := theConn.StartTransientUnit(unitName, "replace", properties...); err != nil {
if err := joinDevices(c, pid); err != nil { return err
return nil, err }
}
if err := joinDevices(c, pid); err != nil {
return err
}
// TODO: CpuQuota and CpuPeriod not available in systemd
// we need to manually join the cpu.cfs_quota_us and cpu.cfs_period_us
if err := joinCpu(c, pid); err != nil {
return err
} }
// -1 disables memorySwap // -1 disables memorySwap
if c.MemorySwap >= 0 && (c.Memory != 0 || c.MemorySwap > 0) { if c.MemorySwap >= 0 && c.Memory != 0 {
if err := joinMemory(c, pid); err != nil { if err := joinMemory(c, pid); err != nil {
return nil, err return err
} }
} }
@ -140,11 +181,11 @@ func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) {
// we need to manually join the freezer and cpuset cgroup in systemd // we need to manually join the freezer and cpuset cgroup in systemd
// because it does not currently support it via the dbus api. // because it does not currently support it via the dbus api.
if err := joinFreezer(c, pid); err != nil { if err := joinFreezer(c, pid); err != nil {
return nil, err return err
} }
if err := joinCpuset(c, pid); err != nil { if err := joinCpuset(c, pid); err != nil {
return nil, err return err
} }
paths := make(map[string]string) paths := make(map[string]string)
@ -158,24 +199,53 @@ func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) {
"perf_event", "perf_event",
"freezer", "freezer",
} { } {
subsystemPath, err := getSubsystemPath(res.cgroup, sysname) subsystemPath, err := getSubsystemPath(m.Cgroups, sysname)
if err != nil { if err != nil {
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem // Don't fail if a cgroup hierarchy was not found, just skip this subsystem
if cgroups.IsNotFound(err) { if cgroups.IsNotFound(err) {
continue continue
} }
return nil, err return err
} }
paths[sysname] = subsystemPath paths[sysname] = subsystemPath
} }
return paths, nil
m.Paths = paths
return nil
}
// Destroy passes the paths recorded in m.Paths to cgroups.RemovePaths and
// returns whatever error that call reports.
func (m *Manager) Destroy() error {
return cgroups.RemovePaths(m.Paths)
}
func (m *Manager) GetPaths() map[string]string {
return m.Paths
} }
func writeFile(dir, file, data string) error { func writeFile(dir, file, data string) error {
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
} }
func joinFreezer(c *cgroups.Cgroup, pid int) error { func joinCpu(c *configs.Cgroup, pid int) error {
path, err := getSubsystemPath(c, "cpu")
if err != nil {
return err
}
if c.CpuQuota != 0 {
if err = ioutil.WriteFile(filepath.Join(path, "cpu.cfs_quota_us"), []byte(strconv.FormatInt(c.CpuQuota, 10)), 0700); err != nil {
return err
}
}
if c.CpuPeriod != 0 {
if err = ioutil.WriteFile(filepath.Join(path, "cpu.cfs_period_us"), []byte(strconv.FormatInt(c.CpuPeriod, 10)), 0700); err != nil {
return err
}
}
return nil
}
func joinFreezer(c *configs.Cgroup, pid int) error {
path, err := getSubsystemPath(c, "freezer") path, err := getSubsystemPath(c, "freezer")
if err != nil { if err != nil {
return err return err
@ -188,7 +258,7 @@ func joinFreezer(c *cgroups.Cgroup, pid int) error {
return ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700) return ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700)
} }
func getSubsystemPath(c *cgroups.Cgroup, subsystem string) (string, error) { func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
mountpoint, err := cgroups.FindCgroupMountpoint(subsystem) mountpoint, err := cgroups.FindCgroupMountpoint(subsystem)
if err != nil { if err != nil {
return "", err return "", err
@ -207,8 +277,8 @@ func getSubsystemPath(c *cgroups.Cgroup, subsystem string) (string, error) {
return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil
} }
func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error { func (m *Manager) Freeze(state configs.FreezerState) error {
path, err := getSubsystemPath(c, "freezer") path, err := getSubsystemPath(m.Cgroups, "freezer")
if err != nil { if err != nil {
return err return err
} }
@ -226,11 +296,14 @@ func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error {
} }
time.Sleep(1 * time.Millisecond) time.Sleep(1 * time.Millisecond)
} }
m.Cgroups.Freezer = state
return nil return nil
} }
func GetPids(c *cgroups.Cgroup) ([]int, error) { func (m *Manager) GetPids() ([]int, error) {
path, err := getSubsystemPath(c, "cpu") path, err := getSubsystemPath(m.Cgroups, "cpu")
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -238,7 +311,26 @@ func GetPids(c *cgroups.Cgroup) ([]int, error) {
return cgroups.ReadProcsFile(path) return cgroups.ReadProcsFile(path)
} }
func getUnitName(c *cgroups.Cgroup) string { func (m *Manager) GetStats() (*cgroups.Stats, error) {
stats := cgroups.NewStats()
for name, path := range m.Paths {
sys, ok := subsystems[name]
if !ok || !cgroups.PathExists(path) {
continue
}
if err := sys.GetStats(path, stats); err != nil {
return nil, err
}
}
return stats, nil
}
// Set is not implemented for this cgroup manager. It reports that through its
// error result instead of panicking, so a caller can handle the condition
// without the process being brought down.
func (m *Manager) Set(container *configs.Config) error {
	return fmt.Errorf("not implemented")
}
func getUnitName(c *configs.Cgroup) string {
return fmt.Sprintf("%s-%s.scope", c.Parent, c.Name) return fmt.Sprintf("%s-%s.scope", c.Parent, c.Name)
} }
@ -253,7 +345,7 @@ func getUnitName(c *cgroups.Cgroup) string {
// Note: we can't use systemd to set up the initial limits, and then change the cgroup // Note: we can't use systemd to set up the initial limits, and then change the cgroup
// because systemd will re-write the device settings if it needs to re-apply the cgroup context. // because systemd will re-write the device settings if it needs to re-apply the cgroup context.
// This happens at least for v208 when any sibling unit is started. // This happens at least for v208 when any sibling unit is started.
func joinDevices(c *cgroups.Cgroup, pid int) error { func joinDevices(c *configs.Cgroup, pid int) error {
path, err := getSubsystemPath(c, "devices") path, err := getSubsystemPath(c, "devices")
if err != nil { if err != nil {
return err return err
@ -267,26 +359,26 @@ func joinDevices(c *cgroups.Cgroup, pid int) error {
return err return err
} }
if err := writeFile(path, "devices.deny", "a"); err != nil { if !c.AllowAllDevices {
return err if err := writeFile(path, "devices.deny", "a"); err != nil {
} return err
}
for _, dev := range c.AllowedDevices { }
if err := writeFile(path, "devices.allow", dev.GetCgroupAllowString()); err != nil { for _, dev := range c.AllowedDevices {
if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil {
return err return err
} }
} }
return nil return nil
} }
// Symmetrical public function to update device based cgroups. Also available // Symmetrical public function to update device based cgroups. Also available
// in the fs implementation. // in the fs implementation.
func ApplyDevices(c *cgroups.Cgroup, pid int) error { func ApplyDevices(c *configs.Cgroup, pid int) error {
return joinDevices(c, pid) return joinDevices(c, pid)
} }
func joinMemory(c *cgroups.Cgroup, pid int) error { func joinMemory(c *configs.Cgroup, pid int) error {
memorySwap := c.MemorySwap memorySwap := c.MemorySwap
if memorySwap == 0 { if memorySwap == 0 {
@ -305,7 +397,7 @@ func joinMemory(c *cgroups.Cgroup, pid int) error {
// systemd does not atm set up the cpuset controller, so we must manually // systemd does not atm set up the cpuset controller, so we must manually
// join it. Additionally that is a very finicky controller where each // join it. Additionally that is a very finicky controller where each
// level must have a full setup as the default for a new directory is "no cpus" // level must have a full setup as the default for a new directory is "no cpus"
func joinCpuset(c *cgroups.Cgroup, pid int) error { func joinCpuset(c *configs.Cgroup, pid int) error {
path, err := getSubsystemPath(c, "cpuset") path, err := getSubsystemPath(c, "cpuset")
if err != nil { if err != nil {
return err return err
@ -313,5 +405,5 @@ func joinCpuset(c *cgroups.Cgroup, pid int) error {
s := &fs.CpusetGroup{} s := &fs.CpusetGroup{}
return s.SetDir(path, c.CpusetCpus, c.CpusetMems, pid) return s.ApplyDir(path, c, pid)
} }

View file

@ -1,154 +0,0 @@
package libcontainer
import (
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/mount"
"github.com/docker/libcontainer/network"
)
// MountConfig is a named type whose underlying type is mount.MountConfig.
type MountConfig mount.MountConfig
// Network is a named type whose underlying type is network.Network.
type Network network.Network
// NamespaceType names a kind of namespace.
type NamespaceType string

const (
	NEWNET  NamespaceType = "NEWNET"
	NEWPID  NamespaceType = "NEWPID"
	NEWNS   NamespaceType = "NEWNS"
	NEWUTS  NamespaceType = "NEWUTS"
	NEWIPC  NamespaceType = "NEWIPC"
	NEWUSER NamespaceType = "NEWUSER"
)

// Namespace defines configuration for each namespace. It specifies an
// alternate path that is able to be joined via setns.
type Namespace struct {
	Type NamespaceType `json:"type"`
	Path string        `json:"path,omitempty"`
}

// Namespaces is an ordered list of Namespace entries. Add keeps at most one
// entry per type by updating the first existing entry instead of appending.
type Namespaces []Namespace

// Remove deletes the first entry of type t and reports whether one was found.
func (n *Namespaces) Remove(t NamespaceType) bool {
	pos := n.index(t)
	if pos < 0 {
		return false
	}
	list := *n
	*n = append(list[:pos], list[pos+1:]...)
	return true
}

// Add sets the path for type t, appending a new entry when the list has none
// of that type yet.
func (n *Namespaces) Add(t NamespaceType, path string) {
	if pos := n.index(t); pos >= 0 {
		(*n)[pos].Path = path
		return
	}
	*n = append(*n, Namespace{Type: t, Path: path})
}

// index returns the position of the first entry of type t, or -1 when the
// list contains no such entry.
func (n *Namespaces) index(t NamespaceType) int {
	list := *n
	for pos := 0; pos < len(list); pos++ {
		if list[pos].Type == t {
			return pos
		}
	}
	return -1
}

// Contains reports whether the list has an entry of type t.
func (n *Namespaces) Contains(t NamespaceType) bool {
	return n.index(t) >= 0
}
// Config defines configuration options for executing a process inside a contained environment.
type Config struct {
// Mount specific options.
MountConfig *MountConfig `json:"mount_config,omitempty"`
// Pathname to container's root filesystem
RootFs string `json:"root_fs,omitempty"`
// Hostname optionally sets the container's hostname if provided
Hostname string `json:"hostname,omitempty"`
// User will set the uid and gid of the executing process running inside the container
User string `json:"user,omitempty"`
// WorkingDir will change the process's current working directory inside the container's rootfs
WorkingDir string `json:"working_dir,omitempty"`
// Env will populate the process's environment with the provided values
// Any values from the parent process will be cleared before the values
// provided in Env are provided to the process
Env []string `json:"environment,omitempty"`
// Tty when true will allocate a pty slave on the host for access by the container's process
// and ensure that it is mounted inside the container's rootfs
Tty bool `json:"tty,omitempty"`
// Namespaces specifies the container's namespaces that it should setup when cloning the init process
// If a namespace is not provided that namespace is shared from the container's parent process
Namespaces Namespaces `json:"namespaces,omitempty"`
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capabilities not specified will be dropped from the process's capability mask
Capabilities []string `json:"capabilities,omitempty"`
// Networks specifies the container's network setup to be created
Networks []*Network `json:"networks,omitempty"`
// Routes can be specified to create entries in the route table as the container is started
Routes []*Route `json:"routes,omitempty"`
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
// placed into to limit the resources the container has available
Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"`
// AppArmorProfile specifies the profile to apply to the process running in the container and is
// changed at the time the process is execed
AppArmorProfile string `json:"apparmor_profile,omitempty"`
// ProcessLabel specifies the label to apply to the process running in the container. It is
// commonly used by selinux
ProcessLabel string `json:"process_label,omitempty"`
// RestrictSys will remount /proc/sys, /sys, and mask over sysrq-trigger as well as /proc/irq and
// /proc/bus
RestrictSys bool `json:"restrict_sys,omitempty"`
// Rlimits specifies the resource limits, such as max open files, to set in the container
// If Rlimits are not set, the container will inherit rlimits from the parent process
Rlimits []Rlimit `json:"rlimits,omitempty"`
// AdditionalGroups specifies the gids that should be added to supplementary groups
// in addition to those that the user belongs to.
AdditionalGroups []int `json:"additional_groups,omitempty"`
}
// Route describes one entry to create in the route table as the container is started.
//
// All of destination, source, and gateway should be either IPv4 or IPv6.
// One of the three options must be present, and omitted entries will use their
// IP family default for the route table. For IPv4 for example, setting the
// gateway to 1.2.3.4 and the interface to eth0 will set up a standard
// destination of 0.0.0.0(or *) when viewed in the route table.
type Route struct {
// Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6
Destination string `json:"destination,omitempty"`
// Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6
Source string `json:"source,omitempty"`
// Sets the gateway. Accepts IPv4 and IPv6
Gateway string `json:"gateway,omitempty"`
// The device to set this route up for, for example: eth0
InterfaceName string `json:"interface_name,omitempty"`
}
// Rlimit is one resource limit entry: a limit type together with its hard and
// soft values. All fields are omitted from JSON when zero.
// NOTE(review): Type is presumably an RLIMIT_* constant - confirm at the call site.
type Rlimit struct {
Type int `json:"type,omitempty"`
Hard uint64 `json:"hard,omitempty"`
Soft uint64 `json:"soft,omitempty"`
}

View file

@ -0,0 +1,57 @@
package configs
// FreezerState is the string form of a freezer state for a cgroup.
type FreezerState string
const (
// Undefined is the zero value: no freezer state requested.
Undefined FreezerState = ""
// Frozen is the "FROZEN" state string.
Frozen FreezerState = "FROZEN"
// Thawed is the "THAWED" state string.
Thawed FreezerState = "THAWED"
)
// Cgroup holds the cgroup settings for a container: its name and parent,
// device access rules, memory, CPU and block IO limits, and freezer state.
type Cgroup struct {
// name of this cgroup
Name string `json:"name"`
// name of parent cgroup or slice
Parent string `json:"parent"`
// If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
AllowAllDevices bool `json:"allow_all_devices"`
// devices that may be accessed when AllowAllDevices is false
AllowedDevices []*Device `json:"allowed_devices"`
// Memory limit (in bytes)
Memory int64 `json:"memory"`
// Memory reservation or soft_limit (in bytes)
MemoryReservation int64 `json:"memory_reservation"`
// Total memory usage (memory + swap); set `-1' to disable swap
MemorySwap int64 `json:"memory_swap"`
// CPU shares (relative weight vs. other containers)
CpuShares int64 `json:"cpu_shares"`
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
CpuQuota int64 `json:"cpu_quota"`
// CPU period to be used for hardcapping (in usecs). 0 to use system default.
CpuPeriod int64 `json:"cpu_period"`
// CPU to use
CpusetCpus string `json:"cpuset_cpus"`
// MEM to use
CpusetMems string `json:"cpuset_mems"`
// Specifies per cgroup weight, range is from 10 to 1000.
BlkioWeight int64 `json:"blkio_weight"`
// set the freeze value for the process
Freezer FreezerState `json:"freezer"`
// Parent slice to use for systemd TODO: remove in favor of Parent
Slice string `json:"slice"`
// Whether to disable OOM Killer
OomKillDisable bool `json:"oom_kill_disable"`
}

View file

@ -0,0 +1,145 @@
package configs
import "fmt"
// Rlimit describes one resource limit as a type plus its hard and soft values.
type Rlimit struct {
// Type identifies the resource being limited.
// NOTE(review): presumably an RLIMIT_* constant — confirm against the code that applies it.
Type int `json:"type"`
// Hard is the hard limit value.
Hard uint64 `json:"hard"`
// Soft is the soft limit value.
Soft uint64 `json:"soft"`
}
// IDMap represents UID/GID Mappings for User Namespaces.
//
// One entry maps the Size consecutive container IDs starting at ContainerID
// onto the consecutive host IDs starting at HostID.
type IDMap struct {
// ContainerID is the first ID of the range inside the container.
ContainerID int `json:"container_id"`
// HostID is the host ID that ContainerID maps to.
HostID int `json:"host_id"`
// Size is the number of consecutive IDs covered by the mapping.
Size int `json:"size"`
}
// Config defines configuration options for executing a process inside a contained environment.
type Config struct {
// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs
// This is a common option when the container is running in ramdisk
NoPivotRoot bool `json:"no_pivot_root"`
// ParentDeathSignal specifies the signal that is sent to the container's process in the case
// that the parent process dies.
ParentDeathSignal int `json:"parent_death_signal"`
// PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set.
// When a custom PivotDir is not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable.
// This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot.
PivotDir string `json:"pivot_dir"`
// Path to a directory containing the container's root filesystem.
Rootfs string `json:"rootfs"`
// Readonlyfs will remount the container's rootfs as readonly where only externally mounted
// bind mounts are writable.
Readonlyfs bool `json:"readonlyfs"`
// Mounts specify additional source and destination paths that will be mounted inside the container's
// rootfs and mount namespace if specified
Mounts []*Mount `json:"mounts"`
// The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well!
Devices []*Device `json:"devices"`
// MountLabel is the label associated with the container's mounts.
// NOTE(review): presumably a security label like ProcessLabel — confirm against the code that applies it.
MountLabel string `json:"mount_label"`
// Hostname optionally sets the container's hostname if provided
Hostname string `json:"hostname"`
// Namespaces specifies the container's namespaces that it should setup when cloning the init process
// If a namespace is not provided that namespace is shared from the container's parent process
Namespaces Namespaces `json:"namespaces"`
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capabilities not specified will be dropped from the processes capability mask
Capabilities []string `json:"capabilities"`
// Networks specifies the container's network setup to be created
Networks []*Network `json:"networks"`
// Routes can be specified to create entries in the route table as the container is started
Routes []*Route `json:"routes"`
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
// placed into to limit the resources the container has available
Cgroups *Cgroup `json:"cgroups"`
// AppArmorProfile specifies the profile to apply to the process running in the container and is
// changed at the time the process is execed
AppArmorProfile string `json:"apparmor_profile"`
// ProcessLabel specifies the label to apply to the process running in the container. It is
// commonly used by selinux
ProcessLabel string `json:"process_label"`
// Rlimits specifies the resource limits, such as max open files, to set in the container
// If Rlimits are not set, the container will inherit rlimits from the parent process
Rlimits []Rlimit `json:"rlimits"`
// AdditionalGroups specifies the gids that should be added to supplementary groups
// in addition to those that the user belongs to.
AdditionalGroups []int `json:"additional_groups"`
// UidMappings is an array of User ID mappings for User Namespaces
UidMappings []IDMap `json:"uid_mappings"`
// GidMappings is an array of Group ID mappings for User Namespaces
GidMappings []IDMap `json:"gid_mappings"`
// MaskPaths specifies paths within the container's rootfs to mask over with a bind
// mount pointing to /dev/null as to prevent reads of the file.
MaskPaths []string `json:"mask_paths"`
// ReadonlyPaths specifies paths within the container's rootfs to remount as read-only
// so that writes to these files are prevented.
ReadonlyPaths []string `json:"readonly_paths"`
}
// HostUID returns the uid on the host that the container's root user (uid 0)
// corresponds to.
//
// Without a NEWUSER namespace in the config the result is always 0. With one,
// the uid is looked up in UidMappings; -1 and an error are returned when the
// mappings are nil or do not cover container uid 0.
func (c Config) HostUID() (int, error) {
	if !c.Namespaces.Contains(NEWUSER) {
		// No user namespace: report the default root uid 0.
		return 0, nil
	}
	if c.UidMappings == nil {
		return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.")
	}
	hostUID, ok := c.hostIDFromMapping(0, c.UidMappings)
	if !ok {
		return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
	}
	return hostUID, nil
}
// HostGID returns the gid on the host that the container's root group (gid 0)
// corresponds to, which can be non-zero when user namespaces are enabled.
//
// Without a NEWUSER namespace in the config the result is always 0. With one,
// the gid is looked up in GidMappings; -1 and an error are returned when the
// mappings are nil or do not cover container gid 0.
func (c Config) HostGID() (int, error) {
	if c.Namespaces.Contains(NEWUSER) {
		if c.GidMappings == nil {
			return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
		}
		id, found := c.hostIDFromMapping(0, c.GidMappings)
		if !found {
			// The lookup is against the gid mappings, so report a missing
			// group mapping rather than a user mapping.
			return -1, fmt.Errorf("User namespaces enabled, but no root group mapping found.")
		}
		return id, nil
	}
	// Return default root gid 0
	return 0, nil
}
// hostIDFromMapping translates containerID to a host ID using the first entry
// of uMap whose range [ContainerID, ContainerID+Size-1] contains it.
// It returns (-1, false) when no entry covers containerID.
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
	for i := range uMap {
		first := uMap[i].ContainerID
		last := first + uMap[i].Size - 1
		if containerID < first || containerID > last {
			continue
		}
		// Keep the same offset into the host range as into the container range.
		return uMap[i].HostID + (containerID - first), true
	}
	return -1, false
}

View file

@ -1,12 +1,11 @@
package libcontainer package configs
import ( import (
"encoding/json" "encoding/json"
"fmt"
"os" "os"
"path/filepath" "path/filepath"
"testing" "testing"
"github.com/docker/libcontainer/devices"
) )
// Checks whether the expected capability is specified in the capabilities. // Checks whether the expected capability is specified in the capabilities.
@ -19,13 +18,13 @@ func contains(expected string, values []string) bool {
return false return false
} }
func containsDevice(expected *devices.Device, values []*devices.Device) bool { func containsDevice(expected *Device, values []*Device) bool {
for _, d := range values { for _, d := range values {
if d.Path == expected.Path && if d.Path == expected.Path &&
d.CgroupPermissions == expected.CgroupPermissions && d.Permissions == expected.Permissions &&
d.FileMode == expected.FileMode && d.FileMode == expected.FileMode &&
d.MajorNumber == expected.MajorNumber && d.Major == expected.Major &&
d.MinorNumber == expected.MinorNumber && d.Minor == expected.Minor &&
d.Type == expected.Type { d.Type == expected.Type {
return true return true
} }
@ -34,7 +33,7 @@ func containsDevice(expected *devices.Device, values []*devices.Device) bool {
} }
func loadConfig(name string) (*Config, error) { func loadConfig(name string) (*Config, error) {
f, err := os.Open(filepath.Join("sample_configs", name)) f, err := os.Open(filepath.Join("../sample_configs", name))
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -45,6 +44,34 @@ func loadConfig(name string) (*Config, error) {
return nil, err return nil, err
} }
// Check that a config doesn't contain extra fields
var configMap, abstractMap map[string]interface{}
if _, err := f.Seek(0, 0); err != nil {
return nil, err
}
if err := json.NewDecoder(f).Decode(&abstractMap); err != nil {
return nil, err
}
configData, err := json.Marshal(&container)
if err != nil {
return nil, err
}
if err := json.Unmarshal(configData, &configMap); err != nil {
return nil, err
}
for k := range configMap {
delete(abstractMap, k)
}
if len(abstractMap) != 0 {
return nil, fmt.Errorf("unknown fields: %s", abstractMap)
}
return container, nil return container, nil
} }
@ -59,11 +86,6 @@ func TestConfigJsonFormat(t *testing.T) {
t.Fail() t.Fail()
} }
if !container.Tty {
t.Log("tty should be set to true")
t.Fail()
}
if !container.Namespaces.Contains(NEWNET) { if !container.Namespaces.Contains(NEWNET) {
t.Log("namespaces should contain NEWNET") t.Log("namespaces should contain NEWNET")
t.Fail() t.Fail()
@ -101,11 +123,6 @@ func TestConfigJsonFormat(t *testing.T) {
t.Fail() t.Fail()
} }
if n.VethPrefix != "veth" {
t.Logf("veth prefix should be veth but received %q", n.VethPrefix)
t.Fail()
}
if n.Gateway != "172.17.42.1" { if n.Gateway != "172.17.42.1" {
t.Logf("veth gateway should be 172.17.42.1 but received %q", n.Gateway) t.Logf("veth gateway should be 172.17.42.1 but received %q", n.Gateway)
t.Fail() t.Fail()
@ -119,18 +136,12 @@ func TestConfigJsonFormat(t *testing.T) {
break break
} }
} }
for _, d := range DefaultSimpleDevices {
for _, d := range devices.DefaultSimpleDevices { if !containsDevice(d, container.Devices) {
if !containsDevice(d, container.MountConfig.DeviceNodes) {
t.Logf("expected device configuration for %s", d.Path) t.Logf("expected device configuration for %s", d.Path)
t.Fail() t.Fail()
} }
} }
if !container.RestrictSys {
t.Log("expected restrict sys to be true")
t.Fail()
}
} }
func TestApparmorProfile(t *testing.T) { func TestApparmorProfile(t *testing.T) {
@ -154,8 +165,8 @@ func TestSelinuxLabels(t *testing.T) {
if container.ProcessLabel != label { if container.ProcessLabel != label {
t.Fatalf("expected process label %q but received %q", label, container.ProcessLabel) t.Fatalf("expected process label %q but received %q", label, container.ProcessLabel)
} }
if container.MountConfig.MountLabel != label { if container.MountLabel != label {
t.Fatalf("expected mount label %q but received %q", label, container.MountConfig.MountLabel) t.Fatalf("expected mount label %q but received %q", label, container.MountLabel)
} }
} }
@ -170,3 +181,69 @@ func TestRemoveNamespace(t *testing.T) {
t.Fatalf("namespaces should have 0 items but reports %d", len(ns)) t.Fatalf("namespaces should have 0 items but reports %d", len(ns))
} }
} }
// TestHostUIDNoUSERNS checks that HostUID reports 0 when the config has no
// NEWUSER namespace.
func TestHostUIDNoUSERNS(t *testing.T) {
	cfg := &Config{Namespaces: Namespaces{}}
	got, err := cfg.HostUID()
	if err != nil {
		t.Fatal(err)
	}
	if got != 0 {
		t.Fatalf("expected uid 0 with no USERNS but received %d", got)
	}
}
// TestHostUIDWithUSERNS checks that, with a NEWUSER namespace and a mapping of
// container uid 0 to host uid 1000, HostUID reports 1000.
func TestHostUIDWithUSERNS(t *testing.T) {
	config := &Config{
		Namespaces: Namespaces{{Type: NEWUSER}},
		UidMappings: []IDMap{
			{
				ContainerID: 0,
				HostID:      1000,
				Size:        1,
			},
		},
	}
	uid, err := config.HostUID()
	if err != nil {
		t.Fatal(err)
	}
	if uid != 1000 {
		// This case runs with USERNS enabled, so the message must say so.
		t.Fatalf("expected uid 1000 with USERNS but received %d", uid)
	}
}
// TestHostGIDNoUSERNS checks that HostGID reports 0 when the config has no
// NEWUSER namespace.
func TestHostGIDNoUSERNS(t *testing.T) {
	cfg := &Config{Namespaces: Namespaces{}}
	gid, err := cfg.HostGID()
	if err != nil {
		t.Fatal(err)
	}
	if gid != 0 {
		t.Fatalf("expected gid 0 with no USERNS but received %d", gid)
	}
}
// TestHostGIDWithUSERNS checks that, with a NEWUSER namespace and a mapping of
// container gid 0 to host gid 1000, HostGID reports 1000.
func TestHostGIDWithUSERNS(t *testing.T) {
	config := &Config{
		Namespaces: Namespaces{{Type: NEWUSER}},
		GidMappings: []IDMap{
			{
				ContainerID: 0,
				HostID:      1000,
				Size:        1,
			},
		},
	}
	gid, err := config.HostGID()
	if err != nil {
		t.Fatal(err)
	}
	if gid != 1000 {
		// This case runs with USERNS enabled, so the message must say so.
		t.Fatalf("expected gid 1000 with USERNS but received %d", gid)
	}
}

View file

@ -0,0 +1,52 @@
package configs
import (
"fmt"
"os"
)
const (
	// Wildcard is the major/minor value that is rendered as "*" in a
	// device's cgroup string.
	Wildcard = -1
)

// Device describes a device node: its type, path, major/minor numbers,
// cgroup permissions, file mode and ownership.
type Device struct {
	// Device type, block, char, etc.
	Type rune `json:"type"`

	// Path to the device.
	Path string `json:"path"`

	// Major is the device's major number.
	Major int64 `json:"major"`

	// Minor is the device's minor number.
	Minor int64 `json:"minor"`

	// Cgroup permissions format, rwm.
	Permissions string `json:"permissions"`

	// FileMode permission bits for the device.
	FileMode os.FileMode `json:"file_mode"`

	// Uid of the device.
	Uid uint32 `json:"uid"`

	// Gid of the device.
	Gid uint32 `json:"gid"`
}

// CgroupString formats the device as "<type> <major>:<minor> <permissions>",
// with Wildcard numbers written as "*".
func (d *Device) CgroupString() string {
	major := deviceNumberString(d.Major)
	minor := deviceNumberString(d.Minor)
	return fmt.Sprintf("%c %s:%s %s", d.Type, major, minor, d.Permissions)
}

// Mkdev packs Major and Minor into a single device number: the major shifted
// left by 8, the low 8 bits of the minor in place, and minor bits 8-19
// shifted left by 12.
func (d *Device) Mkdev() int {
	lowMinor := d.Minor & 0xff
	highMinor := (d.Minor & 0xfff00) << 12
	return int((d.Major << 8) | lowMinor | highMinor)
}

// deviceNumberString renders a major or minor number as text, using "*" for
// Wildcard.
func deviceNumberString(number int64) string {
	if number != Wildcard {
		return fmt.Sprint(number)
	}
	return "*"
}

View file

@ -0,0 +1,137 @@
package configs
var (
// These are devices that are to be both allowed and created.
DefaultSimpleDevices = []*Device{
// /dev/null and zero
{
Path: "/dev/null",
Type: 'c',
Major: 1,
Minor: 3,
Permissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/zero",
Type: 'c',
Major: 1,
Minor: 5,
Permissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/full",
Type: 'c',
Major: 1,
Minor: 7,
Permissions: "rwm",
FileMode: 0666,
},
// consoles and ttys
{
Path: "/dev/tty",
Type: 'c',
Major: 5,
Minor: 0,
Permissions: "rwm",
FileMode: 0666,
},
// /dev/urandom,/dev/random
{
Path: "/dev/urandom",
Type: 'c',
Major: 1,
Minor: 9,
Permissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/random",
Type: 'c',
Major: 1,
Minor: 8,
Permissions: "rwm",
FileMode: 0666,
},
}
DefaultAllowedDevices = append([]*Device{
// allow mknod for any device
{
Type: 'c',
Major: Wildcard,
Minor: Wildcard,
Permissions: "m",
},
{
Type: 'b',
Major: Wildcard,
Minor: Wildcard,
Permissions: "m",
},
{
Path: "/dev/console",
Type: 'c',
Major: 5,
Minor: 1,
Permissions: "rwm",
},
{
Path: "/dev/tty0",
Type: 'c',
Major: 4,
Minor: 0,
Permissions: "rwm",
},
{
Path: "/dev/tty1",
Type: 'c',
Major: 4,
Minor: 1,
Permissions: "rwm",
},
// /dev/pts/ - pts namespaces are "coming soon"
{
Path: "",
Type: 'c',
Major: 136,
Minor: Wildcard,
Permissions: "rwm",
},
{
Path: "",
Type: 'c',
Major: 5,
Minor: 2,
Permissions: "rwm",
},
// tuntap
{
Path: "",
Type: 'c',
Major: 10,
Minor: 200,
Permissions: "rwm",
},
}, DefaultSimpleDevices...)
DefaultAutoCreatedDevices = append([]*Device{
{
// /dev/fuse is created but not allowed.
// This is to allow java to work, because java
// insists on there being a /dev/fuse.
// https://github.com/docker/docker/issues/514
// https://github.com/docker/docker/issues/2393
//
Path: "/dev/fuse",
Type: 'c',
Major: 10,
Minor: 229,
Permissions: "rwm",
},
}, DefaultSimpleDevices...)
)

View file

@ -0,0 +1,21 @@
package configs
// Mount describes a single mount: where it comes from, where it goes inside
// the container, and the device, flags, data and relabel mode to use.
type Mount struct {
// Source path for the mount.
Source string `json:"source"`
// Destination path for the mount inside the container.
Destination string `json:"destination"`
// Device the mount is for.
Device string `json:"device"`
// Mount flags.
Flags int `json:"flags"`
// Mount data applied to the mount.
Data string `json:"data"`
// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
Relabel string `json:"relabel"`
}

View file

@ -0,0 +1,109 @@
package configs
import (
"fmt"
"syscall"
)
// NamespaceType names a kind of namespace, e.g. "NEWNET".
type NamespaceType string

// The namespace types known to this package.
const (
	NEWNET  NamespaceType = "NEWNET"
	NEWPID  NamespaceType = "NEWPID"
	NEWNS   NamespaceType = "NEWNS"
	NEWUTS  NamespaceType = "NEWUTS"
	NEWIPC  NamespaceType = "NEWIPC"
	NEWUSER NamespaceType = "NEWUSER"
)

// Namespace defines configuration for each namespace. It specifies an
// alternate path that is able to be joined via setns.
type Namespace struct {
	Type NamespaceType `json:"type"`
	Path string        `json:"path"`
}

// Syscall returns the clone flag registered in namespaceInfo for the
// namespace's type, or 0 when the type is not registered.
func (n *Namespace) Syscall() int {
	flag := namespaceInfo[n.Type]
	return flag
}

// GetPath returns the configured Path when one is set, and otherwise the
// /proc/<pid>/ns/<file> path of this namespace type for the given pid.
func (n *Namespace) GetPath(pid int) string {
	if n.Path == "" {
		return fmt.Sprintf("/proc/%d/ns/%s", pid, n.file())
	}
	return n.Path
}

// file returns the file name used for this namespace type under
// /proc/<pid>/ns, or "" for a type that is not one of the known constants.
func (n *Namespace) file() string {
	switch n.Type {
	case NEWNET:
		return "net"
	case NEWNS:
		return "mnt"
	case NEWPID:
		return "pid"
	case NEWIPC:
		return "ipc"
	case NEWUSER:
		return "user"
	case NEWUTS:
		return "uts"
	default:
		return ""
	}
}

// Namespaces is an ordered list of namespaces holding at most one entry per
// type when it is only modified through Add and Remove.
type Namespaces []Namespace

// Remove deletes the entry of type t and reports whether one was present.
func (n *Namespaces) Remove(t NamespaceType) bool {
	pos := n.index(t)
	if pos == -1 {
		return false
	}
	list := *n
	*n = append(list[:pos], list[pos+1:]...)
	return true
}

// Add sets the path of the existing entry of type t, or appends a new entry
// when there is none yet.
func (n *Namespaces) Add(t NamespaceType, path string) {
	if pos := n.index(t); pos != -1 {
		(*n)[pos].Path = path
		return
	}
	*n = append(*n, Namespace{Type: t, Path: path})
}

// index returns the position of the first entry of type t, or -1.
func (n *Namespaces) index(t NamespaceType) int {
	list := *n
	for i := 0; i < len(list); i++ {
		if list[i].Type == t {
			return i
		}
	}
	return -1
}

// Contains reports whether an entry of type t is present.
func (n *Namespaces) Contains(t NamespaceType) bool {
	return n.index(t) >= 0
}

// namespaceInfo maps each namespace type to its CLONE_* flag.
var namespaceInfo = map[NamespaceType]int{
	NEWNET:  syscall.CLONE_NEWNET,
	NEWNS:   syscall.CLONE_NEWNS,
	NEWUSER: syscall.CLONE_NEWUSER,
	NEWIPC:  syscall.CLONE_NEWIPC,
	NEWUTS:  syscall.CLONE_NEWUTS,
	NEWPID:  syscall.CLONE_NEWPID,
}

// CloneFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare. This function returns flags only for new
// namespaces: entries that carry a Path are skipped.
func (n *Namespaces) CloneFlags() uintptr {
	flags := 0
	for _, ns := range *n {
		if ns.Path == "" {
			flags |= namespaceInfo[ns.Type]
		}
	}
	return uintptr(flags)
}

View file

@ -0,0 +1,66 @@
package configs
// Network defines configuration for a container's networking stack
//
// The network configuration can be omitted from a container causing the
// container to be setup with the host's networking stack
type Network struct {
// Type sets the networks type, commonly veth and loopback
Type string `json:"type"`
// Name of the network interface
Name string `json:"name"`
// The bridge to use.
Bridge string `json:"bridge"`
// MacAddress contains the MAC address to set on the network interface
MacAddress string `json:"mac_address"`
// Address contains the IPv4 and mask to set on the network interface
Address string `json:"address"`
// Gateway sets the gateway address that is used as the default for the interface
Gateway string `json:"gateway"`
// IPv6Address contains the IPv6 and mask to set on the network interface
IPv6Address string `json:"ipv6_address"`
// IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface
IPv6Gateway string `json:"ipv6_gateway"`
// Mtu sets the mtu value for the interface and will be mirrored on both the host and
// container's interfaces if a pair is created, specifically in the case of type veth
// Note: This does not apply to loopback interfaces.
Mtu int `json:"mtu"`
// TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and
// container's interfaces if a pair is created, specifically in the case of type veth
// Note: This does not apply to loopback interfaces.
TxQueueLen int `json:"txqueuelen"`
// HostInterfaceName is a unique name for the host side of a veth pair, i.e. the
// interface of the pair that resides on the host rather than in the container.
HostInterfaceName string `json:"host_interface_name"`
}
// Route describes an entry to create in the route table as the container is started.
//
// All of destination, source, and gateway should be either IPv4 or IPv6.
// One of the three options must be present, and omitted entries will use their
// IP family default for the route table. For IPv4 for example, setting the
// gateway to 1.2.3.4 and the interface to eth0 will set up a standard
// destination of 0.0.0.0(or *) when viewed in the route table.
type Route struct {
// Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6
Destination string `json:"destination"`
// Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6
Source string `json:"source"`
// Sets the gateway. Accepts IPv4 and IPv6
Gateway string `json:"gateway"`
// The device to set this route up for, for example: eth0
InterfaceName string `json:"interface_name"`
}

View file

@ -0,0 +1,93 @@
package validate
import (
"fmt"
"os"
"path/filepath"
"github.com/docker/libcontainer/configs"
)
// Validator checks a container configuration.
type Validator interface {
// Validate returns a non-nil error when the config fails a check.
Validate(*configs.Config) error
}
// New returns a Validator backed by a ConfigValidator.
func New() Validator {
return &ConfigValidator{}
}
// ConfigValidator is the Validator implementation returned by New. It has no fields.
type ConfigValidator struct {
}
// Validate runs the rootfs, network, hostname, security and usernamespace
// checks against config, in that order, and returns the first error
// encountered (nil when every check passes).
func (v *ConfigValidator) Validate(config *configs.Config) error {
	checks := []func(*configs.Config) error{
		v.rootfs,
		v.network,
		v.hostname,
		v.security,
		v.usernamespace,
	}
	for _, check := range checks {
		if err := check(config); err != nil {
			return err
		}
	}
	return nil
}
// rootfs validates that the rootfs is an absolute path and is not a symlink
// to the container's root filesystem: config.Rootfs must be unchanged after
// being made absolute and having its symlinks resolved.
func (v *ConfigValidator) rootfs(config *configs.Config) error {
	abs, err := filepath.Abs(config.Rootfs)
	if err != nil {
		return err
	}
	resolved, err := filepath.EvalSymlinks(abs)
	if err != nil {
		return err
	}
	if resolved == config.Rootfs {
		return nil
	}
	return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs)
}
// network rejects configs that list networks or routes without also
// requesting a NEWNET namespace.
func (v *ConfigValidator) network(config *configs.Config) error {
	if config.Namespaces.Contains(configs.NEWNET) {
		return nil
	}
	if len(config.Networks) == 0 && len(config.Routes) == 0 {
		return nil
	}
	return fmt.Errorf("unable to apply network settings without a private NET namespace")
}
// hostname rejects configs that set a hostname without also requesting a
// NEWUTS namespace.
func (v *ConfigValidator) hostname(config *configs.Config) error {
	if config.Hostname == "" {
		return nil
	}
	if config.Namespaces.Contains(configs.NEWUTS) {
		return nil
	}
	return fmt.Errorf("unable to set hostname without a private UTS namespace")
}
// security rejects configs that list mask paths or read-only paths without
// also requesting a NEWNS namespace.
func (v *ConfigValidator) security(config *configs.Config) error {
	restricted := len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0
	if !restricted || config.Namespaces.Contains(configs.NEWNS) {
		return nil
	}
	// restrict sys without mount namespace
	return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
}
// usernamespace checks the user namespace settings: without NEWUSER in the
// config no uid/gid mappings may be given, and with NEWUSER the file
// /proc/self/ns/user must not be reported as missing.
func (v *ConfigValidator) usernamespace(config *configs.Config) error {
	if !config.Namespaces.Contains(configs.NEWUSER) {
		if config.UidMappings != nil || config.GidMappings != nil {
			return fmt.Errorf("User namespace mappings specified, but USER namespace isn't enabled in the config")
		}
		return nil
	}
	// Only a "does not exist" result is treated as a failure here.
	if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
		return fmt.Errorf("USER namespaces aren't enabled in the kernel")
	}
	return nil
}

View file

@ -0,0 +1,15 @@
package libcontainer
import "io"
// Console represents a pseudo TTY.
//
// It embeds io.ReadWriter and io.Closer, so a Console can be read from,
// written to and closed like any other stream.
type Console interface {
io.ReadWriter
io.Closer
// Path returns the filesystem path to the slave side of the pty.
Path() string
// Fd returns the fd for the master of the pty.
Fd() uintptr
}

View file

@ -1,128 +0,0 @@
// +build linux
package console
import (
"fmt"
"os"
"path/filepath"
"syscall"
"unsafe"
"github.com/docker/libcontainer/label"
)
// Setup initializes the proper /dev/console inside the rootfs path.
//
// It sets consolePath to mode 0600 owned by 0:0, applies mountLabel to it,
// makes sure <rootfs>/dev/console exists and then bind mounts consolePath
// onto it. The first failing step is returned as the error.
func Setup(rootfs, consolePath, mountLabel string) error {
// Clear the umask for the duration of the call and restore it on return.
oldMask := syscall.Umask(0000)
defer syscall.Umask(oldMask)
if err := os.Chmod(consolePath, 0600); err != nil {
return err
}
if err := os.Chown(consolePath, 0, 0); err != nil {
return err
}
if err := label.SetFileLabel(consolePath, mountLabel); err != nil {
return fmt.Errorf("set file label %s %s", consolePath, err)
}
dest := filepath.Join(rootfs, "dev/console")
// Create the file that serves as the bind mount target.
f, err := os.Create(dest)
if err != nil && !os.IsExist(err) {
return fmt.Errorf("create %s %s", dest, err)
}
if f != nil {
f.Close()
}
if err := syscall.Mount(consolePath, dest, "bind", syscall.MS_BIND, ""); err != nil {
return fmt.Errorf("bind %s to %s %s", consolePath, dest, err)
}
return nil
}
// OpenAndDup opens consolePath read/write via OpenTerminal and dup2s the
// resulting fd onto fds 0, 1 and 2, stopping at the first failure.
func OpenAndDup(consolePath string) error {
	slave, err := OpenTerminal(consolePath, syscall.O_RDWR)
	if err != nil {
		return fmt.Errorf("open terminal %s", err)
	}
	for stdio := 0; stdio <= 2; stdio++ {
		if err := syscall.Dup2(int(slave.Fd()), stdio); err != nil {
			return err
		}
	}
	return nil
}
// Unlockpt unlocks the slave side of the pseudoterminal whose master is f by
// issuing the TIOCSPTLCK ioctl with a zero value.
// Unlockpt should be called before opening the slave side of a pseudoterminal.
func Unlockpt(f *os.File) error {
	var lock int32
	arg := uintptr(unsafe.Pointer(&lock))
	return Ioctl(f.Fd(), syscall.TIOCSPTLCK, arg)
}
// Ptsname returns the /dev/pts/<n> path for the given master, where n is the
// number reported by the TIOCGPTN ioctl on f.
func Ptsname(f *os.File) (string, error) {
	var index int32
	err := Ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&index)))
	if err != nil {
		return "", err
	}
	return fmt.Sprintf("/dev/pts/%d", index), nil
}
// CreateMasterAndConsole will open /dev/ptmx on the host and retrieve the
// pts name for use as the pty slave inside the container.
//
// On success it returns the open master file and the slave path; the caller
// owns the master and must close it. On failure it returns a nil file, an
// empty path and the error, and the master is closed before returning so the
// descriptor does not leak.
func CreateMasterAndConsole() (*os.File, string, error) {
	master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
	if err != nil {
		return nil, "", err
	}
	console, err := Ptsname(master)
	if err != nil {
		// Nothing is handed back to the caller, so release the master here.
		master.Close()
		return nil, "", err
	}
	if err := Unlockpt(master); err != nil {
		master.Close()
		return nil, "", err
	}
	return master, console, nil
}
// OpenPtmx opens /dev/ptmx, i.e. the PTY master.
// NOTE(review): the file is opened O_RDONLY here while CreateMasterAndConsole
// opens the same path O_RDWR — confirm the read-only mode is intended.
func OpenPtmx() (*os.File, error) {
// O_NOCTTY and O_CLOEXEC are not present in os package so we use the syscall's one for all.
return os.OpenFile("/dev/ptmx", syscall.O_RDONLY|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
}
// OpenTerminal is a clone of os.OpenFile without the O_CLOEXEC,
// used to open the pty slave inside the container namespace.
// A failure is reported as an *os.PathError with Op "open".
func OpenTerminal(name string, flag int) (*os.File, error) {
	fd, err := syscall.Open(name, flag, 0)
	if err == nil {
		return os.NewFile(uintptr(fd), name), nil
	}
	return nil, &os.PathError{Op: "open", Path: name, Err: err}
}
// Ioctl issues the ioctl system call on fd with request flag and argument
// data. It returns nil when the call reports errno 0 and the errno otherwise.
func Ioctl(fd uintptr, flag, data uintptr) error {
	_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data)
	if errno == 0 {
		return nil
	}
	return errno
}

View file

@ -0,0 +1,147 @@
// +build linux
package libcontainer
import (
"fmt"
"os"
"path/filepath"
"syscall"
"unsafe"
"github.com/docker/libcontainer/label"
)
// newConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
//
// It opens /dev/ptmx, looks up and unlocks the slave, then sets the slave to
// mode 0600 owned by uid:gid. If any step after the open fails, the master is
// closed before the error is returned so the descriptor does not leak.
func newConsole(uid, gid int) (Console, error) {
	master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
	if err != nil {
		return nil, err
	}
	console, err := ptsname(master)
	if err != nil {
		// Nothing is handed back to the caller, so release the master here.
		master.Close()
		return nil, err
	}
	if err := unlockpt(master); err != nil {
		master.Close()
		return nil, err
	}
	if err := os.Chmod(console, 0600); err != nil {
		master.Close()
		return nil, err
	}
	if err := os.Chown(console, uid, gid); err != nil {
		master.Close()
		return nil, err
	}
	return &linuxConsole{
		slavePath: console,
		master:    master,
	}, nil
}
// newConsoleFromPath is an internal function returning an initialized console for use inside
// a container's MNT namespace.
// Only slavePath is set on the returned console; its master is left nil.
func newConsoleFromPath(slavePath string) *linuxConsole {
return &linuxConsole{
slavePath: slavePath,
}
}
// linuxConsole is a linux pseudo TTY for use within a container.
type linuxConsole struct {
	// master is the open master side of the pty; it may be nil.
	master *os.File
	// slavePath is the filesystem path of the slave side.
	slavePath string
}

// Fd returns the file descriptor of the master.
func (c *linuxConsole) Fd() uintptr {
	fd := c.master.Fd()
	return fd
}

// Path returns the path of the slave side.
func (c *linuxConsole) Path() string {
	return c.slavePath
}

// Read reads from the master into b.
func (c *linuxConsole) Read(b []byte) (int, error) {
	return c.master.Read(b)
}

// Write writes b to the master.
func (c *linuxConsole) Write(b []byte) (int, error) {
	return c.master.Write(b)
}

// Close closes the master if there is one and is a no-op otherwise.
func (c *linuxConsole) Close() error {
	if c.master == nil {
		return nil
	}
	return c.master.Close()
}
// mount initializes the console inside the rootfs: it applies mountLabel to
// the slave path, makes sure <rootfs>/dev/console exists and bind mounts the
// slave path onto it.
// NOTE(review): uid and gid are accepted but not used in this body — confirm
// whether ownership is expected to be applied elsewhere.
func (c *linuxConsole) mount(rootfs, mountLabel string, uid, gid int) error {
// Clear the umask for the duration of the call and restore it on return.
oldMask := syscall.Umask(0000)
defer syscall.Umask(oldMask)
if err := label.SetFileLabel(c.slavePath, mountLabel); err != nil {
return err
}
dest := filepath.Join(rootfs, "/dev/console")
// Create the file that serves as the bind mount target.
f, err := os.Create(dest)
if err != nil && !os.IsExist(err) {
return err
}
if f != nil {
f.Close()
}
return syscall.Mount(c.slavePath, dest, "bind", syscall.MS_BIND, "")
}
// dupStdio opens the console's slavePath read/write and dup2s the resulting
// fd onto the current process's stdio (fds 0, 1 and 2), stopping at the first
// failure.
func (c *linuxConsole) dupStdio() error {
	slave, err := c.open(syscall.O_RDWR)
	if err != nil {
		return err
	}
	slaveFd := int(slave.Fd())
	for stdio := 0; stdio <= 2; stdio++ {
		if err := syscall.Dup2(slaveFd, stdio); err != nil {
			return err
		}
	}
	return nil
}
// open is a clone of os.OpenFile without the O_CLOEXEC used to open the pty slave.
// A failure is reported as an *os.PathError with Op "open".
func (c *linuxConsole) open(flag int) (*os.File, error) {
	fd, err := syscall.Open(c.slavePath, flag, 0)
	if err == nil {
		return os.NewFile(uintptr(fd), c.slavePath), nil
	}
	return nil, &os.PathError{Op: "open", Path: c.slavePath, Err: err}
}
// ioctl issues the ioctl system call on fd with request flag and argument
// data. It returns nil when the call reports errno 0 and the errno otherwise.
func ioctl(fd uintptr, flag, data uintptr) error {
	_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data)
	if errno == 0 {
		return nil
	}
	return errno
}
// unlockpt unlocks the slave side of the pseudoterminal whose master is f by
// issuing the TIOCSPTLCK ioctl with a zero value.
// unlockpt should be called before opening the slave side of a pty.
func unlockpt(f *os.File) error {
	var lock int32
	arg := uintptr(unsafe.Pointer(&lock))
	return ioctl(f.Fd(), syscall.TIOCSPTLCK, arg)
}
// ptsname returns the /dev/pts/<n> path for the given master, where n is the
// number reported by the TIOCGPTN ioctl on f.
func ptsname(f *os.File) (string, error) {
	var index int32
	err := ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&index)))
	if err != nil {
		return "", err
	}
	return fmt.Sprintf("/dev/pts/%d", index), nil
}

View file

@ -1,8 +1,53 @@
/* // Libcontainer provides a native Go implementation for creating containers
NOTE: The API is in flux and mainly not implemented. Proceed with caution until further notice. // with namespaces, cgroups, capabilities, and filesystem access controls.
*/ // It allows you to manage the lifecycle of the container performing additional operations
// after the container is created.
package libcontainer package libcontainer
import (
"github.com/docker/libcontainer/configs"
)
// Status is the status of a container.
type Status int
// The container statuses. Values start at 1 (iota + 1), so the zero value of
// Status matches none of them.
const (
// The container exists and is running.
Running Status = iota + 1
// The container exists, it is in the process of being paused.
Pausing
// The container exists, but all its processes are paused.
Paused
// The container does not exist.
Destroyed
)
// State represents a running container's state.
type State struct {
// ID is the container ID.
ID string `json:"id"`
// InitProcessPid is the init process id in the parent namespace.
InitProcessPid int `json:"init_process_pid"`
// InitProcessStartTime is the init process start time.
// Note that its JSON key is "init_process_start".
InitProcessStartTime string `json:"init_process_start"`
// CgroupPaths holds the paths to all the cgroups set up for a container. Key is
// the cgroup subsystem name with the value as the path.
CgroupPaths map[string]string `json:"cgroup_paths"`
// NamespacePaths are filepaths to the container's namespaces. Key is the namespace type
// with the value as the path.
NamespacePaths map[configs.NamespaceType]string `json:"namespace_paths"`
// Config is the container's configuration.
Config configs.Config `json:"config"`
}
// A libcontainer container object. // A libcontainer container object.
// //
// Each container is thread-safe within the same process. Since a container can // Each container is thread-safe within the same process. Since a container can
@ -12,67 +57,88 @@ type Container interface {
// Returns the ID of the container // Returns the ID of the container
ID() string ID() string
// Returns the current run state of the container. // Returns the current status of the container.
// //
// Errors: // errors:
// ContainerDestroyed - Container no longer exists, // ContainerDestroyed - Container no longer exists,
// SystemError - System error. // Systemerror - System error.
RunState() (*RunState, Error) Status() (Status, error)
// State returns the current container's state information.
//
// errors:
// Systemerror - System erroor.
State() (*State, error)
// Returns the current config of the container. // Returns the current config of the container.
Config() *Config Config() configs.Config
// Start a process inside the container. Returns the PID of the new process (in the caller process's namespace) and a channel that will return the exit status of the process whenever it dies. // Returns the PIDs inside this container. The PIDs are in the namespace of the calling process.
// //
// Errors: // errors:
// ContainerDestroyed - Container no longer exists,
// Systemerror - System error.
//
// Some of the returned PIDs may no longer refer to processes in the Container, unless
// the Container state is PAUSED in which case every PID in the slice is valid.
Processes() ([]int, error)
// Returns statistics for the container.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// Systemerror - System error.
Stats() (*Stats, error)
// Set cgroup resources of container as configured
//
// We can use this to change resources when containers are running.
//
// errors:
// Systemerror - System error.
Set() error
// Start a process inside the container. Returns error if process fails to
// start. You can track process lifecycle with passed Process structure.
//
// errors:
// ContainerDestroyed - Container no longer exists, // ContainerDestroyed - Container no longer exists,
// ConfigInvalid - config is invalid, // ConfigInvalid - config is invalid,
// ContainerPaused - Container is paused, // ContainerPaused - Container is paused,
// SystemError - System error. // Systemerror - System error.
Start(config *ProcessConfig) (pid int, exitChan chan int, err Error) Start(process *Process) (err error)
// Destroys the container after killing all running processes. // Destroys the container after killing all running processes.
// //
// Any event registrations are removed before the container is destroyed. // Any event registrations are removed before the container is destroyed.
// No error is returned if the container is already destroyed. // No error is returned if the container is already destroyed.
// //
// Errors: // errors:
// SystemError - System error. // Systemerror - System error.
Destroy() Error Destroy() error
// Returns the PIDs inside this container. The PIDs are in the namespace of the calling process.
//
// Errors:
// ContainerDestroyed - Container no longer exists,
// SystemError - System error.
//
// Some of the returned PIDs may no longer refer to processes in the Container, unless
// the Container state is PAUSED in which case every PID in the slice is valid.
Processes() ([]int, Error)
// Returns statistics for the container.
//
// Errors:
// ContainerDestroyed - Container no longer exists,
// SystemError - System error.
Stats() (*ContainerStats, Error)
// If the Container state is RUNNING or PAUSING, sets the Container state to PAUSING and pauses // If the Container state is RUNNING or PAUSING, sets the Container state to PAUSING and pauses
// the execution of any user processes. Asynchronously, when the container finished being paused the // the execution of any user processes. Asynchronously, when the container finished being paused the
// state is changed to PAUSED. // state is changed to PAUSED.
// If the Container state is PAUSED, do nothing. // If the Container state is PAUSED, do nothing.
// //
// Errors: // errors:
// ContainerDestroyed - Container no longer exists, // ContainerDestroyed - Container no longer exists,
// SystemError - System error. // Systemerror - System error.
Pause() Error Pause() error
// If the Container state is PAUSED, resumes the execution of any user processes in the // If the Container state is PAUSED, resumes the execution of any user processes in the
// Container before setting the Container state to RUNNING. // Container before setting the Container state to RUNNING.
// If the Container state is RUNNING, do nothing. // If the Container state is RUNNING, do nothing.
// //
// Errors: // errors:
// ContainerDestroyed - Container no longer exists, // ContainerDestroyed - Container no longer exists,
// SystemError - System error. // Systemerror - System error.
Resume() Error Resume() error
// NotifyOOM returns a read-only channel signaling when the container receives an OOM notification.
//
// errors:
// Systemerror - System error.
NotifyOOM() (<-chan struct{}, error)
} }

View file

@ -0,0 +1,306 @@
// +build linux
package libcontainer
import (
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"sync"
"syscall"
log "github.com/Sirupsen/logrus"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
)
// linuxContainer is the Linux implementation behind the container methods
// defined in this file.
type linuxContainer struct {
	// id is the value reported by ID().
	id string
	// root is the directory that receives the state file and that Destroy
	// removes.
	root string
	// config is the container configuration.
	config *configs.Config
	// cgroupManager performs all cgroup operations for the container.
	cgroupManager cgroups.Manager
	// initPath and initArgs are the Path and Args of every command built by
	// commandTemplate.
	initPath string
	initArgs []string
	// initProcess is the container's init process; while it is nil the
	// container is reported as Destroyed.
	initProcess parentProcess
	// m serializes the operations that acquire it.
	m sync.Mutex
}
// ID reports the identifier stored for this container.
func (c *linuxContainer) ID() string {
	id := c.id
	return id
}
// Config returns the container's configuration by value, i.e. a shallow copy
// of the struct the container points at.
func (c *linuxContainer) Config() configs.Config {
	cfg := *c.config
	return cfg
}
// Status reports the container's current status, computed while holding the
// container lock.
func (c *linuxContainer) Status() (Status, error) {
	c.m.Lock()
	defer c.m.Unlock()

	status, err := c.currentStatus()
	return status, err
}
// State returns a snapshot of the container's state, computed while holding
// the container lock.
func (c *linuxContainer) State() (*State, error) {
	c.m.Lock()
	defer c.m.Unlock()

	state, err := c.currentState()
	return state, err
}
// Processes returns the pids reported by the cgroup manager. A failure from
// the manager is wrapped as a system error and no pids are returned.
func (c *linuxContainer) Processes() ([]int, error) {
	pids, err := c.cgroupManager.GetPids()
	if err == nil {
		return pids, nil
	}
	return nil, newSystemError(err)
}
// Stats gathers the cgroup statistics and, for every configured network of
// type "veth", the statistics of its host-side interface.
//
// On failure the partially filled Stats value is returned together with a
// system error.
func (c *linuxContainer) Stats() (*Stats, error) {
	result := &Stats{}

	var err error
	if result.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
		return result, newSystemError(err)
	}

	for _, network := range c.config.Networks {
		// Only veth networks contribute interface statistics.
		if network.Type != "veth" {
			continue
		}
		ifaceStats, err := getNetworkInterfaceStats(network.HostInterfaceName)
		if err != nil {
			return result, newSystemError(err)
		}
		result.Interfaces = append(result.Interfaces, ifaceStats)
	}
	return result, nil
}
// Set hands the container's configuration to the cgroup manager while holding
// the container lock and returns the manager's result.
func (c *linuxContainer) Set() error {
	c.m.Lock()
	defer c.m.Unlock()

	err := c.cgroupManager.Set(c.config)
	return err
}
// Start launches process for this container while holding the container lock.
//
// When the container is currently Destroyed the process is started as the
// container's init process and the container state is written out afterwards;
// otherwise the process is created through the setns path. On success
// process.ops is set to the started parent process.
//
// Errors from building or starting the process are returned as system errors.
func (c *linuxContainer) Start(process *Process) error {
	c.m.Lock()
	defer c.m.Unlock()
	status, err := c.currentStatus()
	if err != nil {
		return err
	}
	doInit := status == Destroyed
	parent, err := c.newParentProcess(process, doInit)
	if err != nil {
		return newSystemError(err)
	}
	if err := parent.start(); err != nil {
		// terminate the process to ensure that it properly is reaped.
		if err := parent.terminate(); err != nil {
			log.Warn(err)
		}
		return newSystemError(err)
	}
	process.ops = parent
	if doInit {
		// The process is already running here, so a failure to persist the
		// state does not fail the start; it is logged instead of being
		// silently discarded.
		if err := c.updateState(parent); err != nil {
			log.Warn(err)
		}
	}
	return nil
}
// newParentProcess creates the pipe pair and command for p and wraps them in
// either an init process (doInit true) or a setns process (doInit false).
// Failures creating the pipe or the command are returned as system errors.
func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProcess, error) {
	parentPipe, childPipe, err := newPipe()
	if err != nil {
		return nil, newSystemError(err)
	}

	cmd, err := c.commandTemplate(p, childPipe)
	if err != nil {
		return nil, newSystemError(err)
	}

	if doInit {
		return c.newInitProcess(p, cmd, parentPipe, childPipe)
	}
	return c.newSetnsProcess(p, cmd, parentPipe, childPipe), nil
}
// commandTemplate builds the exec.Cmd shared by init and setns processes: it
// runs the container's init binary with p's standard streams, uses the
// container rootfs as working directory and passes childPipe as the only extra
// file. The returned error is always nil.
func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) {
	// SIGKILL is the parent-death signal unless the config names a positive
	// signal number of its own.
	deathSignal := syscall.SIGKILL
	if c.config.ParentDeathSignal > 0 {
		deathSignal = syscall.Signal(c.config.ParentDeathSignal)
	}

	cmd := &exec.Cmd{
		Path:        c.initPath,
		Args:        c.initArgs,
		Stdin:       p.Stdin,
		Stdout:      p.Stdout,
		Stderr:      p.Stderr,
		Dir:         c.config.Rootfs,
		ExtraFiles:  []*os.File{childPipe},
		SysProcAttr: &syscall.SysProcAttr{Pdeathsig: deathSignal},
	}
	return cmd, nil
}
// newInitProcess prepares cmd to run as the container's init process: it
// applies the clone flags derived from the configured namespaces, marks the
// command as a "standard" init via its environment and returns the wrapping
// initProcess.
//
// When a user namespace is requested the uid/gid mappings are added first; a
// failure to add them is returned as is.
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) {
	flags := c.config.Namespaces.CloneFlags()
	if flags&syscall.CLONE_NEWUSER != 0 {
		// An error here means user mappings are not supported.
		if err := c.addUidGidMappings(cmd.SysProcAttr); err != nil {
			return nil, err
		}
		// With user namespaces enabled, fall back to the zero-value
		// credential (root) when none was set.
		if cmd.SysProcAttr.Credential == nil {
			cmd.SysProcAttr.Credential = &syscall.Credential{}
		}
	}
	cmd.SysProcAttr.Cloneflags = flags
	cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE=standard")

	proc := &initProcess{
		cmd:        cmd,
		childPipe:  childPipe,
		parentPipe: parentPipe,
		manager:    c.cgroupManager,
		config:     c.newInitConfig(p),
	}
	return proc, nil
}
// newSetnsProcess prepares cmd for the setns path: the environment receives
// the pid of the container's init process, the "setns" init type and, when p
// has a console path, that path as well. The command is returned wrapped in a
// setnsProcess.
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) *setnsProcess {
	extraEnv := []string{
		fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.initProcess.pid()),
		"_LIBCONTAINER_INITTYPE=setns",
	}
	if p.consolePath != "" {
		extraEnv = append(extraEnv, "_LIBCONTAINER_CONSOLE_PATH="+p.consolePath)
	}
	cmd.Env = append(cmd.Env, extraEnv...)

	// TODO: set on container for process management
	proc := &setnsProcess{
		cmd:         cmd,
		cgroupPaths: c.cgroupManager.GetPaths(),
		childPipe:   childPipe,
		parentPipe:  parentPipe,
		config:      c.newInitConfig(p),
	}
	return proc
}
// newInitConfig combines the container configuration with the arguments,
// environment, user, working directory and console path of process.
func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
	cfg := &initConfig{Config: c.config}
	cfg.Args = process.Args
	cfg.Env = process.Env
	cfg.User = process.User
	cfg.Cwd = process.Cwd
	cfg.Console = process.consolePath
	return cfg
}
// newPipe creates a connected pair of close-on-exec stream sockets and returns
// them as files named "parent" and "child". On failure both files are nil.
func newPipe() (parent *os.File, child *os.File, err error) {
	const sockType = syscall.SOCK_STREAM | syscall.SOCK_CLOEXEC

	pair, err := syscall.Socketpair(syscall.AF_LOCAL, sockType, 0)
	if err != nil {
		return nil, nil, err
	}
	// The second descriptor of the pair is the parent end, the first the
	// child end.
	parent = os.NewFile(uintptr(pair[1]), "parent")
	child = os.NewFile(uintptr(pair[0]), "child")
	return parent, child, nil
}
// Destroy releases the container's resources while holding the container lock.
//
// It refuses to run unless the container's status is Destroyed. When the
// container does not use a pid namespace, the processes in its cgroups are
// killed first (a failure there is only logged). The cgroups are then
// destroyed and the root directory removed; the cgroup error takes precedence
// over the removal error.
func (c *linuxContainer) Destroy() error {
	c.m.Lock()
	defer c.m.Unlock()

	status, err := c.currentStatus()
	if err != nil {
		return err
	}
	if status != Destroyed {
		return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped)
	}

	if !c.config.Namespaces.Contains(configs.NEWPID) {
		if killErr := killCgroupProcesses(c.cgroupManager); killErr != nil {
			log.Warn(killErr)
		}
	}

	err = c.cgroupManager.Destroy()
	// The root directory is removed even when destroying the cgroups failed.
	removeErr := os.RemoveAll(c.root)
	if err == nil {
		err = removeErr
	}
	c.initProcess = nil
	return err
}
// Pause asks the cgroup manager to switch the freezer to Frozen while holding
// the container lock.
func (c *linuxContainer) Pause() error {
	c.m.Lock()
	defer c.m.Unlock()

	err := c.cgroupManager.Freeze(configs.Frozen)
	return err
}
// Resume asks the cgroup manager to switch the freezer to Thawed while holding
// the container lock.
func (c *linuxContainer) Resume() error {
	c.m.Lock()
	defer c.m.Unlock()

	err := c.cgroupManager.Freeze(configs.Thawed)
	return err
}
// NotifyOOM passes the container's cgroup paths to notifyOnOOM and returns the
// resulting channel and error.
func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
	paths := c.cgroupManager.GetPaths()
	return notifyOnOOM(paths)
}
// updateState records process as the container's init process and writes the
// resulting state, JSON encoded, to the state file inside the container's root
// directory. The init process is recorded even when a later step fails.
func (c *linuxContainer) updateState(process parentProcess) error {
	c.initProcess = process

	state, err := c.currentState()
	if err != nil {
		return err
	}

	statePath := filepath.Join(c.root, stateFilename)
	stateFile, err := os.Create(statePath)
	if err != nil {
		return err
	}
	defer stateFile.Close()

	encoder := json.NewEncoder(stateFile)
	return encoder.Encode(state)
}
// currentStatus derives the container status without taking the lock.
//
// Without an init process, or when the init process no longer exists, the
// status is Destroyed. Any other failure to probe the process is returned as a
// system error. A live container is Paused when its cgroup config has the
// freezer set to Frozen and Running otherwise.
func (c *linuxContainer) currentStatus() (Status, error) {
	if c.initProcess == nil {
		return Destroyed, nil
	}

	// Signal 0 only checks that the init process can be signalled.
	err := syscall.Kill(c.initProcess.pid(), 0)
	switch {
	case err == nil:
		// init is alive; fall through to the freezer check.
	case err == syscall.ESRCH:
		return Destroyed, nil
	default:
		return 0, newSystemError(err)
	}

	if cg := c.config.Cgroups; cg != nil && cg.Freezer == configs.Frozen {
		return Paused, nil
	}
	return Running, nil
}
// currentState assembles a State for the container without taking the lock.
//
// A Destroyed container yields a ContainerNotExists error; a failure to read
// the init process start time yields a system error. The namespace paths are
// resolved for every configured namespace against the init process pid.
func (c *linuxContainer) currentState() (*State, error) {
	status, err := c.currentStatus()
	switch {
	case err != nil:
		return nil, err
	case status == Destroyed:
		return nil, newGenericError(fmt.Errorf("container destroyed"), ContainerNotExists)
	}

	started, err := c.initProcess.startTime()
	if err != nil {
		return nil, newSystemError(err)
	}

	snapshot := &State{
		ID:                   c.ID(),
		Config:               *c.config,
		InitProcessPid:       c.initProcess.pid(),
		InitProcessStartTime: started,
		CgroupPaths:          c.cgroupManager.GetPaths(),
		NamespacePaths:       make(map[configs.NamespaceType]string),
	}
	for _, namespace := range c.config.Namespaces {
		snapshot.NamespacePaths[namespace.Type] = namespace.GetPath(c.initProcess.pid())
	}
	return snapshot, nil
}

View file

@ -0,0 +1,200 @@
// +build linux
package libcontainer
import (
"fmt"
"os"
"testing"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
)
// mockCgroupManager is a test double for the container's cgroup manager: it
// serves canned pids, stats and paths, and every other operation succeeds
// without doing anything.
type mockCgroupManager struct {
	pids  []int
	stats *cgroups.Stats
	paths map[string]string
}

// GetPids returns the canned pid list.
func (mgr *mockCgroupManager) GetPids() ([]int, error) {
	return mgr.pids, nil
}

// GetStats returns the canned statistics.
func (mgr *mockCgroupManager) GetStats() (*cgroups.Stats, error) {
	return mgr.stats, nil
}

// Apply is a no-op that always succeeds.
func (mgr *mockCgroupManager) Apply(pid int) error {
	return nil
}

// Set is a no-op that always succeeds.
func (mgr *mockCgroupManager) Set(container *configs.Config) error {
	return nil
}

// Destroy is a no-op that always succeeds.
func (mgr *mockCgroupManager) Destroy() error {
	return nil
}

// GetPaths returns the canned cgroup paths.
func (mgr *mockCgroupManager) GetPaths() map[string]string {
	return mgr.paths
}

// Freeze is a no-op that always succeeds.
func (mgr *mockCgroupManager) Freeze(state configs.FreezerState) error {
	return nil
}
// mockProcess is a test double for a container process: it reports a fixed
// pid and start time, and every other operation succeeds without doing
// anything.
type mockProcess struct {
	_pid    int
	started string
}

// terminate is a no-op that always succeeds.
func (proc *mockProcess) terminate() error {
	return nil
}

// pid returns the fixed pid.
func (proc *mockProcess) pid() int {
	return proc._pid
}

// startTime returns the fixed start time.
func (proc *mockProcess) startTime() (string, error) {
	return proc.started, nil
}

// start is a no-op that always succeeds.
func (proc *mockProcess) start() error {
	return nil
}

// wait returns no process state and no error.
func (proc *mockProcess) wait() (*os.ProcessState, error) {
	return nil, nil
}

// signal ignores the signal and always succeeds.
func (proc *mockProcess) signal(_ os.Signal) error {
	return nil
}
// TestGetContainerPids checks that Processes returns exactly the pids served
// by the cgroup manager.
func TestGetContainerPids(t *testing.T) {
	expectedPids := []int{1, 2, 3}
	container := &linuxContainer{
		id:            "myid",
		config:        &configs.Config{},
		cgroupManager: &mockCgroupManager{pids: []int{1, 2, 3}},
	}
	pids, err := container.Processes()
	if err != nil {
		t.Fatal(err)
	}
	// Compare lengths first so a short result fails the test instead of
	// panicking on the index below.
	if len(pids) != len(expectedPids) {
		t.Fatalf("expected %d pids but received %d", len(expectedPids), len(pids))
	}
	for i, expected := range expectedPids {
		if pids[i] != expected {
			t.Fatalf("expected pid %d but received %d", expected, pids[i])
		}
	}
}
func TestGetContainerStats(t *testing.T) {
container := &linuxContainer{
id: "myid",
config: &configs.Config{},
cgroupManager: &mockCgroupManager{
pids: []int{1, 2, 3},
stats: &cgroups.Stats{
MemoryStats: cgroups.MemoryStats{
Usage: 1024,
},
},
},
}
stats, err := container.Stats()
if err != nil {
t.Fatal(err)
}
if stats.CgroupStats == nil {
t.Fatal("cgroup stats are nil")
}
if stats.CgroupStats.MemoryStats.Usage != 1024 {
t.Fatalf("expected memory usage 1024 but recevied %d", stats.CgroupStats.MemoryStats.Usage)
}
}
// TestGetContainerState checks the State built for a container whose init
// process is the test process itself: pid, start time, cgroup paths and one
// path per configured namespace.
func TestGetContainerState(t *testing.T) {
	const (
		expectedMemoryPath  = "/sys/fs/cgroup/memory/myid"
		expectedNetworkPath = "/networks/fd"
	)
	pid := os.Getpid()

	container := &linuxContainer{
		id: "myid",
		config: &configs.Config{
			Namespaces: configs.Namespaces{
				{Type: configs.NEWPID},
				{Type: configs.NEWNS},
				{Type: configs.NEWNET, Path: expectedNetworkPath},
				{Type: configs.NEWUTS},
				{Type: configs.NEWIPC},
			},
		},
		initProcess: &mockProcess{
			_pid:    pid,
			started: "010",
		},
		cgroupManager: &mockCgroupManager{
			pids: []int{1, 2, 3},
			stats: &cgroups.Stats{
				MemoryStats: cgroups.MemoryStats{
					Usage: 1024,
				},
			},
			paths: map[string]string{
				"memory": expectedMemoryPath,
			},
		},
	}

	state, err := container.State()
	if err != nil {
		t.Fatal(err)
	}
	if state.InitProcessPid != pid {
		t.Fatalf("expected pid %d but received %d", pid, state.InitProcessPid)
	}
	if state.InitProcessStartTime != "010" {
		t.Fatalf("expected process start time 010 but received %s", state.InitProcessStartTime)
	}

	cgroupPaths := state.CgroupPaths
	if cgroupPaths == nil {
		t.Fatal("cgroup paths should not be nil")
	}
	if memPath := cgroupPaths["memory"]; memPath != expectedMemoryPath {
		t.Fatalf("expected memory path %q but received %q", expectedMemoryPath, memPath)
	}

	// File name under /proc/<pid>/ns for each namespace type.
	nsFiles := map[configs.NamespaceType]string{
		configs.NEWNET:  "net",
		configs.NEWNS:   "mnt",
		configs.NEWPID:  "pid",
		configs.NEWIPC:  "ipc",
		configs.NEWUSER: "user",
		configs.NEWUTS:  "uts",
	}
	for _, ns := range container.config.Namespaces {
		path := state.NamespacePaths[ns.Type]
		if path == "" {
			t.Fatalf("expected non nil namespace path for %s", ns.Type)
		}
		// The network namespace was configured with an explicit path; all
		// others are expected under /proc/<pid>/ns.
		expected := expectedNetworkPath
		if ns.Type != configs.NEWNET {
			expected = fmt.Sprintf("/proc/%d/ns/%s", pid, nsFiles[ns.Type])
		}
		if expected != path {
			t.Fatalf("expected path %q but received %q", expected, path)
		}
	}
}

View file

@ -0,0 +1,13 @@
// +build !go1.4
package libcontainer
import (
"fmt"
"syscall"
)
// addUidGidMappings is the variant compiled for toolchains older than Go 1.4:
// it leaves sys untouched and always reports that user namespaces are
// unsupported.
func (c *linuxContainer) addUidGidMappings(sys *syscall.SysProcAttr) error {
	err := fmt.Errorf("User namespace is not supported in golang < 1.4")
	return err
}

View file

@ -0,0 +1,26 @@
// +build go1.4
package libcontainer
import "syscall"
// addUidGidMappings copies the container's uid and gid mappings into sys as
// syscall.SysProcIDMap entries. A nil mapping list in the config leaves the
// corresponding field of sys untouched. The returned error is always nil.
func (c *linuxContainer) addUidGidMappings(sys *syscall.SysProcAttr) error {
	if uidMaps := c.config.UidMappings; uidMaps != nil {
		converted := make([]syscall.SysProcIDMap, 0, len(uidMaps))
		for _, m := range uidMaps {
			converted = append(converted, syscall.SysProcIDMap{
				ContainerID: m.ContainerID,
				HostID:      m.HostID,
				Size:        m.Size,
			})
		}
		sys.UidMappings = converted
	}
	if gidMaps := c.config.GidMappings; gidMaps != nil {
		converted := make([]syscall.SysProcIDMap, 0, len(gidMaps))
		for _, m := range gidMaps {
			converted = append(converted, syscall.SysProcIDMap{
				ContainerID: m.ContainerID,
				HostID:      m.HostID,
				Size:        m.Size,
			})
		}
		sys.GidMappings = converted
	}
	return nil
}

View file

@ -1,159 +0,0 @@
package devices
// Default device tables. DefaultSimpleDevices is the shared base list; the two
// lists declared after it each prepend their own entries to that base.
var (
// These are devices that are to be both allowed and created.
// Every entry is a character device with permissions "rwm" and file mode 0666.
DefaultSimpleDevices = []*Device{
// /dev/null and zero
{
Path: "/dev/null",
Type: 'c',
MajorNumber: 1,
MinorNumber: 3,
CgroupPermissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/zero",
Type: 'c',
MajorNumber: 1,
MinorNumber: 5,
CgroupPermissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/full",
Type: 'c',
MajorNumber: 1,
MinorNumber: 7,
CgroupPermissions: "rwm",
FileMode: 0666,
},
// consoles and ttys
{
Path: "/dev/tty",
Type: 'c',
MajorNumber: 5,
MinorNumber: 0,
CgroupPermissions: "rwm",
FileMode: 0666,
},
// /dev/urandom,/dev/random
{
Path: "/dev/urandom",
Type: 'c',
MajorNumber: 1,
MinorNumber: 9,
CgroupPermissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/random",
Type: 'c',
MajorNumber: 1,
MinorNumber: 8,
CgroupPermissions: "rwm",
FileMode: 0666,
},
}
// DefaultAllowedDevices is the list of entries below followed by every
// entry of DefaultSimpleDevices.
DefaultAllowedDevices = append([]*Device{
// allow mknod for any device
{
Type: 'c',
MajorNumber: Wildcard,
MinorNumber: Wildcard,
CgroupPermissions: "m",
},
{
Type: 'b',
MajorNumber: Wildcard,
MinorNumber: Wildcard,
CgroupPermissions: "m",
},
{
Path: "/dev/console",
Type: 'c',
MajorNumber: 5,
MinorNumber: 1,
CgroupPermissions: "rwm",
},
{
Path: "/dev/tty0",
Type: 'c',
MajorNumber: 4,
MinorNumber: 0,
CgroupPermissions: "rwm",
},
{
Path: "/dev/tty1",
Type: 'c',
MajorNumber: 4,
MinorNumber: 1,
CgroupPermissions: "rwm",
},
// /dev/pts/ - pts namespaces are "coming soon"
{
Path: "",
Type: 'c',
MajorNumber: 136,
MinorNumber: Wildcard,
CgroupPermissions: "rwm",
},
{
Path: "",
Type: 'c',
MajorNumber: 5,
MinorNumber: 2,
CgroupPermissions: "rwm",
},
// tuntap
{
Path: "",
Type: 'c',
MajorNumber: 10,
MinorNumber: 200,
CgroupPermissions: "rwm",
},
/*// fuse
{
Path: "",
Type: 'c',
MajorNumber: 10,
MinorNumber: 229,
CgroupPermissions: "rwm",
},
// rtc
{
Path: "",
Type: 'c',
MajorNumber: 254,
MinorNumber: 0,
CgroupPermissions: "rwm",
},
*/
}, DefaultSimpleDevices...)
// DefaultAutoCreatedDevices is the /dev/fuse entry below followed by every
// entry of DefaultSimpleDevices.
DefaultAutoCreatedDevices = append([]*Device{
{
// /dev/fuse is created but not allowed.
// This is to allow java to work. Because java
// Insists on there being a /dev/fuse
// https://github.com/docker/docker/issues/514
// https://github.com/docker/docker/issues/2393
//
Path: "/dev/fuse",
Type: 'c',
MajorNumber: 10,
MinorNumber: 229,
CgroupPermissions: "rwm",
},
}, DefaultSimpleDevices...)
)

View file

@ -7,14 +7,12 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"syscall" "syscall"
)
const ( "github.com/docker/libcontainer/configs"
Wildcard = -1
) )
var ( var (
ErrNotADeviceNode = errors.New("not a device node") ErrNotADevice = errors.New("not a device node")
) )
// Testing dependencies // Testing dependencies
@ -23,45 +21,20 @@ var (
ioutilReadDir = ioutil.ReadDir ioutilReadDir = ioutil.ReadDir
) )
type Device struct {
Type rune `json:"type,omitempty"`
Path string `json:"path,omitempty"` // It is fine if this is an empty string in the case that you are using Wildcards
MajorNumber int64 `json:"major_number,omitempty"` // Use the wildcard constant for wildcards.
MinorNumber int64 `json:"minor_number,omitempty"` // Use the wildcard constant for wildcards.
CgroupPermissions string `json:"cgroup_permissions,omitempty"` // Typically just "rwm"
FileMode os.FileMode `json:"file_mode,omitempty"` // The permission bits of the file's mode
Uid uint32 `json:"uid,omitempty"`
Gid uint32 `json:"gid,omitempty"`
}
func GetDeviceNumberString(deviceNumber int64) string {
if deviceNumber == Wildcard {
return "*"
} else {
return fmt.Sprintf("%d", deviceNumber)
}
}
func (device *Device) GetCgroupAllowString() string {
return fmt.Sprintf("%c %s:%s %s", device.Type, GetDeviceNumberString(device.MajorNumber), GetDeviceNumberString(device.MinorNumber), device.CgroupPermissions)
}
// Given the path to a device and it's cgroup_permissions(which cannot be easilly queried) look up the information about a linux device and return that information as a Device struct. // Given the path to a device and it's cgroup_permissions(which cannot be easilly queried) look up the information about a linux device and return that information as a Device struct.
func GetDevice(path, cgroupPermissions string) (*Device, error) { func DeviceFromPath(path, permissions string) (*configs.Device, error) {
fileInfo, err := osLstat(path) fileInfo, err := osLstat(path)
if err != nil { if err != nil {
return nil, err return nil, err
} }
var ( var (
devType rune devType rune
mode = fileInfo.Mode() mode = fileInfo.Mode()
fileModePermissionBits = os.FileMode.Perm(mode) fileModePermissionBits = os.FileMode.Perm(mode)
) )
switch { switch {
case mode&os.ModeDevice == 0: case mode&os.ModeDevice == 0:
return nil, ErrNotADeviceNode return nil, ErrNotADevice
case mode&os.ModeCharDevice != 0: case mode&os.ModeCharDevice != 0:
fileModePermissionBits |= syscall.S_IFCHR fileModePermissionBits |= syscall.S_IFCHR
devType = 'c' devType = 'c'
@ -69,36 +42,33 @@ func GetDevice(path, cgroupPermissions string) (*Device, error) {
fileModePermissionBits |= syscall.S_IFBLK fileModePermissionBits |= syscall.S_IFBLK
devType = 'b' devType = 'b'
} }
stat_t, ok := fileInfo.Sys().(*syscall.Stat_t) stat_t, ok := fileInfo.Sys().(*syscall.Stat_t)
if !ok { if !ok {
return nil, fmt.Errorf("cannot determine the device number for device %s", path) return nil, fmt.Errorf("cannot determine the device number for device %s", path)
} }
devNumber := int(stat_t.Rdev) devNumber := int(stat_t.Rdev)
return &configs.Device{
return &Device{ Type: devType,
Type: devType, Path: path,
Path: path, Major: Major(devNumber),
MajorNumber: Major(devNumber), Minor: Minor(devNumber),
MinorNumber: Minor(devNumber), Permissions: permissions,
CgroupPermissions: cgroupPermissions, FileMode: fileModePermissionBits,
FileMode: fileModePermissionBits, Uid: stat_t.Uid,
Uid: stat_t.Uid, Gid: stat_t.Gid,
Gid: stat_t.Gid,
}, nil }, nil
} }
func GetHostDeviceNodes() ([]*Device, error) { func HostDevices() ([]*configs.Device, error) {
return getDeviceNodes("/dev") return getDevices("/dev")
} }
func getDeviceNodes(path string) ([]*Device, error) { func getDevices(path string) ([]*configs.Device, error) {
files, err := ioutilReadDir(path) files, err := ioutilReadDir(path)
if err != nil { if err != nil {
return nil, err return nil, err
} }
out := []*configs.Device{}
out := []*Device{}
for _, f := range files { for _, f := range files {
switch { switch {
case f.IsDir(): case f.IsDir():
@ -106,7 +76,7 @@ func getDeviceNodes(path string) ([]*Device, error) {
case "pts", "shm", "fd", "mqueue": case "pts", "shm", "fd", "mqueue":
continue continue
default: default:
sub, err := getDeviceNodes(filepath.Join(path, f.Name())) sub, err := getDevices(filepath.Join(path, f.Name()))
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -117,16 +87,14 @@ func getDeviceNodes(path string) ([]*Device, error) {
case f.Name() == "console": case f.Name() == "console":
continue continue
} }
device, err := DeviceFromPath(filepath.Join(path, f.Name()), "rwm")
device, err := GetDevice(filepath.Join(path, f.Name()), "rwm")
if err != nil { if err != nil {
if err == ErrNotADeviceNode { if err == ErrNotADevice {
continue continue
} }
return nil, err return nil, err
} }
out = append(out, device) out = append(out, device)
} }
return out, nil return out, nil
} }

View file

@ -6,7 +6,7 @@ import (
"testing" "testing"
) )
func TestGetDeviceLstatFailure(t *testing.T) { func TestDeviceFromPathLstatFailure(t *testing.T) {
testError := errors.New("test error") testError := errors.New("test error")
// Override os.Lstat to inject error. // Override os.Lstat to inject error.
@ -14,13 +14,13 @@ func TestGetDeviceLstatFailure(t *testing.T) {
return nil, testError return nil, testError
} }
_, err := GetDevice("", "") _, err := DeviceFromPath("", "")
if err != testError { if err != testError {
t.Fatalf("Unexpected error %v, expected %v", err, testError) t.Fatalf("Unexpected error %v, expected %v", err, testError)
} }
} }
func TestGetHostDeviceNodesIoutilReadDirFailure(t *testing.T) { func TestHostDevicesIoutilReadDirFailure(t *testing.T) {
testError := errors.New("test error") testError := errors.New("test error")
// Override ioutil.ReadDir to inject error. // Override ioutil.ReadDir to inject error.
@ -28,13 +28,13 @@ func TestGetHostDeviceNodesIoutilReadDirFailure(t *testing.T) {
return nil, testError return nil, testError
} }
_, err := GetHostDeviceNodes() _, err := HostDevices()
if err != testError { if err != testError {
t.Fatalf("Unexpected error %v, expected %v", err, testError) t.Fatalf("Unexpected error %v, expected %v", err, testError)
} }
} }
func TestGetHostDeviceNodesIoutilReadDirDeepFailure(t *testing.T) { func TestHostDevicesIoutilReadDirDeepFailure(t *testing.T) {
testError := errors.New("test error") testError := errors.New("test error")
called := false called := false
@ -54,7 +54,7 @@ func TestGetHostDeviceNodesIoutilReadDirDeepFailure(t *testing.T) {
return []os.FileInfo{fi}, nil return []os.FileInfo{fi}, nil
} }
_, err := GetHostDeviceNodes() _, err := HostDevices()
if err != testError { if err != testError {
t.Fatalf("Unexpected error %v, expected %v", err, testError) t.Fatalf("Unexpected error %v, expected %v", err, testError)
} }

View file

@ -20,7 +20,3 @@ func Major(devNumber int) int64 {
func Minor(devNumber int) int64 { func Minor(devNumber int) int64 {
return int64((devNumber & 0xff) | ((devNumber >> 12) & 0xfff00)) return int64((devNumber & 0xff) | ((devNumber >> 12) & 0xfff00))
} }
func Mkdev(majorNumber int64, minorNumber int64) int {
return int((majorNumber << 8) | (minorNumber & 0xff) | ((minorNumber & 0xfff00) << 12))
}

View file

@ -0,0 +1,38 @@
% nsinit User Manual
% docker/libcontainer
% JAN 2015
NAME:
nsinit - A low-level utility for managing containers.
It is used to spawn new containers or join existing containers.
USAGE:
nsinit [global options] command [command options] [arguments...]
VERSION:
0.1
COMMANDS:
config display the container configuration
exec execute a new command inside a container
init runs the init process inside the namespace
oom display oom notifications for a container
pause pause the container's processes
stats display statistics for the container
unpause unpause the container's processes
help, h shows a list of commands or help for one command
EXAMPLES:
Get the <container_id> of an already running docker container.
`sudo docker ps` will return the list of all the running containers.
Take the <container_id> (e.g. 4addb0b2d307) and go to its config directory
`/var/lib/docker/execdriver/native/4addb0b2d307` and here you can run the nsinit
command line utility.
e.g. `nsinit exec /bin/bash` will start a shell on the already running container.
# HISTORY
Jan 2015, Originally compiled by Shishir Mahajan (shishir dot mahajan at redhat dot com)
based on nsinit source material and internal work.

View file

@ -1,5 +1,7 @@
package libcontainer package libcontainer
import "io"
// API error code type. // API error code type.
type ErrorCode int type ErrorCode int
@ -8,29 +10,52 @@ const (
// Factory errors // Factory errors
IdInUse ErrorCode = iota IdInUse ErrorCode = iota
InvalidIdFormat InvalidIdFormat
// TODO: add Load errors
// Container errors // Container errors
ContainerDestroyed ContainerNotExists
ContainerPaused ContainerPaused
ContainerNotStopped
ContainerNotRunning
// Process errors
ProcessNotExecuted
// Common errors // Common errors
ConfigInvalid ConfigInvalid
SystemError SystemError
) )
// String returns the human-readable description of the error code. Codes
// without a description of their own yield "Unknown error".
func (c ErrorCode) String() string {
	description := "Unknown error"
	switch c {
	// Factory errors
	case IdInUse:
		description = "Id already in use"
	case InvalidIdFormat:
		description = "Invalid format"
	// Container errors
	case ContainerNotExists:
		description = "Container does not exist"
	case ContainerPaused:
		description = "Container paused"
	case ContainerNotStopped:
		description = "Container is not stopped"
	case ContainerNotRunning:
		description = "Container is not running"
	// Common errors
	case ConfigInvalid:
		description = "Invalid configuration"
	case SystemError:
		description = "System error"
	}
	return description
}
// API Error type. // API Error type.
type Error interface { type Error interface {
error error
// Returns the stack trace, if any, which identifies the
// point at which the error occurred.
Stack() []byte
// Returns a verbose string including the error message // Returns a verbose string including the error message
// and a representation of the stack trace suitable for // and a representation of the stack trace suitable for
// printing. // printing.
Detail() string Detail(w io.Writer) error
// Returns the error code for this error. // Returns the error code for this error.
Code() ErrorCode Code() ErrorCode

View file

@ -0,0 +1,20 @@
package libcontainer
import "testing"
// TestErrorCode checks the description returned by String for a selection of
// error codes.
func TestErrorCode(t *testing.T) {
	expectations := map[ErrorCode]string{
		IdInUse:            "Id already in use",
		InvalidIdFormat:    "Invalid format",
		ContainerPaused:    "Container paused",
		ConfigInvalid:      "Invalid configuration",
		SystemError:        "System error",
		ContainerNotExists: "Container does not exist",
	}

	for code, want := range expectations {
		got := code.String()
		if got == want {
			continue
		}
		t.Fatalf("expected string %q but received %q", want, got)
	}
}

View file

@ -1,7 +1,10 @@
package libcontainer package libcontainer
type Factory interface { import (
"github.com/docker/libcontainer/configs"
)
type Factory interface {
// Creates a new container with the given id and starts the initial process inside it. // Creates a new container with the given id and starts the initial process inside it.
// id must be a string containing only letters, digits and underscores and must contain // id must be a string containing only letters, digits and underscores and must contain
// between 1 and 1024 characters, inclusive. // between 1 and 1024 characters, inclusive.
@ -11,22 +14,31 @@ type Factory interface {
// //
// Returns the new container with a running process. // Returns the new container with a running process.
// //
// Errors: // errors:
// IdInUse - id is already in use by a container // IdInUse - id is already in use by a container
// InvalidIdFormat - id has incorrect format // InvalidIdFormat - id has incorrect format
// ConfigInvalid - config is invalid // ConfigInvalid - config is invalid
// SystemError - System error // Systemerror - System error
// //
// On error, any partially created container parts are cleaned up (the operation is atomic). // On error, any partially created container parts are cleaned up (the operation is atomic).
Create(id string, config *Config) (Container, Error) Create(id string, config *configs.Config) (Container, error)
// Load takes an ID for an existing container and reconstructs the container // Load takes an ID for an existing container and returns the container information
// from the state. // from the state. This presents a read only view of the container.
// //
// Errors: // errors:
// Path does not exist // Path does not exist
// Container is stopped // Container is stopped
// System error // System error
// TODO: fix description Load(id string) (Container, error)
Load(id string) (Container, Error)
// StartInitialization is an internal API to libcontainer used during the rexec of the
// container. pipefd is the fd to the child end of the pipe used to synchronize the
// parent and child process providing state and configuration to the child process and
// returning any errors during the init of the container
//
// Errors:
// pipe connection error
// system error
StartInitialization(pipefd uintptr) error
} }

View file

@ -0,0 +1,262 @@
// +build linux
package libcontainer
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"regexp"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/cgroups/fs"
"github.com/docker/libcontainer/cgroups/systemd"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/configs/validate"
)
const (
// stateFilename is the name of the JSON state file kept inside a
// container's root directory.
stateFilename = "state.json"
)
var (
// idRegex matches non-empty strings made up only of word characters
// (letters, digits and underscore).
// NOTE(review): presumably used to validate container ids - confirm at the call site.
idRegex = regexp.MustCompile(`^[\w_]+$`)
// maxIdLen is presumably the maximum accepted container id length, in line
// with the 1024-character limit documented on Factory.Create - confirm.
maxIdLen = 1024
)
// InitArgs returns an options func to configure a LinuxFactory with the
// provided init arguments.
//
// args[0] names the init binary and the remaining values are passed through
// unchanged. The factory's InitPath is set to the resolved binary and its
// InitArgs to that path followed by args[1:].
//
// The returned option fails when no arguments were supplied, instead of
// panicking on the missing binary name.
func InitArgs(args ...string) func(*LinuxFactory) error {
	return func(l *LinuxFactory) error {
		if len(args) == 0 {
			return fmt.Errorf("init arguments must start with the init binary")
		}
		name := args[0]
		// A bare command name (no directory component) is resolved through
		// PATH; when the lookup fails the name is kept as given.
		if filepath.Base(name) == name {
			if lp, err := exec.LookPath(name); err == nil {
				name = lp
			}
		}
		l.InitPath = name
		l.InitArgs = append([]string{name}, args[1:]...)
		return nil
	}
}
// InitPath returns an options func that points a LinuxFactory at the init
// binary found at path and makes args its init arguments. The path is expected
// to be absolute; it is stored as given, without any lookup or check.
func InitPath(path string, args ...string) func(*LinuxFactory) error {
	configure := func(factory *LinuxFactory) error {
		factory.InitPath, factory.InitArgs = path, args
		return nil
	}
	return configure
}
// SystemdCgroups is an options func that makes a LinuxFactory hand out
// containers whose cgroups are created and managed through systemd.
func SystemdCgroups(l *LinuxFactory) error {
	newManager := func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
		return &systemd.Manager{Cgroups: config, Paths: paths}
	}
	l.NewCgroupsManager = newManager
	return nil
}
// Cgroupfs is an options func that makes a LinuxFactory hand out containers
// whose cgroups are created and managed by the native cgroups filesystem
// implementation.
func Cgroupfs(l *LinuxFactory) error {
	newManager := func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
		return &fs.Manager{Cgroups: config, Paths: paths}
	}
	l.NewCgroupsManager = newManager
	return nil
}
// New returns a linux based container factory based in the root directory and
// configures the factory with the provided option funcs.
//
// A non-empty root is created (mode 0700) if it does not exist yet; failure to
// do so is reported as a SystemError. The factory starts from two defaults,
// re-executing os.Args[0] with the "init" argument and the Cgroupfs manager,
// and then applies options in order, so callers can override either default.
// The first option that fails aborts construction and its error is returned.
func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
	if root != "" {
		if err := os.MkdirAll(root, 0700); err != nil {
			return nil, newGenericError(err, SystemError)
		}
	}
	l := &LinuxFactory{
		Root:      root,
		Validator: validate.New(),
	}
	// Run the defaults through the same checked loop as the caller's options
	// so that an error from a default is never silently dropped.
	defaults := []func(*LinuxFactory) error{
		InitArgs(os.Args[0], "init"),
		Cgroupfs,
	}
	for _, opt := range append(defaults, options...) {
		if err := opt(l); err != nil {
			return nil, err
		}
	}
	return l, nil
}
// LinuxFactory implements the default factory interface for linux based systems.
// Its fields are filled in by New and by option funcs such as InitArgs,
// InitPath, Cgroupfs and SystemdCgroups.
type LinuxFactory struct {
// Root directory for the factory to store state. Each container uses the
// sub-directory named after its id. Create and Load reject an empty Root.
Root string
// InitPath is the absolute path to the init binary.
InitPath string
// InitArgs are arguments for calling the init responsibilities for spawning
// a container.
InitArgs []string
// Validator provides validation to container configurations. Create runs it
// on the supplied config before touching the filesystem.
Validator validate.Validator
// NewCgroupsManager returns an initialized cgroups manager for a single container.
// Create passes nil paths; Load passes the cgroup paths from the saved state.
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
}
// Create validates id and config, reserves the directory <Root>/<id> and
// returns a container bound to it.
//
// Errors:
//   ConfigInvalid   - the factory has no root, or the validator rejected config
//   InvalidIdFormat - id was rejected by validateID
//   IdInUse         - the container directory already exists
//   SystemError     - the directory could not be inspected or created
func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
	if l.Root == "" {
		return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
	}
	if err := l.validateID(id); err != nil {
		return nil, err
	}
	if err := l.Validator.Validate(config); err != nil {
		return nil, newGenericError(err, ConfigInvalid)
	}

	containerRoot := filepath.Join(l.Root, id)
	_, statErr := os.Stat(containerRoot)
	switch {
	case statErr == nil:
		// Something already lives at that path: the id is taken.
		return nil, newGenericError(fmt.Errorf("Container with id exists: %v", id), IdInUse)
	case !os.IsNotExist(statErr):
		return nil, newGenericError(statErr, SystemError)
	}
	if err := os.MkdirAll(containerRoot, 0700); err != nil {
		return nil, newGenericError(err, SystemError)
	}

	container := &linuxContainer{
		id:            id,
		root:          containerRoot,
		config:        config,
		initPath:      l.InitPath,
		initArgs:      l.InitArgs,
		cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
	}
	return container, nil
}
// Load reconstructs the container identified by id from the state persisted
// under <Root>/<id>. The returned container's init process is a
// restoredProcess carrying the pid and start time recorded in that state.
//
// Errors:
//   ConfigInvalid      - the factory has no root
//   InvalidIdFormat    - id was rejected by validateID
//   ContainerNotExists - no state file exists for id
//   SystemError        - the state file could not be opened or decoded
func (l *LinuxFactory) Load(id string) (Container, error) {
	if l.Root == "" {
		return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
	}
	// Apply the same id rules as Create. Without this an id containing path
	// separators or ".." would be joined onto Root and could make the factory
	// read state from outside its own directory.
	if err := l.validateID(id); err != nil {
		return nil, err
	}
	containerRoot := filepath.Join(l.Root, id)
	state, err := l.loadState(containerRoot)
	if err != nil {
		return nil, err
	}
	r := &restoredProcess{
		processPid:       state.InitProcessPid,
		processStartTime: state.InitProcessStartTime,
	}
	return &linuxContainer{
		initProcess:   r,
		id:            id,
		config:        &state.Config,
		initPath:      l.InitPath,
		initArgs:      l.InitArgs,
		cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
		root:          containerRoot,
	}, nil
}
// StartInitialization runs the container init inside the re-executed process.
// It wraps pipefd, the child end of the pipe shared with the parent, reads the
// init configuration from it and hands control to the matching initer.
//
// This is a low level detail of the reexec and is not meant for external use.
// Any error is also JSON-encoded onto the pipe for the parent; the process
// panics if that report itself cannot be written.
func (l *LinuxFactory) StartInitialization(pipefd uintptr) (err error) {
	pipe := os.NewFile(pipefd, "pipe")
	// The init type must be read before the environment is wiped below.
	it := initType(os.Getenv("_LIBCONTAINER_INITTYPE"))

	// Drop every inherited variable so libcontainer-specific ones do not leak.
	os.Clearenv()

	defer func() {
		if err != nil {
			// Consume whatever the parent already sent so that it does not
			// get ECONNRESET when this side writes to the pipe.
			ioutil.ReadAll(pipe)
			if encErr := json.NewEncoder(pipe).Encode(newSystemError(err)); encErr != nil {
				panic(encErr)
			}
		}
		// The pipe is closed on every non-panicking path.
		pipe.Close()
	}()

	initializer, err := newContainerInit(it, pipe)
	if err != nil {
		return err
	}
	return initializer.Init()
}
// loadState reads and decodes the state file stored in the container
// directory root.
//
// A missing file is reported as ContainerNotExists; any other failure to open
// or decode it, including a file that holds only JSON null, is a SystemError.
func (l *LinuxFactory) loadState(root string) (*State, error) {
	f, err := os.Open(filepath.Join(root, stateFilename))
	if err != nil {
		if os.IsNotExist(err) {
			return nil, newGenericError(err, ContainerNotExists)
		}
		return nil, newGenericError(err, SystemError)
	}
	defer f.Close()
	var state *State
	if err := json.NewDecoder(f).Decode(&state); err != nil {
		return nil, newGenericError(err, SystemError)
	}
	// A literal JSON null decodes without error but leaves the pointer nil;
	// callers dereference the result, so report it instead of returning nil.
	if state == nil {
		return nil, newGenericError(fmt.Errorf("container state in %s is empty", f.Name()), SystemError)
	}
	return state, nil
}
// validateID reports an InvalidIdFormat error unless id matches idRegex and
// is at most maxIdLen bytes long.
func (l *LinuxFactory) validateID(id string) error {
	if wellFormed := idRegex.MatchString(id) && len(id) <= maxIdLen; !wellFormed {
		return newGenericError(fmt.Errorf("Invalid id format: %v", id), InvalidIdFormat)
	}
	return nil
}
// restoredProcess represents a process where the calling process may or may not be
// the parent process. This process is created when a factory loads a container from
// a persisted state.
//
// Only pid and startTime return data; start, terminate, wait and signal
// always fail with a SystemError.
type restoredProcess struct {
// processPid is the init process pid taken from the persisted state.
processPid int
// processStartTime is the init process start time taken from the persisted state.
processStartTime string
}
// start always fails: a restored process cannot be started.
func (p *restoredProcess) start() error {
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
}
// pid returns the recorded pid of the init process.
func (p *restoredProcess) pid() int {
return p.processPid
}
// terminate always fails: a restored process cannot be terminated.
func (p *restoredProcess) terminate() error {
return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError)
}
// wait always fails: a restored process cannot be waited on.
func (p *restoredProcess) wait() (*os.ProcessState, error) {
return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError)
}
// startTime returns the recorded start time; the error is always nil.
func (p *restoredProcess) startTime() (string, error) {
return p.processStartTime, nil
}
// signal always fails: a restored process cannot be signaled.
func (p *restoredProcess) signal(s os.Signal) error {
return newGenericError(fmt.Errorf("restored process cannot be signaled"), SystemError)
}

View file

@ -0,0 +1,125 @@
// +build linux
package libcontainer
import (
"encoding/json"
"io/ioutil"
"os"
"path/filepath"
"testing"
"github.com/docker/libcontainer/configs"
)
// newTestRoot creates a fresh temporary directory to serve as a factory root
// and returns its path. The caller is responsible for removing it.
func newTestRoot() (string, error) {
	// ioutil.TempDir already creates the directory, so no further MkdirAll
	// is needed before handing the path back.
	return ioutil.TempDir("", "libcontainer")
}
// TestFactoryNew checks that New returns a non-nil *LinuxFactory rooted at the
// directory it was given.
func TestFactoryNew(t *testing.T) {
	root, err := newTestRoot()
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(root)

	factory, err := New(root, Cgroupfs)
	if err != nil {
		t.Fatal(err)
	}
	if factory == nil {
		t.Fatal("factory should not be nil")
	}

	linuxFactory, isLinux := factory.(*LinuxFactory)
	if !isLinux {
		t.Fatal("expected linux factory returned on linux based systems")
	}
	if got := linuxFactory.Root; got != root {
		t.Fatalf("expected factory root to be %q but received %q", root, got)
	}
}
// TestFactoryLoadNotExists checks that loading an id with no saved state
// fails with a libcontainer Error whose code is ContainerNotExists.
func TestFactoryLoadNotExists(t *testing.T) {
	root, rerr := newTestRoot()
	if rerr != nil {
		t.Fatal(rerr)
	}
	defer os.RemoveAll(root)
	factory, err := New(root, Cgroupfs)
	if err != nil {
		t.Fatal(err)
	}
	_, err = factory.Load("nocontainer")
	if err == nil {
		// The load is supposed to fail; the message now says so.
		t.Fatal("expected an error loading non-existing container")
	}
	lerr, ok := err.(Error)
	if !ok {
		t.Fatal("expected libcontainer error type")
	}
	if lerr.Code() != ContainerNotExists {
		t.Fatalf("expected error code %s but received %s", ContainerNotExists, lerr.Code())
	}
}
// TestFactoryLoadContainer writes a minimal state file by hand and checks that
// Load returns a linux container carrying the id, rootfs and init pid from it.
func TestFactoryLoadContainer(t *testing.T) {
	root, err := newTestRoot()
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(root)

	// Mocked container config and state.
	const id = "1"
	wantConfig := &configs.Config{Rootfs: "/mycontainer/root"}
	wantState := &State{
		InitProcessPid: 1024,
		Config:         *wantConfig,
	}

	containerDir := filepath.Join(root, id)
	if err := os.Mkdir(containerDir, 0700); err != nil {
		t.Fatal(err)
	}
	if err := marshal(filepath.Join(containerDir, stateFilename), wantState); err != nil {
		t.Fatal(err)
	}

	factory, err := New(root, Cgroupfs)
	if err != nil {
		t.Fatal(err)
	}
	container, err := factory.Load(id)
	if err != nil {
		t.Fatal(err)
	}

	if got := container.ID(); got != id {
		t.Fatalf("expected container id %q but received %q", id, got)
	}
	if got := container.Config().Rootfs; got != wantConfig.Rootfs {
		t.Fatalf("expected rootfs %q but received %q", wantConfig.Rootfs, got)
	}
	linuxCtr, isLinux := container.(*linuxContainer)
	if !isLinux {
		t.Fatal("expected linux container on linux based systems")
	}
	if got := linuxCtr.initProcess.pid(); got != wantState.InitProcessPid {
		t.Fatalf("expected init pid %d but received %d", wantState.InitProcessPid, got)
	}
}
// marshal creates (or truncates) the file at path and writes v to it as JSON.
// It returns the first error from creating, encoding or closing the file.
func marshal(path string, v interface{}) (err error) {
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close on a file that was just written can mean lost data,
		// so surface it unless an encode error is already being returned.
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()
	return json.NewEncoder(f).Encode(v)
}

View file

@ -0,0 +1,74 @@
package libcontainer
import (
"fmt"
"io"
"text/template"
"time"
"github.com/docker/libcontainer/stacktrace"
)
// errorTemplate renders the multi-line report written by genericError.Detail:
// the timestamp, the error code, the message when one is set, and one entry
// per frame of the captured stack (function, package, file and line).
var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}}
Code: {{.ECode}}
{{if .Message }}
Message: {{.Message}}
{{end}}
Frames:{{range $i, $frame := .Stack.Frames}}
---
{{$i}}: {{$frame.Function}}
Package: {{$frame.Package}}
File: {{$frame.File}}@{{$frame.Line}}{{end}}
`))
// newGenericError wraps err in a genericError tagged with code c, stamped with
// the current time and a captured stack trace. An err that already is a
// libcontainer Error is returned unchanged, keeping its original code. A nil
// err yields an error with an empty message.
func newGenericError(err error, c ErrorCode) Error {
	if existing, isLibErr := err.(Error); isLibErr {
		return existing
	}
	var message string
	if err != nil {
		message = err.Error()
	}
	return &genericError{
		Timestamp: time.Now(),
		ECode:     c,
		Err:       err,
		Message:   message,
		Stack:     stacktrace.Capture(1),
	}
}
// newSystemError wraps err in a genericError with the SystemError code, the
// current time and a captured stack trace. An err that already is a
// libcontainer Error is passed through as is.
func newSystemError(err error) Error {
	if wrapped, done := err.(Error); done {
		return wrapped
	}
	sysErr := genericError{
		Timestamp: time.Now(),
		Err:       err,
		ECode:     SystemError,
		Stack:     stacktrace.Capture(1),
	}
	// Message stays empty for a nil err.
	if err != nil {
		sysErr.Message = err.Error()
	}
	return &sysErr
}
// genericError is the Error implementation built by newGenericError and
// newSystemError.
type genericError struct {
	// Timestamp is when the error value was created.
	Timestamp time.Time
	// ECode classifies the error.
	ECode ErrorCode
	// Err is the wrapped cause; it is left out of the JSON encoding.
	Err error `json:"-"`
	// Message is the text of Err, or empty when Err is nil.
	Message string
	// Stack is the stack trace captured at creation.
	Stack stacktrace.Stacktrace
}

// Error formats the error as "[<numeric code>] <code>: <message>".
func (e *genericError) Error() string {
	code := e.ECode
	return fmt.Sprintf("[%d] %s: %s", code, code, e.Message)
}

// Code returns the error's classification.
func (e *genericError) Code() ErrorCode {
	return e.ECode
}

// Detail writes the full report rendered by errorTemplate to w and returns
// any error from executing the template.
func (e *genericError) Detail(w io.Writer) error {
	return errorTemplate.Execute(w, e)
}

View file

@ -0,0 +1,14 @@
package libcontainer
import (
"fmt"
"io/ioutil"
"testing"
)
// TestErrorDetail checks that the detail template executes without error for
// a freshly created generic error.
func TestErrorDetail(t *testing.T) {
	lerr := newGenericError(fmt.Errorf("test error"), SystemError)
	derr := lerr.Detail(ioutil.Discard)
	if derr != nil {
		t.Fatal(derr)
	}
}

View file

@ -0,0 +1,13 @@
#!/usr/bin/env bash
set -e
# This script runs all validations
# validate rewrites docker's .validate helper for this repository and then
# runs the validate-dco and validate-gofmt scripts from the docker source tree.
# With `set -e` in effect, the first failing command aborts the script.
validate() {
# In-place edit: replace docker/docker with docker/libcontainer in .validate.
sed -i 's!docker/docker!docker/libcontainer!' /go/src/github.com/docker/docker/hack/make/.validate
bash /go/src/github.com/docker/docker/hack/make/validate-dco
bash /go/src/github.com/docker/docker/hack/make/validate-gofmt
}
# run validations
validate

View file

@ -0,0 +1,253 @@
// +build linux
package libcontainer
import (
"encoding/json"
"fmt"
"os"
"strings"
"syscall"
log "github.com/Sirupsen/logrus"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/netlink"
"github.com/docker/libcontainer/system"
"github.com/docker/libcontainer/user"
"github.com/docker/libcontainer/utils"
)
// initType names the kind of init to run in the re-executed process. It is
// read from the _LIBCONTAINER_INITTYPE environment variable and selects the
// initer that newContainerInit builds.
type initType string
const (
// initSetns selects linuxSetnsInit.
initSetns initType = "setns"
// initStandard selects linuxStandardInit.
initStandard initType = "standard"
)
// pid wraps a process id so that it is encoded in JSON under the "pid" key.
// NOTE(review): its producers and consumers are outside this section.
type pid struct {
Pid int `json:"pid"`
}
// network is an internal struct used to setup container networks. It embeds
// the configs.Network settings and adds state needed only during init.
type network struct {
configs.Network
// TempVethPeerName is a unique temporary veth peer name that was placed into
// the container's namespace.
TempVethPeerName string `json:"temp_veth_peer_name"`
}
// initConfig is used for transferring parameters from Exec() to Init().
// newContainerInit decodes it as JSON from the pipe shared with the parent.
type initConfig struct {
// Args: presumably the command line to run in the container; TODO confirm.
Args []string `json:"args"`
// Env holds KEY=VALUE pairs loaded into the process environment.
Env []string `json:"env"`
// Cwd is the working directory to change to; empty leaves it unchanged.
Cwd string `json:"cwd"`
// User is the user specification resolved by setupUser.
User string `json:"user"`
// Config is the container configuration.
Config *configs.Config `json:"config"`
// Console: presumably the path of the console device; TODO confirm.
Console string `json:"console"`
// Networks lists the networks initialized by setupNetwork.
Networks []*network `json:"network"`
}
// initer is implemented by each init flavour that newContainerInit returns.
type initer interface {
// Init performs the initialization and reports any failure.
Init() error
}
// newContainerInit decodes the initConfig sent over pipe, loads its Env into
// the current process environment and returns the initer matching t. An
// unrecognised t is an error; the environment has already been populated by
// the time that is detected.
func newContainerInit(t initType, pipe *os.File) (initer, error) {
	var config *initConfig
	decoder := json.NewDecoder(pipe)
	if err := decoder.Decode(&config); err != nil {
		return nil, err
	}
	if err := populateProcessEnvironment(config.Env); err != nil {
		return nil, err
	}
	switch t {
	case initSetns:
		return &linuxSetnsInit{config: config}, nil
	case initStandard:
		return &linuxStandardInit{config: config}, nil
	default:
		return nil, fmt.Errorf("unknown init type %q", t)
	}
}
// populateProcessEnvironment sets every KEY=VALUE entry of env in the current
// process's environment. Entries are split at the first "="; an entry without
// one is rejected. Processing stops at the first failure, leaving the entries
// before it applied.
func populateProcessEnvironment(env []string) error {
	for _, entry := range env {
		sep := strings.Index(entry, "=")
		if sep < 0 {
			return fmt.Errorf("invalid environment '%v'", entry)
		}
		key, value := entry[:sep], entry[sep+1:]
		if err := os.Setenv(key, value); err != nil {
			return err
		}
	}
	return nil
}
// finalizeNamespace drops the caps, sets the correct user
// and working dir, and closes any leaked file descriptors
// before execing the command inside the namespace
//
// The steps run in a fixed order and the first one that fails aborts the
// sequence, returning its error: mark fds from 3 upwards close-on-exec, drop
// the bounding set, switch user with keep-caps set, clear keep-caps, drop the
// remaining capabilities, then change to config.Cwd when it is non-empty.
func finalizeNamespace(config *initConfig) error {
// Ensure that all non-standard fds we may have accidentally
// inherited are marked close-on-exec so they stay out of the
// container
if err := utils.CloseExecFrom(3); err != nil {
return err
}
// the whitelist is built from the capabilities listed in the container config
w, err := newCapWhitelist(config.Config.Capabilities)
if err != nil {
return err
}
// drop capabilities in bounding set before changing user
if err := w.dropBoundingSet(); err != nil {
return err
}
// preserve existing capabilities while we change users
if err := system.SetKeepCaps(); err != nil {
return err
}
// switch groups, gid, uid and HOME (see setupUser)
if err := setupUser(config); err != nil {
return err
}
// the user switch is done, so keep-caps is turned off again
if err := system.ClearKeepCaps(); err != nil {
return err
}
// drop all other capabilities
if err := w.drop(); err != nil {
return err
}
// an empty Cwd leaves the working directory untouched
if config.Cwd != "" {
if err := syscall.Chdir(config.Cwd); err != nil {
return err
}
}
return nil
}
// joinExistingNamespaces makes the current process join every namespace in
// namespaces that carries a path, by opening the path read-only and calling
// setns on the resulting fd. Entries with an empty path are skipped. The
// first failure stops the walk and is returned.
func joinExistingNamespaces(namespaces []configs.Namespace) error {
	for _, ns := range namespaces {
		if ns.Path == "" {
			continue
		}
		nsFile, err := os.OpenFile(ns.Path, os.O_RDONLY, 0)
		if err != nil {
			return err
		}
		setnsErr := system.Setns(nsFile.Fd(), uintptr(ns.Syscall()))
		// The fd is only needed for the setns call itself.
		nsFile.Close()
		if setnsErr != nil {
			return setnsErr
		}
	}
	return nil
}
// setupUser changes the supplementary groups, gid and uid of the current
// process to those of the user named in config.User, in that order, and sets
// HOME from the resolved user when the environment does not already have a
// non-empty HOME. When the user spec leaves fields open, the current uid and
// gid and a home of "/" are supplied as defaults to the lookup.
func setupUser(config *initConfig) error {
	fallback := user.ExecUser{
		Uid:  syscall.Getuid(),
		Gid:  syscall.Getgid(),
		Home: "/",
	}

	passwdPath, err := user.GetPasswdPath()
	if err != nil {
		return err
	}
	groupPath, err := user.GetGroupPath()
	if err != nil {
		return err
	}
	resolved, err := user.GetExecUserPath(config.User, &fallback, passwdPath, groupPath)
	if err != nil {
		return err
	}

	// Groups first, then gid, then uid.
	groups := append(resolved.Sgids, config.Config.AdditionalGroups...)
	if err := syscall.Setgroups(groups); err != nil {
		return err
	}
	if err := system.Setgid(resolved.Gid); err != nil {
		return err
	}
	if err := system.Setuid(resolved.Uid); err != nil {
		return err
	}

	// A HOME that is already set wins over the user's home.
	if os.Getenv("HOME") != "" {
		return nil
	}
	return os.Setenv("HOME", resolved.Home)
}
// setupNetwork initializes every network in config.Networks inside the
// container, using the strategy registered for each network's type. It stops
// at the first network whose strategy is unknown or fails to initialize.
func setupNetwork(config *initConfig) error {
	for _, nw := range config.Networks {
		strategy, err := getStrategy(nw.Type)
		if err != nil {
			return err
		}
		if initErr := strategy.initialize(nw); initErr != nil {
			return initErr
		}
	}
	return nil
}
// setupRoute adds every route in config.Routes through netlink.AddRoute and
// returns the first error encountered.
func setupRoute(config *configs.Config) error {
	for _, route := range config.Routes {
		err := netlink.AddRoute(route.Destination, route.Source, route.Gateway, route.InterfaceName)
		if err != nil {
			return err
		}
	}
	return nil
}
// setupRlimits applies each resource limit in config.Rlimits to the current
// process, using the entry's Soft value as the current limit and Hard as the
// maximum. The first limit that cannot be set is reported with its type.
func setupRlimits(config *configs.Config) error {
	for _, rl := range config.Rlimits {
		limit := syscall.Rlimit{Cur: rl.Soft, Max: rl.Hard}
		if err := syscall.Setrlimit(rl.Type, &limit); err != nil {
			return fmt.Errorf("error setting rlimit type %v: %v", rl.Type, err)
		}
	}
	return nil
}
// killCgroupProcesses freezes the manager's cgroups, sends a kill to every
// process listed in them, thaws the cgroups again and then waits on each
// process it found.
//
// Only a failure to list the pids is returned (after thawing). Failures to
// freeze, kill, thaw or wait are logged as warnings and otherwise ignored.
func killCgroupProcesses(m cgroups.Manager) error {
	if err := m.Freeze(configs.Frozen); err != nil {
		log.Warn(err)
	}
	pids, err := m.GetPids()
	if err != nil {
		m.Freeze(configs.Thawed)
		return err
	}

	var found []*os.Process
	for _, id := range pids {
		proc, findErr := os.FindProcess(id)
		if findErr != nil {
			continue
		}
		found = append(found, proc)
		if killErr := proc.Kill(); killErr != nil {
			log.Warn(killErr)
		}
	}

	if err := m.Freeze(configs.Thawed); err != nil {
		log.Warn(err)
	}
	for _, proc := range found {
		if _, waitErr := proc.Wait(); waitErr != nil {
			log.Warn(waitErr)
		}
	}
	return nil
}

View file

@ -1,34 +1,50 @@
package integration package integration
import ( import (
"bytes"
"io/ioutil"
"os" "os"
"strings" "strings"
"testing" "testing"
"github.com/docker/libcontainer" "github.com/docker/libcontainer"
"github.com/docker/libcontainer/configs"
) )
func TestExecPS(t *testing.T) { func TestExecPS(t *testing.T) {
testExecPS(t, false)
}
func TestUsernsExecPS(t *testing.T) {
if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
t.Skip("userns is unsupported")
}
testExecPS(t, true)
}
func testExecPS(t *testing.T, userns bool) {
if testing.Short() { if testing.Short() {
return return
} }
rootfs, err := newRootfs()
rootfs, err := newRootFs()
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
defer remove(rootfs) defer remove(rootfs)
config := newTemplateConfig(rootfs) config := newTemplateConfig(rootfs)
buffers, exitCode, err := runContainer(config, "", "ps") if userns {
if err != nil { config.UidMappings = []configs.IDMap{{0, 0, 1000}}
t.Fatal(err) config.GidMappings = []configs.IDMap{{0, 0, 1000}}
config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER})
} }
buffers, exitCode, err := runContainer(config, "", "ps")
if err != nil {
t.Fatalf("%s: %s", buffers, err)
}
if exitCode != 0 { if exitCode != 0 {
t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr) t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr)
} }
lines := strings.Split(buffers.Stdout.String(), "\n") lines := strings.Split(buffers.Stdout.String(), "\n")
if len(lines) < 2 { if len(lines) < 2 {
t.Fatalf("more than one process running for output %q", buffers.Stdout.String()) t.Fatalf("more than one process running for output %q", buffers.Stdout.String())
@ -45,7 +61,7 @@ func TestIPCPrivate(t *testing.T) {
return return
} }
rootfs, err := newRootFs() rootfs, err := newRootfs()
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -76,7 +92,7 @@ func TestIPCHost(t *testing.T) {
return return
} }
rootfs, err := newRootFs() rootfs, err := newRootfs()
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -88,7 +104,7 @@ func TestIPCHost(t *testing.T) {
} }
config := newTemplateConfig(rootfs) config := newTemplateConfig(rootfs)
config.Namespaces.Remove(libcontainer.NEWIPC) config.Namespaces.Remove(configs.NEWIPC)
buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc") buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
@ -108,7 +124,7 @@ func TestIPCJoinPath(t *testing.T) {
return return
} }
rootfs, err := newRootFs() rootfs, err := newRootfs()
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -120,7 +136,7 @@ func TestIPCJoinPath(t *testing.T) {
} }
config := newTemplateConfig(rootfs) config := newTemplateConfig(rootfs)
config.Namespaces.Add(libcontainer.NEWIPC, "/proc/1/ns/ipc") config.Namespaces.Add(configs.NEWIPC, "/proc/1/ns/ipc")
buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc") buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc")
if err != nil { if err != nil {
@ -141,14 +157,14 @@ func TestIPCBadPath(t *testing.T) {
return return
} }
rootfs, err := newRootFs() rootfs, err := newRootfs()
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
defer remove(rootfs) defer remove(rootfs)
config := newTemplateConfig(rootfs) config := newTemplateConfig(rootfs)
config.Namespaces.Add(libcontainer.NEWIPC, "/proc/1/ns/ipcc") config.Namespaces.Add(configs.NEWIPC, "/proc/1/ns/ipcc")
_, _, err = runContainer(config, "", "true") _, _, err = runContainer(config, "", "true")
if err == nil { if err == nil {
@ -161,7 +177,7 @@ func TestRlimit(t *testing.T) {
return return
} }
rootfs, err := newRootFs() rootfs, err := newRootfs()
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -172,38 +188,289 @@ func TestRlimit(t *testing.T) {
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
if limit := strings.TrimSpace(out.Stdout.String()); limit != "1024" { if limit := strings.TrimSpace(out.Stdout.String()); limit != "1025" {
t.Fatalf("expected rlimit to be 1024, got %s", limit) t.Fatalf("expected rlimit to be 1025, got %s", limit)
} }
} }
func TestPIDNSPrivate(t *testing.T) { func newTestRoot() (string, error) {
dir, err := ioutil.TempDir("", "libcontainer")
if err != nil {
return "", err
}
if err := os.MkdirAll(dir, 0700); err != nil {
return "", err
}
return dir, nil
}
// waitProcess waits for p to exit and fails the test when the wait itself
// errors or the process did not exit successfully.
func waitProcess(p *libcontainer.Process, t *testing.T) {
	status, err := p.Wait()
	switch {
	case err != nil:
		t.Fatal(err)
	case !status.Success():
		t.Fatal(status)
	}
}
func TestEnter(t *testing.T) {
if testing.Short() { if testing.Short() {
return return
} }
root, err := newTestRoot()
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(root)
rootfs, err := newRootFs() rootfs, err := newRootfs()
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
defer remove(rootfs) defer remove(rootfs)
l, err := os.Readlink("/proc/1/ns/pid")
if err != nil {
t.Fatal(err)
}
config := newTemplateConfig(rootfs) config := newTemplateConfig(rootfs)
buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/pid")
factory, err := libcontainer.New(root, libcontainer.Cgroupfs)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
if exitCode != 0 { container, err := factory.Create("test", config)
t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr) if err != nil {
t.Fatal(err)
}
defer container.Destroy()
// Execute a first process in the container
stdinR, stdinW, err := os.Pipe()
if err != nil {
t.Fatal(err)
} }
if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual == l { var stdout, stdout2 bytes.Buffer
t.Fatalf("pid link should be private to the container but equals host %q %q", actual, l)
pconfig := libcontainer.Process{
Args: []string{"sh", "-c", "cat && readlink /proc/self/ns/pid"},
Env: standardEnvironment,
Stdin: stdinR,
Stdout: &stdout,
}
err = container.Start(&pconfig)
stdinR.Close()
defer stdinW.Close()
if err != nil {
t.Fatal(err)
}
pid, err := pconfig.Pid()
if err != nil {
t.Fatal(err)
}
// Execute another process in the container
stdinR2, stdinW2, err := os.Pipe()
if err != nil {
t.Fatal(err)
}
pconfig2 := libcontainer.Process{
Env: standardEnvironment,
}
pconfig2.Args = []string{"sh", "-c", "cat && readlink /proc/self/ns/pid"}
pconfig2.Stdin = stdinR2
pconfig2.Stdout = &stdout2
err = container.Start(&pconfig2)
stdinR2.Close()
defer stdinW2.Close()
if err != nil {
t.Fatal(err)
}
pid2, err := pconfig2.Pid()
if err != nil {
t.Fatal(err)
}
processes, err := container.Processes()
if err != nil {
t.Fatal(err)
}
n := 0
for i := range processes {
if processes[i] == pid || processes[i] == pid2 {
n++
}
}
if n != 2 {
t.Fatal("unexpected number of processes", processes, pid, pid2)
}
// Wait processes
stdinW2.Close()
waitProcess(&pconfig2, t)
stdinW.Close()
waitProcess(&pconfig, t)
// Check that both processes live in the same pidns
pidns := string(stdout.Bytes())
if err != nil {
t.Fatal(err)
}
pidns2 := string(stdout2.Bytes())
if err != nil {
t.Fatal(err)
}
if pidns != pidns2 {
t.Fatal("The second process isn't in the required pid namespace", pidns, pidns2)
}
}
// TestProcessEnv runs `env` inside a fresh container and checks that the
// variables passed through Process.Env reach the process and that HOME is set
// even though it was not passed explicitly. Skipped in -short mode.
func TestProcessEnv(t *testing.T) {
	if testing.Short() {
		return
	}
	root, err := newTestRoot()
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(root)

	rootfs, err := newRootfs()
	if err != nil {
		t.Fatal(err)
	}
	defer remove(rootfs)

	config := newTemplateConfig(rootfs)

	factory, err := libcontainer.New(root, libcontainer.Cgroupfs)
	if err != nil {
		t.Fatal(err)
	}

	container, err := factory.Create("test", config)
	if err != nil {
		t.Fatal(err)
	}
	defer container.Destroy()

	var stdout bytes.Buffer
	pconfig := libcontainer.Process{
		Args: []string{"sh", "-c", "env"},
		Env: []string{
			"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
			"HOSTNAME=integration",
			"TERM=xterm",
			"FOO=BAR",
		},
		Stdin:  nil,
		Stdout: &stdout,
	}
	err = container.Start(&pconfig)
	if err != nil {
		t.Fatal(err)
	}

	// Wait for process
	waitProcess(&pconfig, t)

	// Reading the buffer cannot fail, so no error check is needed here.
	outputEnv := stdout.String()

	// Check that the environment has the key/value pair we added
	if !strings.Contains(outputEnv, "FOO=BAR") {
		t.Fatal("Environment doesn't have the expected FOO=BAR key/value pair: ", outputEnv)
	}

	// Make sure that HOME is set
	if !strings.Contains(outputEnv, "HOME=/root") {
		t.Fatal("Environment doesn't have HOME set: ", outputEnv)
	}
}
func TestFreeze(t *testing.T) {
if testing.Short() {
return
}
root, err := newTestRoot()
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(root)
rootfs, err := newRootfs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
factory, err := libcontainer.New(root, libcontainer.Cgroupfs)
if err != nil {
t.Fatal(err)
}
container, err := factory.Create("test", config)
if err != nil {
t.Fatal(err)
}
defer container.Destroy()
stdinR, stdinW, err := os.Pipe()
if err != nil {
t.Fatal(err)
}
pconfig := libcontainer.Process{
Args: []string{"cat"},
Env: standardEnvironment,
Stdin: stdinR,
}
err = container.Start(&pconfig)
stdinR.Close()
defer stdinW.Close()
if err != nil {
t.Fatal(err)
}
pid, err := pconfig.Pid()
if err != nil {
t.Fatal(err)
}
process, err := os.FindProcess(pid)
if err != nil {
t.Fatal(err)
}
if err := container.Pause(); err != nil {
t.Fatal(err)
}
state, err := container.Status()
if err != nil {
t.Fatal(err)
}
if err := container.Resume(); err != nil {
t.Fatal(err)
}
if state != libcontainer.Paused {
t.Fatal("Unexpected state: ", state)
}
stdinW.Close()
s, err := process.Wait()
if err != nil {
t.Fatal(err)
}
if !s.Success() {
t.Fatal(s.String())
} }
} }

View file

@ -1,62 +1,70 @@
package integration package integration
import ( import (
"bytes"
"io"
"os" "os"
"os/exec"
"strings" "strings"
"sync"
"testing" "testing"
"time"
"github.com/docker/libcontainer" "github.com/docker/libcontainer"
"github.com/docker/libcontainer/namespaces"
) )
func TestExecIn(t *testing.T) { func TestExecIn(t *testing.T) {
if testing.Short() { if testing.Short() {
return return
} }
rootfs, err := newRootfs()
rootfs, err := newRootFs()
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
defer remove(rootfs) defer remove(rootfs)
config := newTemplateConfig(rootfs) config := newTemplateConfig(rootfs)
if err := writeConfig(config); err != nil { container, err := newContainer(config)
t.Fatalf("failed to write config %s", err)
}
containerCmd, statePath, containerErr := startLongRunningContainer(config)
defer func() {
// kill the container
if containerCmd.Process != nil {
containerCmd.Process.Kill()
}
if err := <-containerErr; err != nil {
t.Fatal(err)
}
}()
// start the exec process
state, err := libcontainer.GetState(statePath)
if err != nil { if err != nil {
t.Fatalf("failed to get state %s", err) t.Fatal(err)
} }
buffers := newStdBuffers() defer container.Destroy()
execErr := make(chan error, 1)
go func() { // Execute a first process in the container
_, err := namespaces.ExecIn(config, state, []string{"ps"}, stdinR, stdinW, err := os.Pipe()
os.Args[0], "exec", buffers.Stdin, buffers.Stdout, buffers.Stderr, if err != nil {
"", nil) t.Fatal(err)
execErr <- err }
}() process := &libcontainer.Process{
if err := <-execErr; err != nil { Args: []string{"cat"},
t.Fatalf("exec finished with error %s", err) Env: standardEnvironment,
Stdin: stdinR,
}
err = container.Start(process)
stdinR.Close()
defer stdinW.Close()
if err != nil {
t.Fatal(err)
} }
buffers := newStdBuffers()
ps := &libcontainer.Process{
Args: []string{"ps"},
Env: standardEnvironment,
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Stderr: buffers.Stderr,
}
err = container.Start(ps)
if err != nil {
t.Fatal(err)
}
if _, err := ps.Wait(); err != nil {
t.Fatal(err)
}
stdinW.Close()
if _, err := process.Wait(); err != nil {
t.Log(err)
}
out := buffers.Stdout.String() out := buffers.Stdout.String()
if !strings.Contains(out, "sleep 10") || !strings.Contains(out, "ps") { if !strings.Contains(out, "cat") || !strings.Contains(out, "ps") {
t.Fatalf("unexpected running process, output %q", out) t.Fatalf("unexpected running process, output %q", out)
} }
} }
@ -65,76 +73,244 @@ func TestExecInRlimit(t *testing.T) {
if testing.Short() { if testing.Short() {
return return
} }
rootfs, err := newRootfs()
rootfs, err := newRootFs()
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
defer remove(rootfs) defer remove(rootfs)
config := newTemplateConfig(rootfs) config := newTemplateConfig(rootfs)
if err := writeConfig(config); err != nil { container, err := newContainer(config)
t.Fatalf("failed to write config %s", err)
}
containerCmd, statePath, containerErr := startLongRunningContainer(config)
defer func() {
// kill the container
if containerCmd.Process != nil {
containerCmd.Process.Kill()
}
if err := <-containerErr; err != nil {
t.Fatal(err)
}
}()
// start the exec process
state, err := libcontainer.GetState(statePath)
if err != nil { if err != nil {
t.Fatalf("failed to get state %s", err) t.Fatal(err)
} }
defer container.Destroy()
stdinR, stdinW, err := os.Pipe()
if err != nil {
t.Fatal(err)
}
process := &libcontainer.Process{
Args: []string{"cat"},
Env: standardEnvironment,
Stdin: stdinR,
}
err = container.Start(process)
stdinR.Close()
defer stdinW.Close()
if err != nil {
t.Fatal(err)
}
buffers := newStdBuffers() buffers := newStdBuffers()
execErr := make(chan error, 1) ps := &libcontainer.Process{
go func() { Args: []string{"/bin/sh", "-c", "ulimit -n"},
_, err := namespaces.ExecIn(config, state, []string{"/bin/sh", "-c", "ulimit -n"}, Env: standardEnvironment,
os.Args[0], "exec", buffers.Stdin, buffers.Stdout, buffers.Stderr, Stdin: buffers.Stdin,
"", nil) Stdout: buffers.Stdout,
execErr <- err Stderr: buffers.Stderr,
}() }
if err := <-execErr; err != nil { err = container.Start(ps)
t.Fatalf("exec finished with error %s", err) if err != nil {
t.Fatal(err)
}
if _, err := ps.Wait(); err != nil {
t.Fatal(err)
}
stdinW.Close()
if _, err := process.Wait(); err != nil {
t.Log(err)
} }
out := buffers.Stdout.String() out := buffers.Stdout.String()
if limit := strings.TrimSpace(out); limit != "1024" { if limit := strings.TrimSpace(out); limit != "1025" {
t.Fatalf("expected rlimit to be 1024, got %s", limit) t.Fatalf("expected rlimit to be 1025, got %s", limit)
} }
} }
// start a long-running container so we have time to inspect execin processes func TestExecInError(t *testing.T) {
func startLongRunningContainer(config *libcontainer.Config) (*exec.Cmd, string, chan error) { if testing.Short() {
containerErr := make(chan error, 1) return
containerCmd := &exec.Cmd{}
var statePath string
createCmd := func(container *libcontainer.Config, console, dataPath, init string,
pipe *os.File, args []string) *exec.Cmd {
containerCmd = namespaces.DefaultCreateCommand(container, console, dataPath, init, pipe, args)
statePath = dataPath
return containerCmd
} }
rootfs, err := newRootfs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
container, err := newContainer(config)
if err != nil {
t.Fatal(err)
}
defer container.Destroy()
var containerStart sync.WaitGroup // Execute a first process in the container
containerStart.Add(1) stdinR, stdinW, err := os.Pipe()
go func() { if err != nil {
buffers := newStdBuffers() t.Fatal(err)
_, err := namespaces.Exec(config, }
buffers.Stdin, buffers.Stdout, buffers.Stderr, process := &libcontainer.Process{
"", config.RootFs, []string{"sleep", "10"}, Args: []string{"cat"},
createCmd, containerStart.Done) Env: standardEnvironment,
containerErr <- err Stdin: stdinR,
}
err = container.Start(process)
stdinR.Close()
defer func() {
stdinW.Close()
if _, err := process.Wait(); err != nil {
t.Log(err)
}
}() }()
containerStart.Wait() if err != nil {
t.Fatal(err)
}
return containerCmd, statePath, containerErr unexistent := &libcontainer.Process{
Args: []string{"unexistent"},
Env: standardEnvironment,
}
err = container.Start(unexistent)
if err == nil {
t.Fatal("Should be an error")
}
if !strings.Contains(err.Error(), "executable file not found") {
t.Fatalf("Should be error about not found executable, got %s", err)
}
}
// TestExecInTTY starts a long-running "cat" process in a fresh container, then
// runs "ps" attached to a newly created console and checks that the console
// output mentions both "cat" and "ps".
//
// The test is skipped in -short mode.
func TestExecInTTY(t *testing.T) {
	if testing.Short() {
		return
	}
	rootfs, err := newRootfs()
	if err != nil {
		t.Fatal(err)
	}
	defer remove(rootfs)
	config := newTemplateConfig(rootfs)
	container, err := newContainer(config)
	if err != nil {
		t.Fatal(err)
	}
	defer container.Destroy()

	// Execute a first process in the container. "cat" keeps running until the
	// write end of its stdin pipe is closed.
	stdinR, stdinW, err := os.Pipe()
	if err != nil {
		t.Fatal(err)
	}
	process := &libcontainer.Process{
		Args:  []string{"cat"},
		Env:   standardEnvironment,
		Stdin: stdinR,
	}
	err = container.Start(process)
	// Only the write end is used by the test from here on.
	stdinR.Close()
	defer stdinW.Close()
	if err != nil {
		t.Fatal(err)
	}

	var stdout bytes.Buffer
	ps := &libcontainer.Process{
		Args: []string{"ps"},
		Env:  standardEnvironment,
	}
	console, err := ps.NewConsole(0)
	// Check the error before handing the console to the copier goroutine:
	// copying from a console that failed to be created must never be attempted.
	if err != nil {
		t.Fatal(err)
	}
	copyDone := make(chan struct{})
	go func() {
		// The copy result is deliberately not inspected; the test only looks
		// at whatever output was collected.
		io.Copy(&stdout, console)
		close(copyDone)
	}()

	err = container.Start(ps)
	if err != nil {
		t.Fatal(err)
	}
	select {
	case <-time.After(5 * time.Second):
		t.Fatal("Waiting for copy timed out")
	case <-copyDone:
	}
	if _, err := ps.Wait(); err != nil {
		t.Fatal(err)
	}

	// Let "cat" finish; a failure while reaping it is only logged.
	stdinW.Close()
	if _, err := process.Wait(); err != nil {
		t.Log(err)
	}

	out := stdout.String()
	if !strings.Contains(out, "cat") || !strings.Contains(out, "ps") {
		t.Fatalf("unexpected running process, output %q", out)
	}
}
// TestExecInEnvironment starts a long-running "cat" process in a fresh
// container, then runs "env" as a second process with its own Env list and
// checks the variables that "env" prints.
//
// The test is skipped in -short mode.
func TestExecInEnvironment(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
container, err := newContainer(config)
if err != nil {
t.Fatal(err)
}
defer container.Destroy()
// Execute a first process in the container
stdinR, stdinW, err := os.Pipe()
if err != nil {
t.Fatal(err)
}
process := &libcontainer.Process{
Args: []string{"cat"},
Env: standardEnvironment,
Stdin: stdinR,
}
err = container.Start(process)
// Only the write end of the pipe is used by the test from here on.
stdinR.Close()
defer stdinW.Close()
if err != nil {
t.Fatal(err)
}
buffers := newStdBuffers()
// DEBUG is listed twice on purpose: the assertions below require the later
// value ("false") to be present and the earlier one ("true") to be absent.
process2 := &libcontainer.Process{
Args: []string{"env"},
Env: []string{
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"DEBUG=true",
"DEBUG=false",
"ENV=test",
},
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Stderr: buffers.Stderr,
}
err = container.Start(process2)
if err != nil {
t.Fatal(err)
}
if _, err := process2.Wait(); err != nil {
out := buffers.Stdout.String()
t.Fatal(err, out)
}
// Closing the write end lets "cat" finish; a failure while reaping it is
// only logged.
stdinW.Close()
if _, err := process.Wait(); err != nil {
t.Log(err)
}
out := buffers.Stdout.String()
// check execin's process environment
// NOTE(review): HOME=/root is not part of process2.Env above, so it is
// presumably supplied by libcontainer itself - confirm.
if !strings.Contains(out, "DEBUG=false") ||
!strings.Contains(out, "ENV=test") ||
!strings.Contains(out, "HOME=/root") ||
!strings.Contains(out, "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin") ||
strings.Contains(out, "DEBUG=true") {
t.Fatalf("unexpected running process, output %q", out)
}
} }

View file

@ -1,76 +1,27 @@
package integration package integration
import ( import (
"encoding/json"
"log" "log"
"os" "os"
"runtime" "runtime"
"github.com/docker/libcontainer" "github.com/docker/libcontainer"
"github.com/docker/libcontainer/namespaces" _ "github.com/docker/libcontainer/nsenter"
_ "github.com/docker/libcontainer/namespaces/nsenter"
) )
// init runs the libcontainer initialization code because of the busybox style needs // init runs the libcontainer initialization code because of the busybox style needs
// to work around the go runtime and the issues with forking // to work around the go runtime and the issues with forking
func init() { func init() {
if len(os.Args) < 2 { if len(os.Args) < 2 || os.Args[1] != "init" {
return return
} }
// handle init runtime.GOMAXPROCS(1)
if len(os.Args) >= 2 && os.Args[1] == "init" { runtime.LockOSThread()
runtime.LockOSThread() factory, err := libcontainer.New("")
if err != nil {
container, err := loadConfig() log.Fatalf("unable to initialize for container: %s", err)
if err != nil {
log.Fatal(err)
}
rootfs, err := os.Getwd()
if err != nil {
log.Fatal(err)
}
if err := namespaces.Init(container, rootfs, "", os.NewFile(3, "pipe"), os.Args[3:]); err != nil {
log.Fatalf("unable to initialize for container: %s", err)
}
os.Exit(1)
} }
if err := factory.StartInitialization(3); err != nil {
// handle execin log.Fatal(err)
if len(os.Args) >= 2 && os.Args[0] == "nsenter-exec" {
runtime.LockOSThread()
// User args are passed after '--' in the command line.
userArgs := findUserArgs()
config, err := loadConfigFromFd()
if err != nil {
log.Fatalf("docker-exec: unable to receive config from sync pipe: %s", err)
}
if err := namespaces.FinalizeSetns(config, userArgs); err != nil {
log.Fatalf("docker-exec: failed to exec: %s", err)
}
os.Exit(1)
} }
} }
// findUserArgs returns the command-line arguments that follow the first "--"
// in os.Args. When there is no "--" it returns an empty, non-nil slice.
func findUserArgs() []string {
	for idx := 0; idx < len(os.Args); idx++ {
		if os.Args[idx] != "--" {
			continue
		}
		return os.Args[idx+1:]
	}
	return []string{}
}
// loadConfigFromFd decodes a JSON-encoded container config from file
// descriptor 3, the sync pipe handed to the process when it is run.
// It returns the decoded config, or the decoder's error.
func loadConfigFromFd() (*libcontainer.Config, error) {
	syncPipe := os.NewFile(3, "child")
	decoder := json.NewDecoder(syncPipe)

	var cfg *libcontainer.Config
	if decodeErr := decoder.Decode(&cfg); decodeErr != nil {
		return nil, decodeErr
	}
	return cfg, nil
}

View file

@ -3,19 +3,25 @@ package integration
import ( import (
"syscall" "syscall"
"github.com/docker/libcontainer" "github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/devices"
) )
var standardEnvironment = []string{
"HOME=/root",
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"HOSTNAME=integration",
"TERM=xterm",
}
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
// newTemplateConfig returns a base template for running a container // newTemplateConfig returns a base template for running a container
// //
// it uses a network strategy of just setting a loopback interface // it uses a network strategy of just setting a loopback interface
// and the default setup for devices // and the default setup for devices
func newTemplateConfig(rootfs string) *libcontainer.Config { func newTemplateConfig(rootfs string) *configs.Config {
return &libcontainer.Config{ return &configs.Config{
RootFs: rootfs, Rootfs: rootfs,
Tty: false,
Capabilities: []string{ Capabilities: []string{
"CHOWN", "CHOWN",
"DAC_OVERRIDE", "DAC_OVERRIDE",
@ -32,41 +38,60 @@ func newTemplateConfig(rootfs string) *libcontainer.Config {
"KILL", "KILL",
"AUDIT_WRITE", "AUDIT_WRITE",
}, },
Namespaces: libcontainer.Namespaces([]libcontainer.Namespace{ Namespaces: configs.Namespaces([]configs.Namespace{
{Type: libcontainer.NEWNS}, {Type: configs.NEWNS},
{Type: libcontainer.NEWUTS}, {Type: configs.NEWUTS},
{Type: libcontainer.NEWIPC}, {Type: configs.NEWIPC},
{Type: libcontainer.NEWPID}, {Type: configs.NEWPID},
{Type: libcontainer.NEWNET}, {Type: configs.NEWNET},
}), }),
Cgroups: &cgroups.Cgroup{ Cgroups: &configs.Cgroup{
Name: "test",
Parent: "integration", Parent: "integration",
AllowAllDevices: false, AllowAllDevices: false,
AllowedDevices: devices.DefaultAllowedDevices, AllowedDevices: configs.DefaultAllowedDevices,
}, },
MaskPaths: []string{
MountConfig: &libcontainer.MountConfig{ "/proc/kcore",
DeviceNodes: devices.DefaultAutoCreatedDevices,
}, },
ReadonlyPaths: []string{
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
},
Devices: configs.DefaultAutoCreatedDevices,
Hostname: "integration", Hostname: "integration",
Env: []string{ Mounts: []*configs.Mount{
"HOME=/root", {
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", Device: "tmpfs",
"HOSTNAME=integration", Source: "shm",
"TERM=xterm", Destination: "/dev/shm",
Data: "mode=1777,size=65536k",
Flags: defaultMountFlags,
},
{
Source: "mqueue",
Destination: "/dev/mqueue",
Device: "mqueue",
Flags: defaultMountFlags,
},
{
Source: "sysfs",
Destination: "/sys",
Device: "sysfs",
Flags: defaultMountFlags | syscall.MS_RDONLY,
},
}, },
Networks: []*libcontainer.Network{ Networks: []*configs.Network{
{ {
Type: "loopback", Type: "loopback",
Address: "127.0.0.1/0", Address: "127.0.0.1/0",
Gateway: "localhost", Gateway: "localhost",
}, },
}, },
Rlimits: []libcontainer.Rlimit{ Rlimits: []configs.Rlimit{
{ {
Type: syscall.RLIMIT_NOFILE, Type: syscall.RLIMIT_NOFILE,
Hard: uint64(1024), Hard: uint64(1025),
Soft: uint64(1024), Soft: uint64(1025),
}, },
}, },
} }

View file

@ -2,15 +2,15 @@ package integration
import ( import (
"bytes" "bytes"
"encoding/json"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"os" "os"
"os/exec" "os/exec"
"path/filepath" "strings"
"syscall"
"github.com/docker/libcontainer" "github.com/docker/libcontainer"
"github.com/docker/libcontainer/namespaces" "github.com/docker/libcontainer/configs"
) )
func newStdBuffers() *stdBuffers { func newStdBuffers() *stdBuffers {
@ -27,31 +27,19 @@ type stdBuffers struct {
Stderr *bytes.Buffer Stderr *bytes.Buffer
} }
func writeConfig(config *libcontainer.Config) error { func (b *stdBuffers) String() string {
f, err := os.OpenFile(filepath.Join(config.RootFs, "container.json"), os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0700) s := []string{}
if err != nil { if b.Stderr != nil {
return err s = append(s, b.Stderr.String())
} }
defer f.Close() if b.Stdout != nil {
return json.NewEncoder(f).Encode(config) s = append(s, b.Stdout.String())
}
return strings.Join(s, "|")
} }
func loadConfig() (*libcontainer.Config, error) { // newRootfs creates a new tmp directory and copies the busybox root filesystem
f, err := os.Open(filepath.Join(os.Getenv("data_path"), "container.json")) func newRootfs() (string, error) {
if err != nil {
return nil, err
}
defer f.Close()
var container *libcontainer.Config
if err := json.NewDecoder(f).Decode(&container); err != nil {
return nil, err
}
return container, nil
}
// newRootFs creates a new tmp directory and copies the busybox root filesystem
func newRootFs() (string, error) {
dir, err := ioutil.TempDir("", "") dir, err := ioutil.TempDir("", "")
if err != nil { if err != nil {
return "", err return "", err
@ -79,17 +67,51 @@ func copyBusybox(dest string) error {
return nil return nil
} }
// newContainer creates a libcontainer factory rooted at "." and uses it to
// create a container with the fixed ID "testCT" from config.
//
// The factory is configured with the Cgroupfs option and with init arguments
// made of the current binary (os.Args[0]) followed by "init" and "--".
func newContainer(config *configs.Config) (libcontainer.Container, error) {
	initArgs := libcontainer.InitArgs(os.Args[0], "init", "--")
	testFactory, factoryErr := libcontainer.New(".", initArgs, libcontainer.Cgroupfs)
	if factoryErr != nil {
		return nil, factoryErr
	}
	return testFactory.Create("testCT", config)
}
// runContainer runs the container with the specific config and arguments // runContainer runs the container with the specific config and arguments
// //
// buffers are returned containing the STDOUT and STDERR output for the run // buffers are returned containing the STDOUT and STDERR output for the run
// along with the exit code and any go error // along with the exit code and any go error
func runContainer(config *libcontainer.Config, console string, args ...string) (buffers *stdBuffers, exitCode int, err error) { func runContainer(config *configs.Config, console string, args ...string) (buffers *stdBuffers, exitCode int, err error) {
if err := writeConfig(config); err != nil { container, err := newContainer(config)
if err != nil {
return nil, -1, err return nil, -1, err
} }
defer container.Destroy()
buffers = newStdBuffers() buffers = newStdBuffers()
exitCode, err = namespaces.Exec(config, buffers.Stdin, buffers.Stdout, buffers.Stderr, process := &libcontainer.Process{
console, config.RootFs, args, namespaces.DefaultCreateCommand, nil) Args: args,
Env: standardEnvironment,
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Stderr: buffers.Stderr,
}
err = container.Start(process)
if err != nil {
return nil, -1, err
}
ps, err := process.Wait()
if err != nil {
return nil, -1, err
}
status := ps.Sys().(syscall.WaitStatus)
if status.Exited() {
exitCode = status.ExitStatus()
} else if status.Signaled() {
exitCode = -int(status.Signal())
} else {
return nil, -1, err
}
return return
} }

View file

@ -1,209 +0,0 @@
// +build linux
package mount
import (
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/docker/libcontainer/label"
"github.com/docker/libcontainer/mount/nodes"
)
// default mount point flags
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
// mount describes a single system mount. mountSystem creates path with
// os.MkdirAll and then passes the fields to syscall.Mount as
// (source, path, device, flags, data).
type mount struct {
source string
path string
device string
flags int
data string
}
// InitializeMountNamespace sets up the devices, mount points, and filesystems for use inside a
// new mount namespace.
//
// rootfs is the container's root directory; console is forwarded to
// SetupPtmx; sysReadonly is forwarded to mountSystem; mountConfig supplies the
// user mounts, device nodes, mount label and the NoPivotRoot / ReadonlyFs
// switches. The steps run strictly in the order written below and the first
// failing step aborts the setup with its error.
func InitializeMountNamespace(rootfs, console string, sysReadonly bool, mountConfig *MountConfig) error {
var (
err error
flag = syscall.MS_PRIVATE
)
// When pivot_root is not going to be used, "/" is remounted with MS_SLAVE
// instead of MS_PRIVATE.
if mountConfig.NoPivotRoot {
flag = syscall.MS_SLAVE
}
// Apply the chosen flag recursively (MS_REC) to "/".
if err := syscall.Mount("", "/", "", uintptr(flag|syscall.MS_REC), ""); err != nil {
return fmt.Errorf("mounting / with flags %X %s", (flag | syscall.MS_REC), err)
}
// Bind-mount rootfs onto itself.
// NOTE(review): presumably so that rootfs is a mount point, which
// pivot_root(2) requires - confirm.
if err := syscall.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
return fmt.Errorf("mouting %s as bind %s", rootfs, err)
}
if err := mountSystem(rootfs, sysReadonly, mountConfig); err != nil {
return fmt.Errorf("mount system %s", err)
}
// apply any user specified mounts within the new mount namespace
for _, m := range mountConfig.Mounts {
if err := m.Mount(rootfs, mountConfig.MountLabel); err != nil {
return err
}
}
if err := nodes.CreateDeviceNodes(rootfs, mountConfig.DeviceNodes); err != nil {
return fmt.Errorf("create device nodes %s", err)
}
if err := SetupPtmx(rootfs, console, mountConfig.MountLabel); err != nil {
return err
}
// stdin, stdout and stderr could be pointing to /dev/null from parent namespace.
// Re-open them inside this namespace.
if err := reOpenDevNull(rootfs); err != nil {
return fmt.Errorf("Failed to reopen /dev/null %s", err)
}
if err := setupDevSymlinks(rootfs); err != nil {
return fmt.Errorf("dev symlinks %s", err)
}
// Change into rootfs before switching roots; MsMoveRoot calls chroot(".").
if err := syscall.Chdir(rootfs); err != nil {
return fmt.Errorf("chdir into %s %s", rootfs, err)
}
// Switch the root filesystem using the strategy selected by NoPivotRoot.
if mountConfig.NoPivotRoot {
err = MsMoveRoot(rootfs)
} else {
err = PivotRoot(rootfs)
}
if err != nil {
return err
}
// Runs after the root switch, so SetReadonly's remount of "/" applies to the
// container's root.
if mountConfig.ReadonlyFs {
if err := SetReadonly(); err != nil {
return fmt.Errorf("set readonly %s", err)
}
}
// Set the process umask to 0022; the previous mask is discarded.
syscall.Umask(0022)
return nil
}
// mountSystem performs every mount returned by newSystemMounts for rootfs:
// it creates the mount point directory (mode 0755, an "already exists" error
// is tolerated) and then mounts onto it. The first failure is returned with
// the failing step and path in the message.
func mountSystem(rootfs string, sysReadonly bool, mountConfig *MountConfig) error {
	systemMounts := newSystemMounts(rootfs, mountConfig.MountLabel, sysReadonly)
	for idx := range systemMounts {
		sm := systemMounts[idx]

		mkdirErr := os.MkdirAll(sm.path, 0755)
		if mkdirErr != nil && !os.IsExist(mkdirErr) {
			return fmt.Errorf("mkdirall %s %s", sm.path, mkdirErr)
		}

		mountErr := syscall.Mount(sm.source, sm.path, sm.device, uintptr(sm.flags), sm.data)
		if mountErr != nil {
			return fmt.Errorf("mounting %s into %s %s", sm.source, sm.path, mountErr)
		}
	}
	return nil
}
// createIfNotExists makes sure something exists at path.
//
// If path can be stat'ed nothing happens. If it does not exist, a directory
// (isDir == true) or an empty file (isDir == false) is created, together with
// any missing parent directories (mode 0755). Stat failures other than
// "does not exist" are ignored and nil is returned.
func createIfNotExists(path string, isDir bool) error {
	_, statErr := os.Stat(path)
	if statErr == nil || !os.IsNotExist(statErr) {
		return nil
	}

	if isDir {
		return os.MkdirAll(path, 0755)
	}

	if mkdirErr := os.MkdirAll(filepath.Dir(path), 0755); mkdirErr != nil {
		return mkdirErr
	}
	placeholder, openErr := os.OpenFile(path, os.O_CREATE, 0755)
	if openErr != nil {
		return openErr
	}
	placeholder.Close()
	return nil
}
// setupDevSymlinks creates the standard /dev symlinks inside rootfs:
// /dev/fd, /dev/stdin, /dev/stdout and /dev/stderr pointing into
// /proc/self/fd, plus /dev/kcore when /proc/kcore can be stat'ed.
// Links that already exist are left alone; any other symlink failure is
// returned.
func setupDevSymlinks(rootfs string) error {
	type devLink struct {
		target string // what the link points to
		name   string // link location, relative to rootfs
	}
	wanted := []devLink{
		{target: "/proc/self/fd", name: "/dev/fd"},
		{target: "/proc/self/fd/0", name: "/dev/stdin"},
		{target: "/proc/self/fd/1", name: "/dev/stdout"},
		{target: "/proc/self/fd/2", name: "/dev/stderr"},
	}

	// kcore support can be toggled with CONFIG_PROC_KCORE, so the /dev/kcore
	// link is only requested when /proc/kcore is present.
	if _, statErr := os.Stat("/proc/kcore"); statErr == nil {
		wanted = append(wanted, devLink{target: "/proc/kcore", name: "/dev/kcore"})
	}

	for _, l := range wanted {
		linkPath := filepath.Join(rootfs, l.name)
		linkErr := os.Symlink(l.target, linkPath)
		if linkErr == nil || os.IsExist(linkErr) {
			continue
		}
		return fmt.Errorf("symlink %s %s %s", l.target, linkPath, linkErr)
	}
	return nil
}
// newSystemMounts returns the fixed list of system mounts for rootfs, in
// mount order: proc, a tmpfs on dev, shm, mqueue, devpts and finally sysfs.
// mountLabel is folded into the data string of the tmpfs and devpts mounts
// via label.FormatMountLabel; sysReadonly adds MS_RDONLY to the sysfs mount.
//
// TODO: this needs a cleaner, more dynamic way of handling system and
// standard bind mounts.
func newSystemMounts(rootfs, mountLabel string, sysReadonly bool) []mount {
	sysfsFlags := defaultMountFlags
	if sysReadonly {
		sysfsFlags |= syscall.MS_RDONLY
	}

	return []mount{
		{
			source: "proc",
			path:   filepath.Join(rootfs, "proc"),
			device: "proc",
			flags:  defaultMountFlags,
		},
		{
			source: "tmpfs",
			path:   filepath.Join(rootfs, "dev"),
			device: "tmpfs",
			flags:  syscall.MS_NOSUID | syscall.MS_STRICTATIME,
			data:   label.FormatMountLabel("mode=755", mountLabel),
		},
		{
			source: "shm",
			path:   filepath.Join(rootfs, "dev", "shm"),
			device: "tmpfs",
			flags:  defaultMountFlags,
			data:   label.FormatMountLabel("mode=1777,size=65536k", mountLabel),
		},
		{
			source: "mqueue",
			path:   filepath.Join(rootfs, "dev", "mqueue"),
			device: "mqueue",
			flags:  defaultMountFlags,
		},
		{
			source: "devpts",
			path:   filepath.Join(rootfs, "dev", "pts"),
			device: "devpts",
			flags:  syscall.MS_NOSUID | syscall.MS_NOEXEC,
			data:   label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel),
		},
		{
			source: "sysfs",
			path:   filepath.Join(rootfs, "sys"),
			device: "sysfs",
			flags:  sysfsFlags,
		},
	}
}
// reOpenDevNull re-points stdin, stdout and stderr at the /dev/null found
// under rootfs whenever they currently refer to /dev/null.
//
// Each of fds 0-2 is fstat'ed and its device number (Rdev) compared with that
// of rootfs/dev/null; on a match the freshly opened descriptor is dup2'ed
// over it. An error is returned if /dev/null cannot be opened or stat'ed, if
// any of the three descriptors cannot be stat'ed, or if dup2 fails.
func reOpenDevNull(rootfs string) error {
	// Open read-write: the same descriptor is duplicated onto stdout and
	// stderr as well as stdin, and a read-only descriptor would make every
	// later write to them fail with EBADF.
	file, err := os.OpenFile(filepath.Join(rootfs, "/dev/null"), os.O_RDWR, 0)
	if err != nil {
		return fmt.Errorf("Failed to open /dev/null - %s", err)
	}
	// The duplicates created by dup2 stay open after this close.
	defer file.Close()

	var stat, devNullStat syscall.Stat_t
	if err = syscall.Fstat(int(file.Fd()), &devNullStat); err != nil {
		return fmt.Errorf("Failed to stat /dev/null - %s", err)
	}
	for fd := 0; fd < 3; fd++ {
		if err = syscall.Fstat(fd, &stat); err != nil {
			return fmt.Errorf("Failed to stat fd %d - %s", fd, err)
		}
		if stat.Rdev == devNullStat.Rdev {
			// Close and re-open the fd.
			if err = syscall.Dup2(int(file.Fd()), fd); err != nil {
				return fmt.Errorf("Failed to dup fd %d to fd %d - %s", file.Fd(), fd, err)
			}
		}
	}
	return nil
}

View file

@ -1,109 +0,0 @@
package mount
import (
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/docker/docker/pkg/symlink"
"github.com/docker/libcontainer/label"
)
// Mount describes one user-specified mount to be applied inside the
// container's rootfs by the Mount method.
type Mount struct {
// Type selects the mount implementation; Mount handles "bind" and "tmpfs"
// and rejects anything else.
Type string `json:"type,omitempty"`
Source string `json:"source,omitempty"` // Source path, in the host namespace
Destination string `json:"destination,omitempty"` // Destination path, in the container
// Writable, when false, makes a bind mount read-only (MS_RDONLY plus a
// remount).
Writable bool `json:"writable,omitempty"`
Relabel string `json:"relabel,omitempty"` // Relabel source if set, "z" indicates shared, "Z" indicates unshared
// Private, when true, makes the bind mount target MS_PRIVATE after mounting.
Private bool `json:"private,omitempty"`
// Slave, when true, adds MS_SLAVE to the bind mount flags.
Slave bool `json:"slave,omitempty"`
}
// Mount applies m inside rootfs, dispatching on m.Type: "bind" and "tmpfs"
// are supported, any other type yields an "unsupported mount type" error.
// mountLabel is passed through to the selected implementation.
func (m *Mount) Mount(rootfs, mountLabel string) error {
	if m.Type == "bind" {
		return m.bindMount(rootfs, mountLabel)
	}
	if m.Type == "tmpfs" {
		return m.tmpfsMount(rootfs, mountLabel)
	}
	return fmt.Errorf("unsupported mount type %s for %s", m.Type, m.Destination)
}
// bindMount recursively bind-mounts m.Source onto m.Destination inside rootfs.
//
// The destination is resolved with symlink.FollowSymlinkInScope and created
// (as a directory or a file, matching the source) when missing. Read-only
// mounts are remounted after the initial bind; afterwards the source is
// relabeled when m.Relabel is set and the target is made private when
// m.Private is set.
func (m *Mount) bindMount(rootfs, mountLabel string) error {
	mountFlags := syscall.MS_BIND | syscall.MS_REC
	if !m.Writable {
		mountFlags |= syscall.MS_RDONLY
	}
	if m.Slave {
		mountFlags |= syscall.MS_SLAVE
	}

	srcInfo, err := os.Stat(m.Source)
	if err != nil {
		return err
	}

	// FIXME: (crosbymichael) This does not belong here and should be done a layer above
	target, err := symlink.FollowSymlinkInScope(filepath.Join(rootfs, m.Destination), rootfs)
	if err != nil {
		return err
	}

	if createErr := createIfNotExists(target, srcInfo.IsDir()); createErr != nil {
		return fmt.Errorf("creating new bind mount target %s", createErr)
	}

	if mountErr := syscall.Mount(m.Source, target, "bind", uintptr(mountFlags), ""); mountErr != nil {
		return fmt.Errorf("mounting %s into %s %s", m.Source, target, mountErr)
	}

	// Read-only mounts get a second, MS_REMOUNT pass with the same flags.
	if !m.Writable {
		remountErr := syscall.Mount(m.Source, target, "bind", uintptr(mountFlags|syscall.MS_REMOUNT), "")
		if remountErr != nil {
			return fmt.Errorf("remounting %s into %s %s", m.Source, target, remountErr)
		}
	}

	if m.Relabel != "" {
		if relabelErr := label.Relabel(m.Source, mountLabel, m.Relabel); relabelErr != nil {
			return fmt.Errorf("relabeling %s to %s %s", m.Source, mountLabel, relabelErr)
		}
	}

	if m.Private {
		privateErr := syscall.Mount("", target, "none", uintptr(syscall.MS_PRIVATE), "")
		if privateErr != nil {
			return fmt.Errorf("mounting %s private %s", target, privateErr)
		}
	}
	return nil
}
// tmpfsMount mounts a fresh tmpfs on m.Destination inside rootfs using
// defaultMountFlags. The destination is resolved with
// symlink.FollowSymlinkInScope and created as a directory when missing; the
// mount data is derived from mountLabel. m.Source is not used.
func (m *Mount) tmpfsMount(rootfs, mountLabel string) error {
	mountData := label.FormatMountLabel("", mountLabel)

	// FIXME: (crosbymichael) This does not belong here and should be done a layer above
	target, err := symlink.FollowSymlinkInScope(filepath.Join(rootfs, m.Destination), rootfs)
	if err != nil {
		return err
	}

	if createErr := createIfNotExists(target, true); createErr != nil {
		return fmt.Errorf("creating new tmpfs mount target %s", createErr)
	}

	mountErr := syscall.Mount("tmpfs", target, "tmpfs", uintptr(defaultMountFlags), mountData)
	if mountErr != nil {
		return fmt.Errorf("%s mounting %s in tmpfs", mountErr, target)
	}
	return nil
}

View file

@ -1,28 +0,0 @@
package mount
import (
"errors"
"github.com/docker/libcontainer/devices"
)
var ErrUnsupported = errors.New("Unsupported method")
// MountConfig holds the mount-related settings of a container: how the root
// filesystem is entered, whether it is read-only, and which extra mounts and
// device nodes are set up.
type MountConfig struct {
// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs
// This is a common option when the container is running in ramdisk
NoPivotRoot bool `json:"no_pivot_root,omitempty"`
// ReadonlyFs will remount the container's rootfs as readonly where only externally mounted
// bind mounts are writable
ReadonlyFs bool `json:"readonly_fs,omitempty"`
// Mounts specify additional source and destination paths that will be mounted inside the container's
// rootfs and mount namespace if specified
Mounts []*Mount `json:"mounts,omitempty"`
// The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well!
DeviceNodes []*devices.Device `json:"device_nodes,omitempty"`
// MountLabel is the label string serialized as "mount_label".
// NOTE(review): presumably an SELinux mount label - confirm against the
// label package.
MountLabel string `json:"mount_label,omitempty"`
}

View file

@ -1,20 +0,0 @@
// +build linux
package mount
import (
"fmt"
"syscall"
)
// MsMoveRoot moves the rootfs mount onto "/" with MS_MOVE, chroots into the
// current directory and finally changes directory to "/". It is the
// alternative to PivotRoot. The chroot(".") relies on the caller having
// changed into rootfs beforehand.
func MsMoveRoot(rootfs string) error {
	moveErr := syscall.Mount(rootfs, "/", "", syscall.MS_MOVE, "")
	if moveErr != nil {
		return fmt.Errorf("mount move %s into / %s", rootfs, moveErr)
	}

	chrootErr := syscall.Chroot(".")
	if chrootErr != nil {
		return fmt.Errorf("chroot . %s", chrootErr)
	}

	return syscall.Chdir("/")
}

View file

@ -1,57 +0,0 @@
// +build linux
package nodes
import (
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/docker/libcontainer/devices"
)
// CreateDeviceNodes creates every device node in nodesToCreate under rootfs,
// stopping at the first failure. The process umask is cleared for the
// duration of the call and restored before returning, so the nodes get
// exactly the mode they ask for.
func CreateDeviceNodes(rootfs string, nodesToCreate []*devices.Device) error {
	previousMask := syscall.Umask(0000)
	defer syscall.Umask(previousMask)

	for idx := range nodesToCreate {
		if createErr := CreateDeviceNode(rootfs, nodesToCreate[idx]); createErr != nil {
			return createErr
		}
	}
	return nil
}
// CreateDeviceNode creates a single device node under rootfs.
//
// The parent directory of the node is created when missing (mode 0755). The
// node type must be 'c' (character) or 'b' (block); any other type is
// rejected. An already existing node is tolerated, and ownership is set to
// node.Uid:node.Gid afterwards. Every failure is returned with the underlying
// error included in the message.
func CreateDeviceNode(rootfs string, node *devices.Device) error {
	var (
		dest   = filepath.Join(rootfs, node.Path)
		parent = filepath.Dir(dest)
	)
	if err := os.MkdirAll(parent, 0755); err != nil {
		return err
	}

	fileMode := node.FileMode
	switch node.Type {
	case 'c':
		fileMode |= syscall.S_IFCHR
	case 'b':
		fileMode |= syscall.S_IFBLK
	default:
		return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
	}

	if err := syscall.Mknod(dest, uint32(fileMode), devices.Mkdev(node.MajorNumber, node.MinorNumber)); err != nil && !os.IsExist(err) {
		return fmt.Errorf("mknod %s %s", node.Path, err)
	}
	if err := syscall.Chown(dest, int(node.Uid), int(node.Gid)); err != nil {
		// Include the cause: without it the caller cannot tell why chown failed.
		return fmt.Errorf("chown %s to %d:%d %s", node.Path, node.Uid, node.Gid, err)
	}
	return nil
}

View file

@ -1,13 +0,0 @@
// +build !linux
package nodes
import (
"errors"
"github.com/docker/libcontainer/devices"
)
// CreateDeviceNodes is the stub compiled on non-Linux platforms (this file
// carries the "!linux" build constraint). It ignores its arguments and always
// returns an "Unsupported method" error.
func CreateDeviceNodes(rootfs string, nodesToCreate []*devices.Device) error {
return errors.New("Unsupported method")
}

View file

@ -1,34 +0,0 @@
// +build linux
package mount
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"syscall"
)
// PivotRoot makes rootfs the new root filesystem using pivot_root.
//
// A temporary ".pivot_root*" directory is created inside rootfs to receive
// the old root. After the pivot the working directory is changed to "/", the
// old root is lazily unmounted (MNT_DETACH) and the temporary directory is
// removed.
func PivotRoot(rootfs string) error {
	oldRoot, tmpErr := ioutil.TempDir(rootfs, ".pivot_root")
	if tmpErr != nil {
		return fmt.Errorf("can't create pivot_root dir %s, error %v", oldRoot, tmpErr)
	}

	if pivotErr := syscall.PivotRoot(rootfs, oldRoot); pivotErr != nil {
		return fmt.Errorf("pivot_root %s", pivotErr)
	}

	if chdirErr := syscall.Chdir("/"); chdirErr != nil {
		return fmt.Errorf("chdir / %s", chdirErr)
	}

	// After the pivot the temporary directory sits directly under the new "/".
	oldRootInside := filepath.Join("/", filepath.Base(oldRoot))
	if unmountErr := syscall.Unmount(oldRootInside, syscall.MNT_DETACH); unmountErr != nil {
		return fmt.Errorf("unmount pivot_root dir %s", unmountErr)
	}

	return os.Remove(oldRootInside)
}

View file

@ -1,30 +0,0 @@
// +build linux
package mount
import (
"fmt"
"os"
"path/filepath"
"github.com/docker/libcontainer/console"
)
// SetupPtmx replaces rootfs/dev/ptmx with a symlink to "pts/ptmx" and, when
// consolePath is non-empty, sets up the console through console.Setup.
// A dev/ptmx that does not exist yet is not an error.
func SetupPtmx(rootfs, consolePath, mountLabel string) error {
	ptmxPath := filepath.Join(rootfs, "dev/ptmx")

	removeErr := os.Remove(ptmxPath)
	if removeErr != nil && !os.IsNotExist(removeErr) {
		return removeErr
	}

	if linkErr := os.Symlink("pts/ptmx", ptmxPath); linkErr != nil {
		return fmt.Errorf("symlink dev ptmx %s", linkErr)
	}

	if consolePath == "" {
		return nil
	}
	if setupErr := console.Setup(rootfs, consolePath, mountLabel); setupErr != nil {
		return setupErr
	}
	return nil
}

View file

@ -1,11 +0,0 @@
// +build linux
package mount
import (
"syscall"
)
// SetReadonly remounts "/" onto itself with
// MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC and returns the error from
// syscall.Mount, if any.
func SetReadonly() error {
return syscall.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "")
}

View file

@ -1,31 +0,0 @@
// +build linux
package mount
import "syscall"
// RemountProc lazily unmounts /proc (MNT_DETACH) and mounts a fresh proc
// filesystem there with defaultMountFlags. The first failing step's error is
// returned.
func RemountProc() error {
	unmountErr := syscall.Unmount("/proc", syscall.MNT_DETACH)
	if unmountErr != nil {
		return unmountErr
	}
	return syscall.Mount("proc", "/proc", "proc", uintptr(defaultMountFlags), "")
}
// RemountSys lazily unmounts /sys (MNT_DETACH) and mounts a fresh sysfs there
// with defaultMountFlags. An EINVAL from the unmount is treated as "nothing
// to remount": no mount is attempted and nil is returned. Any other unmount
// error is returned as is.
func RemountSys() error {
	switch unmountErr := syscall.Unmount("/sys", syscall.MNT_DETACH); unmountErr {
	case nil:
		return syscall.Mount("sysfs", "/sys", "sysfs", uintptr(defaultMountFlags), "")
	case syscall.EINVAL:
		return nil
	default:
		return unmountErr
	}
}

View file

@ -1,10 +0,0 @@
package namespaces
import (
"os"
"os/exec"
"github.com/docker/libcontainer"
)
// CreateCommand is the signature of a function that builds the *exec.Cmd for
// a container from its config, console, data path, init program, child pipe
// and arguments.
type CreateCommand func(container *libcontainer.Config, console, dataPath, init string, childPipe *os.File, args []string) *exec.Cmd

View file

@ -1,229 +0,0 @@
// +build linux
package namespaces
import (
"encoding/json"
"io"
"os"
"os/exec"
"syscall"
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/cgroups/fs"
"github.com/docker/libcontainer/cgroups/systemd"
"github.com/docker/libcontainer/network"
"github.com/docker/libcontainer/system"
)
const (
// EXIT_SIGNAL_OFFSET is added to the signal number to form the exit code
// that Exec returns when the container's init process was terminated by a
// signal.
EXIT_SIGNAL_OFFSET = 128
)
// TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work.
// Move this to libcontainer package.
// Exec performs setup outside of a namespace so that a container can be
// executed. Exec is a high level function for working with container namespaces.
//
// It builds the init command with createCommand, starts it, applies cgroups
// and networking from the outside, sends the network state to the child over
// the init pipe, saves the container state under dataPath, and then waits for
// the process to finish. startCallback, when non-nil, is invoked once the
// child has reported no initialization error.
//
// The returned int is the exit status of the init process, or
// EXIT_SIGNAL_OFFSET plus the signal number when it was killed by a signal.
// On any setup failure the process is killed and reaped and (-1, err) is
// returned.
func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Writer, console, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) {
var err error
// create a pipe so that we can synchronize with the namespaced process and
// pass the state and configuration to the child process
parent, child, err := newInitPipe()
if err != nil {
return -1, err
}
defer parent.Close()
command := createCommand(container, console, dataPath, os.Args[0], child, args)
// Note: these are only used in non-tty mode
// if there is a tty for the container it will be opened within the namespace and the
// fds will be duped to stdin, stdout, and stderr
command.Stdin = stdin
command.Stdout = stdout
command.Stderr = stderr
if err := command.Start(); err != nil {
child.Close()
return -1, err
}
// The started process holds its own copy of the child end; close ours.
child.Close()
wait := func() (*os.ProcessState, error) {
ps, err := command.Process.Wait()
// we should kill all processes in cgroup when init is died if we use
// host PID namespace
if !container.Namespaces.Contains(libcontainer.NEWPID) {
killAllPids(container)
}
return ps, err
}
// terminate kills and reaps the init process, then reports terr with exit
// code -1. It is the common failure path for every setup step below.
terminate := func(terr error) (int, error) {
// TODO: log the errors for kill and wait
command.Process.Kill()
wait()
return -1, terr
}
started, err := system.GetProcessStartTime(command.Process.Pid)
if err != nil {
return terminate(err)
}
// Do this before syncing with child so that no children
// can escape the cgroup
cgroupPaths, err := SetupCgroups(container, command.Process.Pid)
if err != nil {
return terminate(err)
}
defer cgroups.RemovePaths(cgroupPaths)
var networkState network.NetworkState
if err := InitializeNetworking(container, command.Process.Pid, &networkState); err != nil {
return terminate(err)
}
// send the state to the container's init process then shutdown writes for the parent
if err := json.NewEncoder(parent).Encode(networkState); err != nil {
return terminate(err)
}
// shutdown writes for the parent side of the pipe
if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
return terminate(err)
}
state := &libcontainer.State{
InitPid: command.Process.Pid,
InitStartTime: started,
NetworkState: networkState,
CgroupPaths: cgroupPaths,
}
if err := libcontainer.SaveState(dataPath, state); err != nil {
return terminate(err)
}
// The saved state is removed again when Exec returns.
defer libcontainer.DeleteState(dataPath)
// wait for the child process to fully complete and receive an error message
// if one was encountered
// io.EOF here means the child closed the pipe without sending an error.
var ierr *initError
if err := json.NewDecoder(parent).Decode(&ierr); err != nil && err != io.EOF {
return terminate(err)
}
if ierr != nil {
return terminate(ierr)
}
if startCallback != nil {
startCallback()
}
ps, err := wait()
if err != nil {
// An *exec.ExitError is tolerated; any other wait failure is returned.
if _, ok := err.(*exec.ExitError); !ok {
return -1, err
}
}
// waiting for pipe flushing
command.Wait()
waitStatus := ps.Sys().(syscall.WaitStatus)
if waitStatus.Signaled() {
return EXIT_SIGNAL_OFFSET + int(waitStatus.Signal()), nil
}
return waitStatus.ExitStatus(), nil
}
// killAllPids iterates over all of the container's processes, sending a
// SIGKILL to each one and then waiting for them.
//
// The container's cgroup is frozen while the pid list is read and the kills
// are sent, and thawed again before waiting. The freezer and pid lookup of
// the systemd cgroup driver are used when systemd.UseSystemd() reports true,
// the fs driver otherwise. The only error reported is a failure to list the
// pids; in that case the cgroup is thawed again before returning so that the
// container is not left frozen.
func killAllPids(container *libcontainer.Config) error {
	var (
		procs   []*os.Process
		freeze  = fs.Freeze
		getPids = fs.GetPids
	)
	if systemd.UseSystemd() {
		freeze = systemd.Freeze
		getPids = systemd.GetPids
	}

	// Freeze/thaw failures are ignored: killing is best effort.
	freeze(container.Cgroups, cgroups.Frozen)
	pids, err := getPids(container.Cgroups)
	if err != nil {
		// Undo the freeze; otherwise every process in the cgroup would stay
		// suspended after this early return.
		freeze(container.Cgroups, cgroups.Thawed)
		return err
	}
	for _, pid := range pids {
		// TODO: log err without aborting if we are unable to find
		// a single PID
		if p, err := os.FindProcess(pid); err == nil {
			procs = append(procs, p)
			p.Kill()
		}
	}
	freeze(container.Cgroups, cgroups.Thawed)
	for _, p := range procs {
		p.Wait()
	}
	return nil
}
// DefaultCreateCommand returns an exec.Cmd that runs init with the arguments
// "init", "--" followed by args, with Cloneflags set to the namespaces
// defined in the container's configuration.
//
// container: the configuration providing RootFs and Namespaces
// console:   the /dev/console to setup inside the container
// dataPath:  exported to the child as the "data_path" environment variable
// init:      the program executed inside the namespaces
// pipe:      sync pipe to synchronize the parent and child processes; it is
//            the only entry in ExtraFiles and is announced as "pipe=3"
// args:      the arguments to pass to the container to run as the user's program
//
// The command runs in the container's RootFs, inherits the current
// environment plus the three variables above, and is sent SIGKILL when its
// parent dies.
func DefaultCreateCommand(container *libcontainer.Config, console, dataPath, init string, pipe *os.File, args []string) *exec.Cmd {
	initArgs := make([]string, 0, len(args)+2)
	initArgs = append(initArgs, "init", "--")
	initArgs = append(initArgs, args...)

	cmd := exec.Command(init, initArgs...)
	// make sure the process is executed inside the context of the rootfs
	cmd.Dir = container.RootFs
	cmd.Env = append(os.Environ(),
		"console="+console,
		"pipe=3",
		"data_path="+dataPath,
	)
	cmd.ExtraFiles = []*os.File{pipe}

	attr := cmd.SysProcAttr
	if attr == nil {
		attr = &syscall.SysProcAttr{}
		cmd.SysProcAttr = attr
	}
	attr.Cloneflags = uintptr(GetNamespaceFlags(container.Namespaces))
	attr.Pdeathsig = syscall.SIGKILL
	return cmd
}
// SetupCgroups applies the cgroup restrictions from the container's
// configuration to the process nspid, using the systemd backend when systemd
// is in use and the cgroup filesystem backend otherwise. It returns whatever
// the backend's Apply returns; a container without a cgroup configuration
// yields an empty, non-nil map and no error.
func SetupCgroups(container *libcontainer.Config, nspid int) (map[string]string, error) {
	cg := container.Cgroups
	if cg == nil {
		return map[string]string{}, nil
	}
	if systemd.UseSystemd() {
		return systemd.Apply(cg, nspid)
	}
	return fs.Apply(cg, nspid)
}
// InitializeNetworking creates the container's network stack outside of the
// namespace: for every configured network it looks up the strategy for the
// network's type and calls its Create with the process id nspid and the shared
// networkState. The first error aborts the loop and is returned as is.
func InitializeNetworking(container *libcontainer.Config, nspid int, networkState *network.NetworkState) error {
	for _, netCfg := range container.Networks {
		strategy, err := network.GetStrategy(netCfg.Type)
		if err != nil {
			return err
		}
		err = strategy.Create((*network.Network)(netCfg), nspid, networkState)
		if err != nil {
			return err
		}
	}
	return nil
}

View file

@ -1,132 +0,0 @@
// +build linux
package namespaces
import (
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strconv"
"syscall"
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/apparmor"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/label"
"github.com/docker/libcontainer/system"
)
// ExecIn reexec's the initPath with the argv 0 rewritten to "nsenter-<action>" so that it is able to run the
// setns code in a single threaded environment joining the existing containers' namespaces.
//
// The child's argument vector is: "nsenter-<action>", "--nspid", <state.InitPid>, optionally
// "--console", <console>, then "--" followed by userArgs.
//
// Once the child is started, the parent:
//  1. moves the child into the cgroups recorded in state (EnterCgroups),
//  2. writes the byte "1" to the sync pipe,
//  3. JSON-encodes container onto the same pipe,
//  4. invokes startCallback(cmd) if it is non-nil, and
//  5. waits for the child and returns its exit status.
//
// If any of steps 1-3 fail the child is killed and reaped, and (-1, err) is returned. A Wait error
// that is not an *exec.ExitError is also returned with -1.
func ExecIn(container *libcontainer.Config, state *libcontainer.State, userArgs []string, initPath, action string,
stdin io.Reader, stdout, stderr io.Writer, console string, startCallback func(*exec.Cmd)) (int, error) {
args := []string{fmt.Sprintf("nsenter-%s", action), "--nspid", strconv.Itoa(state.InitPid)}
if console != "" {
args = append(args, "--console", console)
}
// The Cmd is built by hand (instead of exec.Command) so that Args[0] can differ from Path.
cmd := &exec.Cmd{
Path: initPath,
Args: append(args, append([]string{"--"}, userArgs...)...),
}
// A bare program name is resolved through PATH; a failed lookup is ignored and
// initPath is used unchanged.
if filepath.Base(initPath) == initPath {
if lp, err := exec.LookPath(initPath); err == nil {
cmd.Path = lp
}
}
parent, child, err := newInitPipe()
if err != nil {
return -1, err
}
defer parent.Close()
// Note: these are only used in non-tty mode
// if there is a tty for the container it will be opened within the namespace and the
// fds will be duped to stdin, stdout, and stderr
cmd.Stdin = stdin
cmd.Stdout = stdout
cmd.Stderr = stderr
// child is the only extra file, so the started process sees it as fd 3.
cmd.ExtraFiles = []*os.File{child}
if err := cmd.Start(); err != nil {
child.Close()
return -1, err
}
// The started process holds its own copy of the child end; drop ours.
child.Close()
// terminate kills and reaps the child, then reports terr with exit code -1.
terminate := func(terr error) (int, error) {
// TODO: log the errors for kill and wait
cmd.Process.Kill()
cmd.Wait()
return -1, terr
}
// Enter cgroups.
if err := EnterCgroups(state, cmd.Process.Pid); err != nil {
return terminate(err)
}
// finish cgroups' setup, unblock the child process.
if _, err := parent.WriteString("1"); err != nil {
return terminate(err)
}
// Hand the container configuration to the child over the same pipe.
if err := json.NewEncoder(parent).Encode(container); err != nil {
return terminate(err)
}
if startCallback != nil {
startCallback(cmd)
}
// An *exec.ExitError only means the child exited unsuccessfully; its status is
// reported through the return value below rather than as an error.
if err := cmd.Wait(); err != nil {
if _, ok := err.(*exec.ExitError); !ok {
return -1, err
}
}
return cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil
}
// FinalizeSetns expects that the setns calls have already been made and that
// the process has joined an existing namespace. It loads the container's
// environment, applies rlimits, finalizes the namespace, applies the AppArmor
// profile and (when one is configured) the process label, and finally execs
// args[0] with args as its argument vector.
//
// It only returns when one of those steps fails; a nil return from Execv is
// treated as impossible and panics.
func FinalizeSetns(container *libcontainer.Config, args []string) error {
	// clear the current processes env and replace it with the environment defined on the container
	err := LoadContainerEnvironment(container)
	if err != nil {
		return err
	}

	if err = setupRlimits(container); err != nil {
		return fmt.Errorf("setup rlimits %s", err)
	}

	if err = FinalizeNamespace(container); err != nil {
		return err
	}

	profile := container.AppArmorProfile
	if err = apparmor.ApplyProfile(profile); err != nil {
		return fmt.Errorf("set apparmor profile %s: %s", profile, err)
	}

	if container.ProcessLabel != "" {
		if err = label.SetProcessLabel(container.ProcessLabel); err != nil {
			return err
		}
	}

	if err = system.Execv(args[0], args, os.Environ()); err != nil {
		return err
	}
	panic("unreachable")
}
// EnterCgroups hands the cgroup paths recorded in state (state.CgroupPaths)
// and pid to cgroups.EnterPid and returns its result unchanged.
// NOTE(review): presumably EnterPid adds pid to each of those cgroups --
// confirm against the cgroups package.
func EnterCgroups(state *libcontainer.State, pid int) error {
return cgroups.EnterPid(state.CgroupPaths, pid)
}

View file

@ -1,331 +0,0 @@
// +build linux
package namespaces
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"strings"
"syscall"
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/apparmor"
"github.com/docker/libcontainer/console"
"github.com/docker/libcontainer/label"
"github.com/docker/libcontainer/mount"
"github.com/docker/libcontainer/netlink"
"github.com/docker/libcontainer/network"
"github.com/docker/libcontainer/security/capabilities"
"github.com/docker/libcontainer/security/restrict"
"github.com/docker/libcontainer/system"
"github.com/docker/libcontainer/user"
"github.com/docker/libcontainer/utils"
)
// TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work.
// Move this to libcontainer package.
// Init is the init process that first runs inside a new namespace to setup mounts, users, networking,
// and other options required for the new container.
// The caller of Init function has to ensure that the go runtime is locked to an OS thread
// (using runtime.LockOSThread) else system calls like setns called within Init may not work as intended.
//
// Parameters:
//   container:     configuration describing the container to set up
//   uncleanRootfs: root filesystem path; it is resolved with utils.ResolveRootfs before use
//   consolePath:   console to open and make the controlling terminal; "" skips console setup
//   pipe:          sync pipe shared with the parent; the network state is read from it and any
//                  setup error is written back to it
//   args:          command to exec as the container's process (args[0] is the program)
//
// Any error raised during setup is returned and also reported to the parent over pipe as a
// JSON-encoded initError. The final step is system.Execv, whose result is returned directly.
func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, pipe *os.File, args []string) (err error) {
defer func() {
// if we have an error during the initialization of the container's init then send it back to the
// parent process in the form of an initError.
if err != nil {
// ensure that any data sent from the parent is consumed so it doesn't
// receive ECONNRESET when the child writes to the pipe.
ioutil.ReadAll(pipe)
// Inside the Encode call err still refers to Init's named result; the
// inner err declared by this if statement only holds the encoding error.
if err := json.NewEncoder(pipe).Encode(initError{
Message: err.Error(),
}); err != nil {
panic(err)
}
}
// ensure that this pipe is always closed
pipe.Close()
}()
rootfs, err := utils.ResolveRootfs(uncleanRootfs)
if err != nil {
return err
}
// clear the current processes env and replace it with the environment
// defined on the container
if err := LoadContainerEnvironment(container); err != nil {
return err
}
// We always read this as it is a way to sync with the parent as well
var networkState *network.NetworkState
if err := json.NewDecoder(pipe).Decode(&networkState); err != nil {
return err
}
// join any namespaces via a path to the namespace fd if provided
if err := joinExistingNamespaces(container.Namespaces); err != nil {
return err
}
// Console handling happens in two steps around Setsid: the console is opened and
// duplicated first, and made the controlling terminal once the new session exists.
if consolePath != "" {
if err := console.OpenAndDup(consolePath); err != nil {
return err
}
}
if _, err := syscall.Setsid(); err != nil {
return fmt.Errorf("setsid %s", err)
}
if consolePath != "" {
if err := system.Setctty(); err != nil {
return fmt.Errorf("setctty %s", err)
}
}
if err := setupNetwork(container, networkState); err != nil {
return fmt.Errorf("setup networking %s", err)
}
if err := setupRoute(container); err != nil {
return fmt.Errorf("setup route %s", err)
}
if err := setupRlimits(container); err != nil {
return fmt.Errorf("setup rlimits %s", err)
}
label.Init()
if err := mount.InitializeMountNamespace(rootfs,
consolePath,
container.RestrictSys,
(*mount.MountConfig)(container.MountConfig)); err != nil {
return fmt.Errorf("setup mount namespace %s", err)
}
if container.Hostname != "" {
if err := syscall.Sethostname([]byte(container.Hostname)); err != nil {
return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err)
}
}
if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil {
return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err)
}
if err := label.SetProcessLabel(container.ProcessLabel); err != nil {
return fmt.Errorf("set process label %s", err)
}
// TODO: (crosbymichael) make this configurable at the Config level
if container.RestrictSys {
if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil {
return err
}
}
// Remember the parent death signal now so that it can be restored after
// FinalizeNamespace (see below).
pdeathSignal, err := system.GetParentDeathSignal()
if err != nil {
return fmt.Errorf("get parent death signal %s", err)
}
if err := FinalizeNamespace(container); err != nil {
return fmt.Errorf("finalize namespace %s", err)
}
// FinalizeNamespace can change user/group which clears the parent death
// signal, so we restore it here.
if err := RestoreParentDeathSignal(pdeathSignal); err != nil {
return fmt.Errorf("restore parent death signal %s", err)
}
// Hand over to the user's program.
// NOTE(review): presumably Execv only returns on failure -- confirm against the system package.
return system.Execv(args[0], args[0:], os.Environ())
}
// RestoreParentDeathSignal sets the parent death signal back to old.
//
// Nothing is done when old is 0 or when the current parent death signal
// already equals old. After changing the signal, the process sends itself
// SIGKILL if its parent pid is 1, i.e. the original parent has already died
// and the process was re-parented.
func RestoreParentDeathSignal(old int) error {
	if old == 0 {
		return nil
	}

	current, err := system.GetParentDeathSignal()
	if err != nil {
		return fmt.Errorf("get parent death signal %s", err)
	}

	if old != current {
		if err := system.ParentDeathSignal(uintptr(old)); err != nil {
			return fmt.Errorf("set parent death signal %s", err)
		}

		// Signal self if parent is already dead. Does nothing if running in a new
		// PID namespace, as Getppid will always return 0.
		if syscall.Getppid() == 1 {
			return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
		}
	}
	return nil
}
// SetupUser changes the supplementary groups, gid, and uid of the current
// process to those of the user configured for the container.
//
// The user is resolved from container.User against the passwd and group files
// reported by the user package, falling back to the current uid/gid and a
// home directory of "/". The supplementary groups are the resolved user's
// groups followed by container.AdditionalGroups. HOME is set to the user's
// home directory only when it is not already set to a non-empty value.
func SetupUser(container *libcontainer.Config) error {
	// Defaults used for anything the lookup cannot determine.
	fallback := user.ExecUser{
		Uid:  syscall.Getuid(),
		Gid:  syscall.Getgid(),
		Home: "/",
	}

	passwdPath, err := user.GetPasswdPath()
	if err != nil {
		return err
	}

	groupPath, err := user.GetGroupPath()
	if err != nil {
		return err
	}

	execUser, err := user.GetExecUserPath(container.User, &fallback, passwdPath, groupPath)
	if err != nil {
		return fmt.Errorf("get supplementary groups %s", err)
	}

	// Groups first, then gid, then uid.
	groups := append(execUser.Sgids, container.AdditionalGroups...)
	if err = syscall.Setgroups(groups); err != nil {
		return fmt.Errorf("setgroups %s", err)
	}
	if err = system.Setgid(execUser.Gid); err != nil {
		return fmt.Errorf("setgid %s", err)
	}
	if err = system.Setuid(execUser.Uid); err != nil {
		return fmt.Errorf("setuid %s", err)
	}

	// if we didn't get HOME already, set it based on the user's HOME
	if os.Getenv("HOME") == "" {
		if err = os.Setenv("HOME", execUser.Home); err != nil {
			return fmt.Errorf("set HOME %s", err)
		}
	}
	return nil
}
// setupNetwork initializes every network configured for the container: for
// each entry it looks up the strategy matching the network's type and calls
// its Initialize with that network's configuration and the shared
// networkState. The first failure stops the loop and is returned unchanged.
func setupNetwork(container *libcontainer.Config, networkState *network.NetworkState) error {
	for _, netCfg := range container.Networks {
		strategy, err := network.GetStrategy(netCfg.Type)
		if err != nil {
			return err
		}
		if err := strategy.Initialize((*network.Network)(netCfg), networkState); err != nil {
			return err
		}
	}
	return nil
}
// setupRoute adds every route listed in the container's configuration through
// netlink.AddRoute, stopping at and returning the first error.
func setupRoute(container *libcontainer.Config) error {
	for _, route := range container.Routes {
		err := netlink.AddRoute(route.Destination, route.Source, route.Gateway, route.InterfaceName)
		if err != nil {
			return err
		}
	}
	return nil
}
// setupRlimits applies each resource limit from the container's configuration
// to the current process via setrlimit, using the configured soft value as
// the current limit and the hard value as the maximum. It stops at the first
// limit that cannot be set.
func setupRlimits(container *libcontainer.Config) error {
	for _, rl := range container.Rlimits {
		limit := syscall.Rlimit{Cur: rl.Soft, Max: rl.Hard}
		if err := syscall.Setrlimit(rl.Type, &limit); err != nil {
			return fmt.Errorf("error setting rlimit type %v: %v", rl.Type, err)
		}
	}
	return nil
}
// FinalizeNamespace performs the last steps before the command is exec'd
// inside the namespace. In order, it:
//
//  1. marks every file descriptor from 3 upwards close-on-exec,
//  2. drops capabilities from the bounding set,
//  3. switches to the container's user while keeping capabilities across the
//     change,
//  4. drops all remaining capabilities that are not configured, and
//  5. changes into the container's working directory, if one is set.
//
// Each failure is returned wrapped with the name of the step that failed.
func FinalizeNamespace(container *libcontainer.Config) error {
	// Ensure that all non-standard fds we may have accidentally
	// inherited are marked close-on-exec so they stay out of the
	// container
	err := utils.CloseExecFrom(3)
	if err != nil {
		return fmt.Errorf("close open file descriptors %s", err)
	}

	// drop capabilities in bounding set before changing user
	if err = capabilities.DropBoundingSet(container.Capabilities); err != nil {
		return fmt.Errorf("drop bounding set %s", err)
	}

	// preserve existing capabilities while we change users
	if err = system.SetKeepCaps(); err != nil {
		return fmt.Errorf("set keep caps %s", err)
	}
	if err = SetupUser(container); err != nil {
		return fmt.Errorf("setup user %s", err)
	}
	if err = system.ClearKeepCaps(); err != nil {
		return fmt.Errorf("clear keep caps %s", err)
	}

	// drop all other capabilities
	if err = capabilities.DropCapabilities(container.Capabilities); err != nil {
		return fmt.Errorf("drop capabilities %s", err)
	}

	if dir := container.WorkingDir; dir != "" {
		if err = syscall.Chdir(dir); err != nil {
			return fmt.Errorf("chdir to %s %s", dir, err)
		}
	}
	return nil
}
// LoadContainerEnvironment replaces the environment of the current process
// with the one defined on the container. The existing environment is cleared
// first; every entry of container.Env must have the form "KEY=VALUE" and is
// split at the first "=". An entry without "=" aborts with an error, leaving
// the entries processed so far in place.
func LoadContainerEnvironment(container *libcontainer.Config) error {
	os.Clearenv()
	for _, pair := range container.Env {
		sep := strings.Index(pair, "=")
		if sep < 0 {
			return fmt.Errorf("invalid environment '%v'", pair)
		}
		key, value := pair[:sep], pair[sep+1:]
		if err := os.Setenv(key, value); err != nil {
			return err
		}
	}
	return nil
}
// joinExistingNamespaces makes the current process join every namespace for
// which the container specifies a path: the path is opened read-only and
// setns is called on the resulting descriptor with the clone flag matching
// the namespace type. Namespaces without a path are skipped. The descriptor
// is closed again whether or not setns succeeded.
func joinExistingNamespaces(namespaces []libcontainer.Namespace) error {
	for _, ns := range namespaces {
		if ns.Path == "" {
			continue
		}

		nsFile, err := os.Open(ns.Path)
		if err != nil {
			return err
		}

		setnsErr := system.Setns(nsFile.Fd(), uintptr(namespaceInfo[ns.Type]))
		nsFile.Close()
		if setnsErr != nil {
			return setnsErr
		}
	}
	return nil
}

View file

@ -1,245 +0,0 @@
// +build cgo
//
// formated with indent -linux nsenter.c
#include <errno.h>
#include <fcntl.h>
#include <linux/limits.h>
#include <linux/sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/prctl.h>
#include <sys/types.h>
#include <unistd.h>
#include <getopt.h>
// pr_perror prints "nsenter: <formatted message>: <description of errno>" and a
// newline to stderr. fmt must be a string literal because it is concatenated
// with the prefix and suffix at compile time. (%m is the glibc printf
// extension that expands to strerror(errno).)
#define pr_perror(fmt, ...) fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__)

// Number of bytes by which the /proc/self/cmdline read buffer grows per step.
// The type is spelled out: a declaration without a type specifier ("implicit
// int") is not valid C since C99.
static const int kBufSize = 256;

// argv[0] prefix that selects the setns code path.
static const char *kNsEnter = "nsenter";
// get_args rebuilds the argument vector of the current process from
// /proc/self/cmdline.
//
// On return *argc holds the number of NUL-terminated arguments found and *argv
// points to a NULL-terminated array of argc pointers into one heap buffer that
// holds the argument strings. Neither allocation is ever freed.
//
// Any failure (open, read or memory allocation) prints a diagnostic and
// terminates the process with exit status 1.
void get_args(int *argc, char ***argv)
{
	// Read argv
	int fd = open("/proc/self/cmdline", O_RDONLY);
	if (fd < 0) {
		pr_perror("Unable to open /proc/self/cmdline");
		exit(1);
	}

	// Read the whole commandline, growing the buffer by kBufSize before
	// every read so that there is always free space to read into.
	ssize_t contents_size = 0;
	ssize_t contents_offset = 0;
	char *contents = NULL;
	ssize_t bytes_read = 0;
	do {
		contents_size += kBufSize;
		char *grown = (char *)realloc(contents, contents_size);
		if (grown == NULL) {
			pr_perror("Unable to allocate memory for /proc/self/cmdline");
			exit(1);
		}
		contents = grown;
		bytes_read =
		    read(fd, contents + contents_offset,
			 contents_size - contents_offset);
		if (bytes_read < 0) {
			pr_perror("Unable to read from /proc/self/cmdline");
			exit(1);
		}
		contents_offset += bytes_read;
	}
	while (bytes_read > 0);
	close(fd);

	// Parse the commandline into an argv. /proc/self/cmdline has \0 delimited args.
	ssize_t i;
	*argc = 0;
	for (i = 0; i < contents_offset; i++) {
		if (contents[i] == '\0') {
			(*argc)++;
		}
	}

	*argv = (char **)malloc(sizeof(char *) * ((*argc) + 1));
	if (*argv == NULL) {
		pr_perror("Unable to allocate memory for argv");
		exit(1);
	}

	// Every counted argument ends at one of the NUL bytes found above, so
	// walking with strlen stays inside the bytes that were read.
	int idx;
	for (idx = 0; idx < (*argc); idx++) {
		(*argv)[idx] = contents;
		contents += strlen(contents) + 1;
	}
	(*argv)[*argc] = NULL;
}
// Use raw setns syscall for versions of glibc that don't include it (namely glibc-2.12)
//
// The fallback below is compiled only for glibc 2.x with a minor version below
// 14, and only if the system headers define the SYS_setns syscall number. It
// forwards fd and nstype straight to the raw syscall and returns its result.
#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14
#define _GNU_SOURCE
#include <sched.h>
#include "syscall.h"
#ifdef SYS_setns
int setns(int fd, int nstype)
{
return syscall(SYS_setns, fd, nstype);
}
#endif
#endif
// print_usage writes a one-line summary of the expected command line to
// stderr.
void print_usage()
{
	static const char usage[] =
	    "nsenter --nspid <pid> --console <console> -- cmd1 arg1 arg2...\n";

	fputs(usage, stderr);
}
// nsenter joins the namespaces of an existing container when the process was
// started with an argv[0] that begins with "nsenter".
//
// For any other argv[0] it returns immediately without doing anything.
// Otherwise it:
//   1. parses --nspid <pid> (required) and --console <path> (optional),
//   2. starts a new session and, if requested, opens the console,
//   3. blocks until the parent writes the byte '1' on fd 3,
//   4. calls setns() on each of the ipc, uts, net, pid and mnt namespaces of
//      <pid> that exist under /proc/<pid>/ns/, and
//   5. forks: the child (with the console dup'ed onto stdin/stdout/stderr, if
//      one was given) returns to the caller, while the parent waits for the
//      child and exits with the child's exit status or re-raises the signal
//      that killed it.
//
// Any failure prints a diagnostic to stderr and exits with status 1.
void nsenter()
{
	int argc, c;
	char **argv;
	get_args(&argc, &argv);

	// check argv 0 to ensure that we are supposed to setns
	// we use strncmp to test for a value of "nsenter" but also allows alternate implementations
	// after the setns code path to continue to use the argv 0 to determine actions to be run
	// resulting in the ability to specify "nsenter-mknod", "nsenter-exec", etc...
	// An empty command line leaves argv[0] NULL, so guard before comparing.
	if (argc == 0 || strncmp(argv[0], kNsEnter, strlen(kNsEnter)) != 0) {
		return;
	}
#ifdef PR_SET_CHILD_SUBREAPER
	if (prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) == -1) {
		pr_perror("Failed to set child subreaper");
		exit(1);
	}
#endif

	// Both options are matched through this table: "console" maps to 't',
	// which is what the switch below handles (the short-option string names
	// 'c' instead, which the switch ignores).
	static const struct option longopts[] = {
		{"nspid", required_argument, NULL, 'n'},
		{"console", required_argument, NULL, 't'},
		{NULL, 0, NULL, 0}
	};

	pid_t init_pid = -1;
	char *init_pid_str = NULL;
	char *console = NULL;
	while ((c = getopt_long_only(argc, argv, "n:c:", longopts, NULL)) != -1) {
		switch (c) {
		case 'n':
			init_pid_str = optarg;
			break;
		case 't':
			console = optarg;
			break;
		}
	}

	if (init_pid_str == NULL) {
		print_usage();
		exit(1);
	}

	// strtol never clears errno on success, so reset it first; otherwise a
	// stale EINVAL/ERANGE left by an earlier call would be mistaken for a
	// conversion failure.
	errno = 0;
	init_pid = strtol(init_pid_str, NULL, 10);
	if ((init_pid == 0 && errno == EINVAL) || errno == ERANGE) {
		pr_perror("Failed to parse PID from \"%s\" with output \"%d\"",
			  init_pid_str, init_pid);
		print_usage();
		exit(1);
	}

	argc -= 3;
	argv += 3;

	if (setsid() == -1) {
		pr_perror("setsid failed");
		exit(1);
	}

	// before we setns we need to dup the console
	int consolefd = -1;
	if (console != NULL) {
		consolefd = open(console, O_RDWR);
		if (consolefd < 0) {
			pr_perror("Failed to open console %s", console);
			exit(1);
		}
	}

	// blocking until the parent placed the process inside correct cgroups.
	unsigned char s;
	if (read(3, &s, 1) != 1 || s != '1') {
		pr_perror("failed to receive synchronization data from parent");
		exit(1);
	}

	// Setns on all supported namespaces.
	char ns_dir[PATH_MAX];
	memset(ns_dir, 0, PATH_MAX);
	snprintf(ns_dir, PATH_MAX - 1, "/proc/%d/ns/", init_pid);

	int ns_dir_fd;
	ns_dir_fd = open(ns_dir, O_RDONLY | O_DIRECTORY);
	if (ns_dir_fd < 0) {
		pr_perror("Unable to open %s", ns_dir);
		exit(1);
	}

	char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt" };
	const int num = sizeof(namespaces) / sizeof(char *);
	int i;
	for (i = 0; i < num; i++) {
		// A zombie process has links on namespaces, but they can't be opened
		struct stat st;
		if (fstatat(ns_dir_fd, namespaces[i], &st, AT_SYMLINK_NOFOLLOW)
		    == -1) {
			// A namespace entry that does not exist is skipped.
			if (errno == ENOENT)
				continue;
			pr_perror("Failed to stat ns file %s for ns %s",
				  ns_dir, namespaces[i]);
			exit(1);
		}

		int fd = openat(ns_dir_fd, namespaces[i], O_RDONLY);
		if (fd == -1) {
			pr_perror("Failed to open ns file %s for ns %s",
				  ns_dir, namespaces[i]);
			exit(1);
		}
		// Set the namespace.
		if (setns(fd, 0) == -1) {
			pr_perror("Failed to setns for %s", namespaces[i]);
			exit(1);
		}
		close(fd);
	}
	close(ns_dir_fd);

	// We must fork to actually enter the PID namespace.
	int child = fork();
	if (child == -1) {
		pr_perror("Unable to fork a process");
		exit(1);
	}

	if (child == 0) {
		if (consolefd != -1) {
			if (dup2(consolefd, STDIN_FILENO) != 0) {
				pr_perror("Failed to dup 0");
				exit(1);
			}
			if (dup2(consolefd, STDOUT_FILENO) != STDOUT_FILENO) {
				pr_perror("Failed to dup 1");
				exit(1);
			}
			if (dup2(consolefd, STDERR_FILENO) != STDERR_FILENO) {
				pr_perror("Failed to dup 2");
				exit(1);
			}
		}
		// Finish executing, let the Go runtime take over.
		return;
	} else {
		// Parent, wait for the child.
		int status = 0;
		if (waitpid(child, &status, 0) == -1) {
			// pr_perror already adds the "nsenter: " prefix.
			pr_perror("Failed to waitpid with error");
			exit(1);
		}
		// Forward the child's exit code or re-send its death signal.
		if (WIFEXITED(status)) {
			exit(WEXITSTATUS(status));
		} else if (WIFSIGNALED(status)) {
			kill(getpid(), WTERMSIG(status));
		}
		// Reached when the signal did not terminate this process or the
		// child ended in some other way.
		exit(1);
	}

	return;
}

View file

@ -1,10 +0,0 @@
// +build linux
package nsenter
/*
__attribute__((constructor)) init() {
nsenter();
}
*/
import "C"

View file

@ -1,45 +0,0 @@
// +build linux
package namespaces
import (
"os"
"syscall"
"github.com/docker/libcontainer"
)
// initError is the JSON-serializable error exchanged over the init pipe: the
// container's init process encodes one when its setup fails, and the parent
// decodes it. An empty Message is omitted from the encoded form.
type initError struct {
	Message string `json:"message,omitempty"`
}

// Error implements the error interface by returning the message verbatim.
func (e initError) Error() string {
	return e.Message
}
// namespaceInfo maps each libcontainer namespace type to the matching
// CLONE_NEW* flag from package syscall. GetNamespaceFlags ORs these values
// together for clone, and joinExistingNamespaces passes them to setns. A
// namespace type missing from the map yields 0, i.e. no flag.
var namespaceInfo = map[libcontainer.NamespaceType]int{
libcontainer.NEWNET: syscall.CLONE_NEWNET,
libcontainer.NEWNS: syscall.CLONE_NEWNS,
libcontainer.NEWUSER: syscall.CLONE_NEWUSER,
libcontainer.NEWIPC: syscall.CLONE_NEWIPC,
libcontainer.NEWUTS: syscall.CLONE_NEWUTS,
libcontainer.NEWPID: syscall.CLONE_NEWPID,
}
// newInitPipe creates a connected pair of local stream sockets for
// communication between a parent process and the child it starts. Both ends
// are created close-on-exec and returned as *os.File values named "parent"
// and "child". On failure both files are nil and err describes the problem.
func newInitPipe() (parent *os.File, child *os.File, err error) {
	const sockType = syscall.SOCK_STREAM | syscall.SOCK_CLOEXEC

	pair, err := syscall.Socketpair(syscall.AF_LOCAL, sockType, 0)
	if err != nil {
		return nil, nil, err
	}

	// The second descriptor of the pair is the parent's end, the first one
	// the child's.
	parent = os.NewFile(uintptr(pair[1]), "parent")
	child = os.NewFile(uintptr(pair[0]), "child")
	return parent, child, nil
}
// GetNamespaceFlags combines the flags of all the container's namespaces into
// a single bit mask suitable for clone, unshare, and setns. Namespace types
// without an entry in namespaceInfo contribute nothing; an empty list yields 0.
func GetNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) {
	for i := range namespaces {
		flag |= namespaceInfo[namespaces[i].Type]
	}
	return flag
}

Some files were not shown because too many files have changed in this diff Show more