Merge pull request #6703 from crosbymichael/update-libcontainer-dep

Update libcontainer to 53cfe0a1eba9145bf5329abb
This commit is contained in:
Tibor Vass 2014-06-26 21:20:59 -04:00
commit 67f6b0fd43
35 changed files with 1036 additions and 176 deletions

View file

@ -54,7 +54,7 @@ func systemdSlice(container *libcontainer.Config, context interface{}, value str
}
func apparmorProfile(container *libcontainer.Config, context interface{}, value string) error {
container.Context["apparmor_profile"] = value
container.AppArmorProfile = value
return nil
}

View file

@ -84,8 +84,9 @@ func TestAppArmorProfile(t *testing.T) {
if err := ParseConfiguration(container, nil, opts); err != nil {
t.Fatal(err)
}
if expected := "koye-the-protector"; container.Context["apparmor_profile"] != expected {
t.Fatalf("expected profile %s got %s", expected, container.Context["apparmor_profile"])
if expected := "koye-the-protector"; container.AppArmorProfile != expected {
t.Fatalf("expected profile %s got %s", expected, container.AppArmorProfile)
}
}

View file

@ -32,7 +32,7 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e
// check to see if we are running in ramdisk to disable pivot root
container.MountConfig.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
container.Context["restrictions"] = "true"
container.RestrictSys = true
if err := d.createNetwork(container, c); err != nil {
return nil, err
@ -127,10 +127,10 @@ func (d *driver) setPrivileged(container *libcontainer.Config) (err error) {
}
container.MountConfig.DeviceNodes = hostDeviceNodes
delete(container.Context, "restrictions")
container.RestrictSys = false
if apparmor.IsEnabled() {
container.Context["apparmor_profile"] = "unconfined"
container.AppArmorProfile = "unconfined"
}
return nil
@ -163,8 +163,8 @@ func (d *driver) setupMounts(container *libcontainer.Config, c *execdriver.Comma
}
func (d *driver) setupLabels(container *libcontainer.Config, c *execdriver.Command) error {
container.Context["process_label"] = c.Config["process_label"][0]
container.Context["mount_label"] = c.Config["mount_label"][0]
container.ProcessLabel = c.Config["process_label"][0]
container.MountConfig.MountLabel = c.Config["mount_label"][0]
return nil
}

View file

@ -35,11 +35,10 @@ func New() *libcontainer.Config {
AllowAllDevices: false,
},
MountConfig: &libcontainer.MountConfig{},
Context: make(map[string]string),
}
if apparmor.IsEnabled() {
container.Context["apparmor_profile"] = "docker-default"
container.AppArmorProfile = "docker-default"
}
return container

View file

@ -63,4 +63,4 @@ mv tmp-tar src/code.google.com/p/go/src/pkg/archive/tar
clone git github.com/godbus/dbus v1
clone git github.com/coreos/go-systemd v2
clone git github.com/docker/libcontainer 5210a236b92a8022a673108f3471fed0a046bd05
clone git github.com/docker/libcontainer 53cfe0a1eba9145bf5329abbb52b0072ccab8a00

View file

@ -6,39 +6,44 @@ Please bear with us while we work on making the libcontainer API stable and some
#### Background
libcontainer specifies configuration options for what a container is. It provides a native Go implementation
for using Linux namespaces with no external dependencies. libcontainer provides many convenience functions for working with namespaces, networking, and management.
libcontainer specifies configuration options for what a container is. It provides a native Go implementation for using Linux namespaces with no external dependencies. libcontainer provides many convenience functions for working with namespaces, networking, and management.
#### Container
A container is a self contained directory that is able to run one or more processes without
affecting the host system. The directory is usually a full system tree. Inside the directory
a `container.json` file is placed with the runtime configuration for how the processes
should be contained and run. Environment, networking, and different capabilities for the
process are specified in this file. The configuration is used for each process executed inside the container.
A container is a self-contained execution environment that shares the kernel of the host system and is (optionally) isolated from other containers in the system.
See the `sample_configs` folder for examples of what the container configuration should look like.
libcontainer may be used to execute a process in a container. If a user tries to run a new process inside an existing container, the new process is added to the processes executing in the container.
Using this configuration and the current directory holding the rootfs for a process, one can use libcontainer to exec the container. Running the life of the namespace, a `pid` file
is written to the current directory with the pid of the namespaced process to the external world. A client can use this pid to wait, kill, or perform other operation with the container. If a user tries to run a new process inside an existing container with a live namespace, the namespace will be joined by the new process.
You may also specify an alternate root place where the `container.json` file is read and where the `pid` file will be saved.
#### Root file system
A container runs with a directory known as its *root file system*, or *rootfs*, mounted as the file system root. The rootfs is usually a full system tree.
#### Configuration
A container is initially configured by supplying configuration data when the container is created.
#### nsinit
`nsinit` is a cli application used as the reference implementation of libcontainer. It is able to
spawn or join new containers giving the current directory. To use `nsinit` cd into a Linux
rootfs and copy a `container.json` file into the directory with your specified configuration.
`nsinit` is a CLI application that demonstrates the use of libcontainer. It is able to spawn new containers or join existing containers, based on the current directory.
To execute `/bin/bash` in the current directory as a container just run:
To use `nsinit`, cd into a Linux rootfs and copy a `container.json` file into the directory with your specified configuration. Environment, networking, and different capabilities for the container are specified in this file. The configuration is used for each process executed inside the container.
See the `sample_configs` folder for examples of what the container configuration should look like.
To execute `/bin/bash` in the current directory as a container just run the following **as root**:
```bash
nsinit exec /bin/bash
```
If you wish to spawn another process inside the container while your current bash session is
running just run the exact same command again to get another bash shell or change the command. If the original process dies, PID 1, all other processes spawned inside the container will also be killed and the namespace will be removed.
If you wish to spawn another process inside the container while your current bash session is running, run the same command again to get another bash shell (or change the command). If the original process (PID 1) dies, all other processes spawned inside the container will be killed and the namespace will be removed.
You can identify if a process is running in a container by looking to see if `state.json` is in the root of the directory.
You may also specify an alternate root directory from which the `container.json` file is read and in which the `state.json` file is saved.
You can identify if a process is running in a container by looking to see if `pid` is in the root of the directory.
#### Future
See the [roadmap](ROADMAP.md).

View file

@ -0,0 +1,23 @@
package libcontainer
import (
"github.com/docker/libcontainer/cgroups/fs"
"github.com/docker/libcontainer/network"
)
// GetStats gathers every statistic available for the given container: the
// cgroup statistics for container.Cgroups first, then the network statistics
// for state.NetworkState.
//
// The returned pointer is never nil. When an error occurs it holds whatever
// was collected before the failing step, and the error is returned alongside.
func GetStats(container *Config, state *State) (*ContainerStats, error) {
	result := &ContainerStats{}

	cgroupStats, err := fs.GetStats(container.Cgroups)
	if err != nil {
		return result, err
	}
	result.CgroupStats = cgroupStats

	netStats, err := network.GetStats(&state.NetworkState)
	if err != nil {
		return result, err
	}
	result.NetworkStats = netStats

	return result, nil
}

View file

@ -0,0 +1,82 @@
// +build linux
package fs
import (
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/docker/libcontainer/cgroups"
)
// NotifyOnOOM resolves the cgroup data for c and registers for out-of-memory
// notifications on it. A value is sent on the returned channel each time the
// registered eventfd fires, and the channel is closed once the cgroup's event
// control file has disappeared or the eventfd can no longer be read.
func NotifyOnOOM(c *cgroups.Cgroup) (<-chan struct{}, error) {
	cgroupData, err := getCgroupData(c, 0)
	if err != nil {
		return nil, err
	}

	return notifyOnOOM(cgroupData)
}
// notifyOnOOM registers an eventfd against memory.oom_control in the memory
// cgroup described by d and returns a channel that receives a value every time
// that eventfd is signalled.
//
// A background goroutine owns the eventfd and the oom_control file. It closes
// both, and the returned channel, when reading the eventfd fails or when the
// cgroup.event_control file no longer exists. The send on the channel is
// unbuffered, so the goroutine blocks until the caller receives.
func notifyOnOOM(d *data) (<-chan struct{}, error) {
	dir, err := d.path("memory")
	if err != nil {
		return nil, err
	}

	// eventfd2 takes EFD_* flags, not fcntl FD_* flags. EFD_CLOEXEC has the
	// same value as O_CLOEXEC, whereas FD_CLOEXEC (1) is EFD_SEMAPHORE in this
	// flag space and would leave the descriptor inheritable across exec.
	fd, _, syserr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.O_CLOEXEC, 0)
	if syserr != 0 {
		return nil, syserr
	}
	eventfd := os.NewFile(fd, "eventfd")

	oomControl, err := os.Open(filepath.Join(dir, "memory.oom_control"))
	if err != nil {
		eventfd.Close()
		return nil, err
	}

	var (
		eventControlPath = filepath.Join(dir, "cgroup.event_control")
		// "<eventfd> <oom_control fd>" is what gets written to cgroup.event_control.
		registration = fmt.Sprintf("%d %d", eventfd.Fd(), oomControl.Fd())
	)
	if err := writeFile(dir, "cgroup.event_control", registration); err != nil {
		eventfd.Close()
		oomControl.Close()
		return nil, err
	}

	ch := make(chan struct{})
	go func() {
		defer func() {
			close(ch)
			eventfd.Close()
			oomControl.Close()
		}()

		buf := make([]byte, 8)
		for {
			if _, err := eventfd.Read(buf); err != nil {
				return
			}
			// When a cgroup is destroyed, an event is sent to eventfd.
			// So if the control path is gone, return instead of notifying.
			if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) {
				return
			}
			ch <- struct{}{}
		}
	}()

	return ch, nil
}

View file

@ -0,0 +1,86 @@
// +build linux
package fs
import (
"encoding/binary"
"fmt"
"syscall"
"testing"
"time"
)
// TestNotifyOnOOM drives notifyOnOOM against a fake memory cgroup directory.
// It recovers the registered descriptors from cgroup.event_control, signals the
// eventfd by hand, and checks that (1) a notification arrives, (2) no
// notification is delivered once the cgroup directory is gone, and (3) both
// descriptors are closed afterwards.
func TestNotifyOnOOM(t *testing.T) {
	helper := NewCgroupTestUtil("memory", t)
	defer helper.cleanup()

	helper.writeFileContents(map[string]string{
		"memory.oom_control":   "",
		"cgroup.event_control": "",
	})

	var eventFd, oomControlFd int

	ooms, err := notifyOnOOM(helper.CgroupData)
	if err != nil {
		t.Fatal("expected no error, got:", err)
	}

	memoryPath, _ := helper.CgroupData.path("memory")
	data, err := readFile(memoryPath, "cgroup.event_control")
	if err != nil {
		t.Fatal("couldn't read event control file:", err)
	}

	if _, err := fmt.Sscanf(data, "%d %d", &eventFd, &oomControlFd); err != nil {
		t.Fatalf("invalid control data %q: %s", data, err)
	}

	// re-open the eventfd
	efd, err := syscall.Dup(eventFd)
	if err != nil {
		t.Fatal("unable to reopen eventfd:", err)
	}
	defer syscall.Close(efd)

	// An eventfd write is an 8-byte unsigned integer that is added to the counter.
	buf := make([]byte, 8)
	binary.LittleEndian.PutUint64(buf, 1)

	if _, err := syscall.Write(efd, buf); err != nil {
		t.Fatal("unable to write to eventfd:", err)
	}

	select {
	case <-ooms:
	case <-time.After(100 * time.Millisecond):
		t.Fatal("no notification on oom channel after 100ms")
	}

	// simulate what happens when a cgroup is destroyed by cleaning up and then
	// writing to the eventfd.
	helper.cleanup()
	if _, err := syscall.Write(efd, buf); err != nil {
		t.Fatal("unable to write to eventfd:", err)
	}

	// give things a moment to shut down
	select {
	case _, ok := <-ooms:
		if ok {
			t.Fatal("expected no oom to be triggered")
		}
	case <-time.After(100 * time.Millisecond):
	}

	// F_GETFD on a closed descriptor fails with EBADF.
	if _, _, err := syscall.Syscall(syscall.SYS_FCNTL, uintptr(oomControlFd), syscall.F_GETFD, 0); err != syscall.EBADF {
		t.Error("expected oom control to be closed")
	}

	if _, _, err := syscall.Syscall(syscall.SYS_FCNTL, uintptr(eventFd), syscall.F_GETFD, 0); err != syscall.EBADF {
		t.Error("expected event fd to be closed")
	}
}

View file

@ -51,12 +51,17 @@ type Config struct {
// placed into to limit the resources the container has available
Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"`
// Context is a generic key value format that allows for additional settings to be passed
// on the container's creation
// This is commonly used to specify apparmor profiles, selinux labels, and different restrictions
// placed on the container's processes
// TODO(vishh): Avoid overloading this field with params for different subsystems. Strongtype this.
Context map[string]string `json:"context,omitempty"`
// AppArmorProfile specifies the profile to apply to the process running in the container; the
// profile is changed at the time the process is execed
AppArmorProfile string `json:"apparmor_profile,omitempty"`
// ProcessLabel specifies the label to apply to the process running in the container. It is
// commonly used by selinux
ProcessLabel string `json:"process_label,omitempty"`
// RestrictSys will remount /proc/sys, /sys, and mask over sysrq-trigger as well as /proc/irq and
// /proc/bus
RestrictSys bool `json:"restrict_sys,omitempty"`
}
// Routes can be specified to create entries in the route table as the container is started

View file

@ -3,6 +3,7 @@ package libcontainer
import (
"encoding/json"
"os"
"path/filepath"
"testing"
"github.com/docker/libcontainer/devices"
@ -32,17 +33,27 @@ func containsDevice(expected *devices.Device, values []*devices.Device) bool {
return false
}
func TestConfigJsonFormat(t *testing.T) {
f, err := os.Open("sample_configs/attach_to_bridge.json")
func loadConfig(name string) (*Config, error) {
f, err := os.Open(filepath.Join("sample_configs", name))
if err != nil {
t.Fatal("Unable to open container.json")
return nil, err
}
defer f.Close()
var container *Config
if err := json.NewDecoder(f).Decode(&container); err != nil {
t.Fatalf("failed to decode container config: %s", err)
return nil, err
}
return container, nil
}
func TestConfigJsonFormat(t *testing.T) {
container, err := loadConfig("attach_to_bridge.json")
if err != nil {
t.Fatal(err)
}
if container.Hostname != "koye" {
t.Log("hostname is not set")
t.Fail()
@ -111,8 +122,39 @@ func TestConfigJsonFormat(t *testing.T) {
for _, d := range devices.DefaultSimpleDevices {
if !containsDevice(d, container.MountConfig.DeviceNodes) {
t.Logf("expected defice configuration for %s", d.Path)
t.Logf("expected device configuration for %s", d.Path)
t.Fail()
}
}
if !container.RestrictSys {
t.Log("expected restrict sys to be true")
t.Fail()
}
}
// TestApparmorProfile checks that the apparmor sample configuration decodes
// its profile name into Config.AppArmorProfile.
func TestApparmorProfile(t *testing.T) {
	const want = "docker-default"

	container, err := loadConfig("apparmor.json")
	if err != nil {
		t.Fatal(err)
	}

	if got := container.AppArmorProfile; got != want {
		t.Fatalf("expected apparmor profile to be docker-default but received %q", got)
	}
}
// TestSelinuxLabels checks that the selinux sample configuration decodes the
// same label into both Config.ProcessLabel and MountConfig.MountLabel.
func TestSelinuxLabels(t *testing.T) {
	const want = "system_u:system_r:svirt_lxc_net_t:s0:c164,c475"

	container, err := loadConfig("selinux.json")
	if err != nil {
		t.Fatal(err)
	}

	if got := container.ProcessLabel; got != want {
		t.Fatalf("expected process label %q but received %q", want, got)
	}

	if got := container.MountConfig.MountLabel; got != want {
		t.Fatalf("expected mount label %q but received %q", want, got)
	}
}

View file

@ -25,8 +25,8 @@ type mount struct {
data string
}
// InitializeMountNamespace setups up the devices, mount points, and filesystems for use inside a
// new mount namepsace
// InitializeMountNamespace sets up the devices, mount points, and filesystems for use inside a
// new mount namespace.
func InitializeMountNamespace(rootfs, console string, mountConfig *MountConfig) error {
var (
err error

View file

@ -2,6 +2,7 @@ package mount
import (
"errors"
"github.com/docker/libcontainer/devices"
)

View file

@ -17,7 +17,7 @@ import (
// TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work.
// Move this to libcontainer package.
// Exec performes setup outside of a namespace so that a container can be
// Exec performs setup outside of a namespace so that a container can be
// executed. Exec is a high level function for working with container namespaces.
func Exec(container *libcontainer.Config, term Terminal, rootfs, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) {
var (
@ -32,6 +32,7 @@ func Exec(container *libcontainer.Config, term Terminal, rootfs, dataPath string
if err != nil {
return -1, err
}
defer syncPipe.Close()
if container.Tty {
master, console, err = system.CreateMasterAndConsole()
@ -52,16 +53,13 @@ func Exec(container *libcontainer.Config, term Terminal, rootfs, dataPath string
return -1, err
}
// Now we passed the pipe to the child, close our side
syncPipe.CloseChild()
started, err := system.GetProcessStartTime(command.Process.Pid)
if err != nil {
return -1, err
}
if err := WritePid(dataPath, command.Process.Pid, started); err != nil {
command.Process.Kill()
command.Wait()
return -1, err
}
defer DeletePid(dataPath)
// Do this before syncing with child so that no children
// can escape the cgroup
@ -75,14 +73,32 @@ func Exec(container *libcontainer.Config, term Terminal, rootfs, dataPath string
defer cleaner.Cleanup()
}
if err := InitializeNetworking(container, command.Process.Pid, syncPipe); err != nil {
var networkState network.NetworkState
if err := InitializeNetworking(container, command.Process.Pid, syncPipe, &networkState); err != nil {
command.Process.Kill()
command.Wait()
return -1, err
}
state := &libcontainer.State{
InitPid: command.Process.Pid,
InitStartTime: started,
NetworkState: networkState,
}
if err := libcontainer.SaveState(dataPath, state); err != nil {
command.Process.Kill()
command.Wait()
return -1, err
}
defer libcontainer.DeleteState(dataPath)
// Sync with child
syncPipe.Close()
if err := syncPipe.ReadFromChild(); err != nil {
command.Process.Kill()
command.Wait()
return -1, err
}
if startCallback != nil {
startCallback()
@ -101,10 +117,10 @@ func Exec(container *libcontainer.Config, term Terminal, rootfs, dataPath string
// args provided
//
// console: the /dev/console to setup inside the container
// init: the progam executed inside the namespaces
// init: the program executed inside the namespaces
// root: the path to the container json file and information
// pipe: sync pipe to syncronize the parent and child processes
// args: the arguemnts to pass to the container to run as the user's program
// pipe: sync pipe to synchronize the parent and child processes
// args: the arguments to pass to the container to run as the user's program
func DefaultCreateCommand(container *libcontainer.Config, console, rootfs, dataPath, init string, pipe *os.File, args []string) *exec.Cmd {
// get our binary name from arg0 so we can always reexec ourself
env := []string{
@ -135,7 +151,7 @@ func DefaultCreateCommand(container *libcontainer.Config, console, rootfs, dataP
return command
}
// SetupCgroups applies the cgroup restrictions to the process running in the contaienr based
// SetupCgroups applies the cgroup restrictions to the process running in the container based
// on the container's configuration
func SetupCgroups(container *libcontainer.Config, nspid int) (cgroups.ActiveCgroup, error) {
if container.Cgroups != nil {
@ -150,18 +166,17 @@ func SetupCgroups(container *libcontainer.Config, nspid int) (cgroups.ActiveCgro
// InitializeNetworking creates the container's network stack outside of the namespace and moves
// interfaces into the container's net namespaces if necessary
func InitializeNetworking(container *libcontainer.Config, nspid int, pipe *SyncPipe) error {
context := map[string]string{}
func InitializeNetworking(container *libcontainer.Config, nspid int, pipe *SyncPipe, networkState *network.NetworkState) error {
for _, config := range container.Networks {
strategy, err := network.GetStrategy(config.Type)
if err != nil {
return err
}
if err := strategy.Create((*network.Network)(config), nspid, context); err != nil {
if err := strategy.Create((*network.Network)(config), nspid, networkState); err != nil {
return err
}
}
return pipe.SendToChild(context)
return pipe.SendToChild(networkState)
}
// GetNamespaceFlags parses the container's Namespaces options to set the correct

View file

@ -13,7 +13,7 @@ import (
)
// ExecIn uses an existing pid and joins the pid's namespaces with the new command.
func ExecIn(container *libcontainer.Config, nspid int, args []string) error {
func ExecIn(container *libcontainer.Config, state *libcontainer.State, args []string) error {
// TODO(vmarmol): If this gets too long, send it over a pipe to the child.
// Marshall the container into JSON since it won't be available in the namespace.
containerJson, err := json.Marshal(container)
@ -22,7 +22,7 @@ func ExecIn(container *libcontainer.Config, nspid int, args []string) error {
}
// Enter the namespace and then finish setup
finalArgs := []string{os.Args[0], "nsenter", "--nspid", strconv.Itoa(nspid), "--containerjson", string(containerJson), "--"}
finalArgs := []string{os.Args[0], "nsenter", "--nspid", strconv.Itoa(state.InitPid), "--containerjson", string(containerJson), "--"}
finalArgs = append(finalArgs, args...)
if err := system.Execv(finalArgs[0], finalArgs[0:], os.Environ()); err != nil {
return err
@ -41,8 +41,8 @@ func NsEnter(container *libcontainer.Config, nspid int, args []string) error {
return err
}
if process_label, ok := container.Context["process_label"]; ok {
if err := label.SetProcessLabel(process_label); err != nil {
if container.ProcessLabel != "" {
if err := label.SetProcessLabel(container.ProcessLabel); err != nil {
return err
}
}

View file

@ -27,7 +27,13 @@ import (
// Move this to libcontainer package.
// Init is the init process that first runs inside a new namespace to setup mounts, users, networking,
// and other options required for the new container.
func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syncPipe *SyncPipe, args []string) error {
func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syncPipe *SyncPipe, args []string) (err error) {
defer func() {
if err != nil {
syncPipe.ReportChildError(err)
}
}()
rootfs, err := utils.ResolveRootfs(uncleanRootfs)
if err != nil {
return err
@ -40,12 +46,10 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn
}
// We always read this as it is a way to sync with the parent as well
context, err := syncPipe.ReadFromParent()
networkState, err := syncPipe.ReadFromParent()
if err != nil {
syncPipe.Close()
return err
}
syncPipe.Close()
if consolePath != "" {
if err := console.OpenAndDup(consolePath); err != nil {
@ -60,7 +64,7 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn
return fmt.Errorf("setctty %s", err)
}
}
if err := setupNetwork(container, context); err != nil {
if err := setupNetwork(container, networkState); err != nil {
return fmt.Errorf("setup networking %s", err)
}
if err := setupRoute(container); err != nil {
@ -74,6 +78,7 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn
(*mount.MountConfig)(container.MountConfig)); err != nil {
return fmt.Errorf("setup mount namespace %s", err)
}
if container.Hostname != "" {
if err := system.Sethostname(container.Hostname); err != nil {
return fmt.Errorf("sethostname %s", err)
@ -82,13 +87,16 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn
runtime.LockOSThread()
if err := apparmor.ApplyProfile(container.Context["apparmor_profile"]); err != nil {
return fmt.Errorf("set apparmor profile %s: %s", container.Context["apparmor_profile"], err)
if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil {
return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err)
}
if err := label.SetProcessLabel(container.Context["process_label"]); err != nil {
if err := label.SetProcessLabel(container.ProcessLabel); err != nil {
return fmt.Errorf("set process label %s", err)
}
if container.Context["restrictions"] != "" {
// TODO: (crosbymichael) make this configurable at the Config level
if container.RestrictSys {
if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus", "sys"); err != nil {
return err
}
@ -161,14 +169,14 @@ func SetupUser(u string) error {
// setupVethNetwork uses the Network config if it is not nil to initialize
// the new veth interface inside the container for use by changing the name to eth0
// setting the MTU and IP address along with the default gateway
func setupNetwork(container *libcontainer.Config, context map[string]string) error {
func setupNetwork(container *libcontainer.Config, networkState *network.NetworkState) error {
for _, config := range container.Networks {
strategy, err := network.GetStrategy(config.Type)
if err != nil {
return err
}
err1 := strategy.Initialize((*network.Network)(config), context)
err1 := strategy.Initialize((*network.Network)(config), networkState)
if err1 != nil {
return err1
}

View file

@ -1,28 +0,0 @@
package namespaces
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
)
// WritePid writes the namespaced processes pid to pid and it's start time
// to the path specified
func WritePid(path string, pid int, startTime string) error {
err := ioutil.WriteFile(filepath.Join(path, "pid"), []byte(fmt.Sprint(pid)), 0655)
if err != nil {
return err
}
return ioutil.WriteFile(filepath.Join(path, "start"), []byte(startTime), 0655)
}
// DeletePid removes the pid and started file from disk when the container's process
// dies and the container is cleanly removed
func DeletePid(path string) error {
err := os.Remove(filepath.Join(path, "pid"))
if serr := os.Remove(filepath.Join(path, "start")); err == nil {
err = serr
}
return err
}

View file

@ -5,6 +5,9 @@ import (
"fmt"
"io/ioutil"
"os"
"syscall"
"github.com/docker/libcontainer/network"
)
// SyncPipe allows communication to and from the child processes
@ -14,24 +17,17 @@ type SyncPipe struct {
parent, child *os.File
}
func NewSyncPipe() (s *SyncPipe, err error) {
s = &SyncPipe{}
s.child, s.parent, err = os.Pipe()
if err != nil {
return nil, err
}
return s, nil
}
func NewSyncPipeFromFd(parendFd, childFd uintptr) (*SyncPipe, error) {
func NewSyncPipeFromFd(parentFd, childFd uintptr) (*SyncPipe, error) {
s := &SyncPipe{}
if parendFd > 0 {
s.parent = os.NewFile(parendFd, "parendPipe")
if parentFd > 0 {
s.parent = os.NewFile(parentFd, "parentPipe")
} else if childFd > 0 {
s.child = os.NewFile(childFd, "childPipe")
} else {
return nil, fmt.Errorf("no valid sync pipe fd specified")
}
return s, nil
}
@ -43,36 +39,64 @@ func (s *SyncPipe) Parent() *os.File {
return s.parent
}
func (s *SyncPipe) SendToChild(context map[string]string) error {
data, err := json.Marshal(context)
// SendToChild JSON-encodes networkState, writes it to the parent end of the
// pipe, and then shuts down the write direction of that end.
//
// It returns the marshalling error, the write error, or the shutdown error,
// whichever occurs first.
func (s *SyncPipe) SendToChild(networkState *network.NetworkState) error {
	data, err := json.Marshal(networkState)
	if err != nil {
		return err
	}

	// A failed write must be reported rather than dropped.
	if _, err := s.parent.Write(data); err != nil {
		return err
	}

	// Only the write direction is shut down; ReadFromChild still reads from
	// this same end afterwards.
	return syscall.Shutdown(int(s.parent.Fd()), syscall.SHUT_WR)
}
// ReadFromChild reads the parent end of the pipe until EOF. Any bytes received
// are treated as an error message and returned as an error; an empty read
// yields nil.
func (s *SyncPipe) ReadFromChild() error {
	msg, err := ioutil.ReadAll(s.parent)
	switch {
	case err != nil:
		return err
	case len(msg) > 0:
		return fmt.Errorf("%s", msg)
	}
	return nil
}
func (s *SyncPipe) ReadFromParent() (map[string]string, error) {
func (s *SyncPipe) ReadFromParent() (*network.NetworkState, error) {
data, err := ioutil.ReadAll(s.child)
if err != nil {
return nil, fmt.Errorf("error reading from sync pipe %s", err)
}
var context map[string]string
var networkState *network.NetworkState
if len(data) > 0 {
if err := json.Unmarshal(data, &context); err != nil {
if err := json.Unmarshal(data, &networkState); err != nil {
return nil, err
}
}
return context, nil
return networkState, nil
}
// ReportChildError writes the text of err to the child end of the pipe and
// then closes that end. The write result is not inspected.
func (s *SyncPipe) ReportChildError(err error) {
	msg := []byte(err.Error())
	s.child.Write(msg)
	s.CloseChild()
}
// Close closes whichever ends of the pipe are set, parent first and then
// child. Errors from the individual closes are discarded; the result is always
// nil.
func (s *SyncPipe) Close() error {
	for _, end := range []*os.File{s.parent, s.child} {
		if end != nil {
			end.Close()
		}
	}
	return nil
}
// CloseChild closes the child end of the pipe, if set, and clears the field so
// that a later Close or CloseChild does not close it again.
func (s *SyncPipe) CloseChild() {
	if s.child == nil {
		return
	}
	s.child.Close()
	s.child = nil
}

View file

@ -0,0 +1,20 @@
package namespaces
import (
"os"
"syscall"
)
// NewSyncPipe creates a SyncPipe backed by an AF_LOCAL stream socket pair
// opened with SOCK_CLOEXEC. The first descriptor of the pair becomes the child
// end and the second the parent end.
func NewSyncPipe() (s *SyncPipe, err error) {
	pair, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
	if err != nil {
		return nil, err
	}

	return &SyncPipe{
		child:  os.NewFile(uintptr(pair[0]), "child syncpipe"),
		parent: os.NewFile(uintptr(pair[1]), "parent syncpipe"),
	}, nil
}

View file

@ -0,0 +1,61 @@
package namespaces
import (
"fmt"
"testing"
"github.com/docker/libcontainer/network"
)
// TestSendErrorFromChild checks that an error reported on the child end of a
// SyncPipe comes back from ReadFromChild with the same message.
func TestSendErrorFromChild(t *testing.T) {
	pipe, err := NewSyncPipe()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := pipe.Close(); err != nil {
			t.Fatal(err)
		}
	}()

	expected := "something bad happened"

	// Pass the message as an argument, not as the format string, so a '%' in
	// the text could never be interpreted as a verb (go vet flags the latter).
	pipe.ReportChildError(fmt.Errorf("%s", expected))

	childError := pipe.ReadFromChild()
	if childError == nil {
		t.Fatal("expected an error to be returned but did not receive anything")
	}

	if childError.Error() != expected {
		t.Fatalf("expected %q but received error message %q", expected, childError.Error())
	}
}
// TestSendPayloadToChild checks that a NetworkState sent from the parent end
// of a SyncPipe is decoded unchanged on the child end.
func TestSendPayloadToChild(t *testing.T) {
	syncPipe, err := NewSyncPipe()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := syncPipe.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	const want = "libcontainer"

	sent := &network.NetworkState{VethHost: want}
	if err := syncPipe.SendToChild(sent); err != nil {
		t.Fatal(err)
	}

	received, err := syncPipe.ReadFromParent()
	if err != nil {
		t.Fatal(err)
	}

	if got := received.VethHost; got != want {
		t.Fatalf("expected veth host %q but received %q", want, got)
	}
}

View file

@ -10,11 +10,11 @@ import (
type Loopback struct {
}
func (l *Loopback) Create(n *Network, nspid int, context map[string]string) error {
func (l *Loopback) Create(n *Network, nspid int, networkState *NetworkState) error {
return nil
}
func (l *Loopback) Initialize(config *Network, context map[string]string) error {
func (l *Loopback) Initialize(config *Network, networkState *NetworkState) error {
if err := SetMtu("lo", config.Mtu); err != nil {
return fmt.Errorf("set lo mtu to %d %s", config.Mtu, err)
}

View file

@ -14,17 +14,16 @@ import (
type NetNS struct {
}
func (v *NetNS) Create(n *Network, nspid int, context map[string]string) error {
context["nspath"] = n.NsPath
func (v *NetNS) Create(n *Network, nspid int, networkState *NetworkState) error {
networkState.NsPath = n.NsPath
return nil
}
func (v *NetNS) Initialize(config *Network, context map[string]string) error {
nspath, exists := context["nspath"]
if !exists {
return fmt.Errorf("nspath does not exist in network context")
func (v *NetNS) Initialize(config *Network, networkState *NetworkState) error {
if networkState.NsPath == "" {
return fmt.Errorf("nspath does is not specified in NetworkState")
}
f, err := os.OpenFile(nspath, os.O_RDONLY, 0)
f, err := os.OpenFile(networkState.NsPath, os.O_RDONLY, 0)
if err != nil {
return fmt.Errorf("failed get network namespace fd: %v", err)
}

View file

@ -0,0 +1,68 @@
package network
import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
)
// NetworkStats holds the per-interface counters that GetStats fills in from the
// files under /sys/class/net/<interface>/statistics. Each field is populated
// from the file whose name matches the field's JSON tag (rx_bytes, tx_dropped,
// and so on); zero-valued fields are omitted from the JSON encoding.
type NetworkStats struct {
RxBytes uint64 `json:"rx_bytes,omitempty"`
RxPackets uint64 `json:"rx_packets,omitempty"`
RxErrors uint64 `json:"rx_errors,omitempty"`
RxDropped uint64 `json:"rx_dropped,omitempty"`
TxBytes uint64 `json:"tx_bytes,omitempty"`
TxPackets uint64 `json:"tx_packets,omitempty"`
TxErrors uint64 `json:"tx_errors,omitempty"`
TxDropped uint64 `json:"tx_dropped,omitempty"`
}
// GetStats returns the network statistics for the host-side veth interface
// recorded in networkState.
//
// When networkState.VethHost is empty, zero-valued statistics are returned
// without an error. This can happen if the network runtime information is
// missing, for example when the container was created by an older version of
// libcontainer.
func GetStats(networkState *NetworkState) (NetworkStats, error) {
	var stats NetworkStats

	host := networkState.VethHost
	if host == "" {
		return stats, nil
	}

	counters, err := readSysfsNetworkStats(host)
	if err != nil {
		return stats, err
	}

	stats.RxBytes = counters["rx_bytes"]
	stats.RxPackets = counters["rx_packets"]
	stats.RxErrors = counters["rx_errors"]
	stats.RxDropped = counters["rx_dropped"]
	stats.TxBytes = counters["tx_bytes"]
	stats.TxPackets = counters["tx_packets"]
	stats.TxErrors = counters["tx_errors"]
	stats.TxDropped = counters["tx_dropped"]

	return stats, nil
}
// readSysfsNetworkStats reads every statistic available under
// /sys/class/net/<ethInterface>/statistics and returns them as a map keyed by
// file name, with each file's contents parsed as an unsigned base-10 integer.
//
// An error is returned if the statistics directory cannot be walked (for
// example because the interface does not exist), or if any entry cannot be
// read or parsed. The returned map is nil whenever the error is non-nil.
func readSysfsNetworkStats(ethInterface string) (map[string]uint64, error) {
	out := make(map[string]uint64)

	fullPath := filepath.Join("/sys/class/net", ethInterface, "statistics/")
	err := filepath.Walk(fullPath, func(path string, _ os.FileInfo, walkErr error) error {
		// Walk reports failures to stat or list an entry through this
		// argument. Propagate them so a missing interface is an error and not
		// an empty result.
		if walkErr != nil {
			return walkErr
		}
		// skip fullPath.
		if path == fullPath {
			return nil
		}

		data, err := ioutil.ReadFile(path)
		if err != nil {
			return err
		}
		value, err := strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
		if err != nil {
			return err
		}
		out[filepath.Base(path)] = value
		return nil
	})
	if err != nil {
		return nil, err
	}
	return out, nil
}

View file

@ -19,8 +19,8 @@ var strategies = map[string]NetworkStrategy{
// NetworkStrategy represents a specific network configuration for
// a container's networking stack
type NetworkStrategy interface {
Create(*Network, int, map[string]string) error
Initialize(*Network, map[string]string) error
Create(*Network, int, *NetworkState) error
Initialize(*Network, *NetworkState) error
}
// GetStrategy returns the specific network strategy for the

View file

@ -27,3 +27,14 @@ type Network struct {
// container's interfaces if a pair is created, specifically in the case of type veth
Mtu int `json:"mtu,omitempty"`
}
// NetworkState describes the network-specific runtime state that libcontainer
// maintains for all running containers.
// Do not depend on it outside of libcontainer.
type NetworkState struct {
// VethHost is the name of the veth interface on the host.
VethHost string `json:"veth_host,omitempty"`
// VethChild is the name of the veth interface created inside the container for the child.
VethChild string `json:"veth_child,omitempty"`
// NsPath is the net namespace path.
NsPath string `json:"ns_path,omitempty"`
}

View file

@ -16,7 +16,7 @@ type Veth struct {
const defaultDevice = "eth0"
func (v *Veth) Create(n *Network, nspid int, context map[string]string) error {
func (v *Veth) Create(n *Network, nspid int, networkState *NetworkState) error {
var (
bridge = n.Bridge
prefix = n.VethPrefix
@ -31,8 +31,6 @@ func (v *Veth) Create(n *Network, nspid int, context map[string]string) error {
if err != nil {
return err
}
context["veth-host"] = name1
context["veth-child"] = name2
if err := SetInterfaceMaster(name1, bridge); err != nil {
return err
}
@ -45,16 +43,16 @@ func (v *Veth) Create(n *Network, nspid int, context map[string]string) error {
if err := SetInterfaceInNamespacePid(name2, nspid); err != nil {
return err
}
networkState.VethHost = name1
networkState.VethChild = name2
return nil
}
func (v *Veth) Initialize(config *Network, context map[string]string) error {
var (
vethChild string
exists bool
)
if vethChild, exists = context["veth-child"]; !exists {
return fmt.Errorf("vethChild does not exist in network context")
func (v *Veth) Initialize(config *Network, networkState *NetworkState) error {
var vethChild = networkState.VethChild
if vethChild == "" {
return fmt.Errorf("vethChild is not specified")
}
if err := InterfaceDown(vethChild); err != nil {
return fmt.Errorf("interface down %s %s", vethChild, err)

View file

@ -19,19 +19,20 @@ var execCommand = cli.Command{
}
func execAction(context *cli.Context) {
var nspid, exitCode int
var exitCode int
container, err := loadContainer()
if err != nil {
log.Fatal(err)
}
if nspid, err = readPid(); err != nil && !os.IsNotExist(err) {
log.Fatalf("unable to read pid: %s", err)
state, err := libcontainer.GetState(dataPath)
if err != nil && !os.IsNotExist(err) {
log.Fatalf("unable to read state.json: %s", err)
}
if nspid > 0 {
err = namespaces.ExecIn(container, nspid, []string(context.Args()))
if state != nil {
err = namespaces.ExecIn(container, state, []string(context.Args()))
} else {
term := namespaces.NewTerminal(os.Stdin, os.Stdout, os.Stderr, container.Tty)
exitCode, err = startContainer(container, term, dataPath, []string(context.Args()))

View file

@ -7,7 +7,6 @@ import (
"github.com/codegangsta/cli"
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/cgroups/fs"
)
var statsCommand = cli.Command{
@ -22,7 +21,12 @@ func statsAction(context *cli.Context) {
log.Fatal(err)
}
stats, err := getContainerStats(container)
runtimeCkpt, err := libcontainer.GetState(dataPath)
if err != nil {
log.Fatal(err)
}
stats, err := getStats(container, runtimeCkpt)
if err != nil {
log.Fatalf("Failed to get stats - %v\n", err)
}
@ -31,8 +35,8 @@ func statsAction(context *cli.Context) {
}
// returns the container stats in json format.
func getContainerStats(container *libcontainer.Config) (string, error) {
stats, err := fs.GetStats(container.Cgroups)
func getStats(container *libcontainer.Config, state *libcontainer.State) (string, error) {
stats, err := libcontainer.GetStats(container, state)
if err != nil {
return "", err
}

View file

@ -2,11 +2,9 @@ package main
import (
"encoding/json"
"io/ioutil"
"log"
"os"
"path/filepath"
"strconv"
"github.com/docker/libcontainer"
)
@ -26,20 +24,6 @@ func loadContainer() (*libcontainer.Config, error) {
return container, nil
}
func readPid() (int, error) {
data, err := ioutil.ReadFile(filepath.Join(dataPath, "pid"))
if err != nil {
return -1, err
}
pid, err := strconv.Atoi(string(data))
if err != nil {
return -1, err
}
return pid, nil
}
func openLog(name string) error {
f, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0755)
if err != nil {

View file

@ -0,0 +1,196 @@
{
"capabilities": [
"CHOWN",
"DAC_OVERRIDE",
"FOWNER",
"MKNOD",
"NET_RAW",
"SETGID",
"SETUID",
"SETFCAP",
"SETPCAP",
"NET_BIND_SERVICE",
"SYS_CHROOT",
"KILL"
],
"cgroups": {
"allowed_devices": [
{
"cgroup_permissions": "m",
"major_number": -1,
"minor_number": -1,
"type": 99
},
{
"cgroup_permissions": "m",
"major_number": -1,
"minor_number": -1,
"type": 98
},
{
"cgroup_permissions": "rwm",
"major_number": 5,
"minor_number": 1,
"path": "/dev/console",
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 4,
"path": "/dev/tty0",
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 4,
"minor_number": 1,
"path": "/dev/tty1",
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 136,
"minor_number": -1,
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 5,
"minor_number": 2,
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 10,
"minor_number": 200,
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 3,
"path": "/dev/null",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 5,
"path": "/dev/zero",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 7,
"path": "/dev/full",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 5,
"path": "/dev/tty",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 9,
"path": "/dev/urandom",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 8,
"path": "/dev/random",
"type": 99
}
],
"name": "docker-koye",
"parent": "docker"
},
"restrict_sys": true,
"apparmor_profile": "docker-default",
"mount_config": {
"device_nodes": [
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 3,
"path": "/dev/null",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 5,
"path": "/dev/zero",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 7,
"path": "/dev/full",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 5,
"path": "/dev/tty",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 9,
"path": "/dev/urandom",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 8,
"path": "/dev/random",
"type": 99
}
]
},
"environment": [
"HOME=/",
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"HOSTNAME=koye",
"TERM=xterm"
],
"hostname": "koye",
"namespaces": {
"NEWIPC": true,
"NEWNET": true,
"NEWNS": true,
"NEWPID": true,
"NEWUTS": true
},
"networks": [
{
"address": "127.0.0.1/0",
"gateway": "localhost",
"mtu": 1500,
"type": "loopback"
}
],
"tty": true,
"user": "daemon"
}

View file

@ -116,11 +116,7 @@
"name": "docker-koye",
"parent": "docker"
},
"context": {
"mount_label": "",
"process_label": "",
"restrictions": "true"
},
"restrict_sys": true,
"mount_config": {
"device_nodes": [
{

View file

@ -116,11 +116,7 @@
"name": "docker-koye",
"parent": "docker"
},
"context": {
"mount_label": "",
"process_label": "",
"restrictions": "true"
},
"restrict_sys": true,
"mount_config": {
"device_nodes": [
{

View file

@ -0,0 +1,197 @@
{
"capabilities": [
"CHOWN",
"DAC_OVERRIDE",
"FOWNER",
"MKNOD",
"NET_RAW",
"SETGID",
"SETUID",
"SETFCAP",
"SETPCAP",
"NET_BIND_SERVICE",
"SYS_CHROOT",
"KILL"
],
"cgroups": {
"allowed_devices": [
{
"cgroup_permissions": "m",
"major_number": -1,
"minor_number": -1,
"type": 99
},
{
"cgroup_permissions": "m",
"major_number": -1,
"minor_number": -1,
"type": 98
},
{
"cgroup_permissions": "rwm",
"major_number": 5,
"minor_number": 1,
"path": "/dev/console",
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 4,
"path": "/dev/tty0",
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 4,
"minor_number": 1,
"path": "/dev/tty1",
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 136,
"minor_number": -1,
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 5,
"minor_number": 2,
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 10,
"minor_number": 200,
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 3,
"path": "/dev/null",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 5,
"path": "/dev/zero",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 7,
"path": "/dev/full",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 5,
"path": "/dev/tty",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 9,
"path": "/dev/urandom",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 8,
"path": "/dev/random",
"type": 99
}
],
"name": "docker-koye",
"parent": "docker"
},
"restrict_sys": true,
"process_label": "system_u:system_r:svirt_lxc_net_t:s0:c164,c475",
"mount_config": {
"mount_label": "system_u:system_r:svirt_lxc_net_t:s0:c164,c475",
"device_nodes": [
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 3,
"path": "/dev/null",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 5,
"path": "/dev/zero",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 7,
"path": "/dev/full",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 5,
"path": "/dev/tty",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 9,
"path": "/dev/urandom",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 8,
"path": "/dev/random",
"type": 99
}
]
},
"environment": [
"HOME=/",
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"HOSTNAME=koye",
"TERM=xterm"
],
"hostname": "koye",
"namespaces": {
"NEWIPC": true,
"NEWNET": true,
"NEWNS": true,
"NEWPID": true,
"NEWUTS": true
},
"networks": [
{
"address": "127.0.0.1/0",
"gateway": "localhost",
"mtu": 1500,
"type": "loopback"
}
],
"tty": true,
"user": "daemon"
}

View file

@ -0,0 +1,55 @@
package libcontainer
import (
"encoding/json"
"os"
"path/filepath"
"github.com/docker/libcontainer/network"
)
// State represents a running container's state. SaveState encodes it as JSON
// into the state file and GetState decodes it back.
type State struct {
// InitPid is the init process id in the parent namespace
InitPid int `json:"init_pid,omitempty"`
// InitStartTime is the init process start time
InitStartTime string `json:"init_start_time,omitempty"`
// NetworkState is the network runtime state.
// NOTE(review): encoding/json never considers a struct value empty, so the
// omitempty option has no effect on this field; it is always encoded.
NetworkState network.NetworkState `json:"network_state,omitempty"`
}
// stateFile is the name of the runtime state file. SaveState, GetState and
// DeleteState join it onto the base path they are given.
const stateFile = "state.json"
// SaveState writes the container's runtime state as JSON to a state.json
// file in the directory basePath. os.Create is used, so an existing file is
// truncated.
//
// It returns the first error encountered while creating, encoding to, or
// closing the file. The close error is reported rather than discarded
// because a failed close can mean the encoded state never reached the file.
func SaveState(basePath string, state *State) error {
	f, err := os.Create(filepath.Join(basePath, stateFile))
	if err != nil {
		return err
	}
	if err := json.NewEncoder(f).Encode(state); err != nil {
		// The encode failure is the error worth reporting; the close here
		// only releases the descriptor.
		f.Close()
		return err
	}
	return f.Close()
}
// GetState loads the runtime state of a container from the state.json file
// located in basePath.
//
// The error from opening the file is returned as-is, so callers can inspect
// it (for example with os.IsNotExist). A decoding failure is returned
// unchanged as well; in both cases the returned state is nil.
func GetState(basePath string) (*State, error) {
	statePath := filepath.Join(basePath, stateFile)

	file, err := os.Open(statePath)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	// Decode through a pointer-to-pointer so the decoder allocates the State.
	var loaded *State
	decodeErr := json.NewDecoder(file).Decode(&loaded)
	if decodeErr != nil {
		return nil, decodeErr
	}
	return loaded, nil
}
// DeleteState removes the state.json file found in basePath and returns the
// error, if any, reported by os.Remove.
func DeleteState(basePath string) error {
	target := filepath.Join(basePath, stateFile)
	return os.Remove(target)
}

View file

@ -0,0 +1,11 @@
package libcontainer
import (
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/network"
)
// ContainerStats bundles the statistics reported for a container: the
// network counters and the cgroup statistics.
//
// The JSON tags must not contain a space after the comma: encoding/json
// would read " omitempty" as an unknown option and ignore it. With the
// option spelled correctly, a nil CgroupStats is omitted from the output
// instead of being encoded as null. NetworkStats is a struct value, which
// encoding/json never treats as empty, so it is always encoded.
type ContainerStats struct {
	NetworkStats network.NetworkStats `json:"network_stats,omitempty"`
	CgroupStats  *cgroups.Stats       `json:"cgroup_stats,omitempty"`
}