Initial commit of libcontainer running docker

Docker-DCO-1.1-Signed-off-by: Michael Crosby <michael@crosbymichael.com> (github: crosbymichael)
This commit is contained in:
Michael Crosby 2014-02-21 17:11:57 -08:00
parent 332755b99d
commit 2419e63d24
7 changed files with 422 additions and 4 deletions

View file

@ -530,6 +530,7 @@ func (container *Container) Start() (err error) {
}
populateCommand(container)
container.command.Env = env
// Setup logging of stdout and stderr to disk
if err := container.runtime.LogToDisk(container.stdout, container.logPath("json"), "stdout"); err != nil {

View file

@ -0,0 +1,41 @@
package namespaces
import (
"github.com/dotcloud/docker/pkg/cgroups"
"github.com/dotcloud/docker/pkg/libcontainer"
)
// getDefaultTemplate builds the baseline libcontainer configuration that
// docker starts from before per-container settings are applied: a fixed
// capability list, the IPC/NET/NS/PID/UTS namespaces and a "docker" cgroup
// with device access disabled.
//
// NOTE(review): createContainer clears Capabilities for privileged
// containers, so the list presumably names capabilities to drop rather than
// to grant — confirm against libcontainer.
func getDefaultTemplate() *libcontainer.Container {
	tmpl := new(libcontainer.Container)

	tmpl.Capabilities = libcontainer.Capabilities{
		libcontainer.CAP_SETPCAP,
		libcontainer.CAP_SYS_MODULE,
		libcontainer.CAP_SYS_RAWIO,
		libcontainer.CAP_SYS_PACCT,
		libcontainer.CAP_SYS_ADMIN,
		libcontainer.CAP_SYS_NICE,
		libcontainer.CAP_SYS_RESOURCE,
		libcontainer.CAP_SYS_TIME,
		libcontainer.CAP_SYS_TTY_CONFIG,
		libcontainer.CAP_MKNOD,
		libcontainer.CAP_AUDIT_WRITE,
		libcontainer.CAP_AUDIT_CONTROL,
		libcontainer.CAP_MAC_ADMIN,
		libcontainer.CAP_MAC_OVERRIDE,
		libcontainer.CAP_NET_ADMIN,
	}

	tmpl.Namespaces = libcontainer.Namespaces{
		libcontainer.CLONE_NEWIPC,
		libcontainer.CLONE_NEWNET,
		libcontainer.CLONE_NEWNS,
		libcontainer.CLONE_NEWPID,
		libcontainer.CLONE_NEWUTS,
	}

	tmpl.Cgroups = &cgroups.Cgroup{
		Name:         "docker",
		DeviceAccess: false,
	}

	return tmpl
}

View file

@ -0,0 +1,349 @@
package namespaces
import (
"encoding/json"
"errors"
"fmt"
"github.com/dotcloud/docker/execdriver"
"github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/network"
"github.com/dotcloud/docker/pkg/libcontainer/nsinit"
"github.com/dotcloud/docker/pkg/libcontainer/utils"
"github.com/dotcloud/docker/pkg/system"
"github.com/dotcloud/docker/pkg/term"
"io"
"io/ioutil"
"log"
"os"
"os/exec"
"path/filepath"
"strings"
"syscall"
)
// DriverName is the identifier this exec driver registers itself under.
const DriverName = "namespaces"

// Version is the driver revision; Name joins it to DriverName.
const Version = "0.1"
// ErrNotSupported is returned by driver operations that this driver does not
// implement.
var ErrNotSupported = errors.New("not supported")
// init registers an init function for this driver with execdriver under
// DriverName. The registered function does nothing and always returns nil.
func init() {
execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error {
// Nothing to set up here; args is unused.
return nil
})
}
// driver is the namespaces exec driver. It carries no state of its own.
type driver struct{}
// NewDriver returns a new namespaces driver. The error result is always nil.
func NewDriver() (*driver, error) {
	d := new(driver)
	return d, nil
}
// Run starts the command through the nsinit binary in the namespaces of the
// container configuration and blocks until that process exits.
//
// The steps are order-sensitive:
//  1. the container configuration is written to container.json in the rootfs;
//  2. a pty master and console are created when the container wants a tty;
//  3. a sync pipe is created and its read end is made inheritable by the child;
//  4. the child is started, its pid is recorded in the rootfs, it is added to
//     its cgroup and handed its veth interface name over the pipe;
//  5. both pipe ends are closed to sync with the child;
//  6. stdio is copied between pipes and the child, startCallback is invoked
//     (when non-nil) and the child is waited on.
//
// It returns the child's exit status and a nil error when the child ran to
// completion (including a non-zero exit), or -1 and the error for any setup or
// wait failure. Once the child has been started, every setup failure kills
// and reaps it so that no process is left blocked on the sync pipe.
func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
	container := createContainer(c)
	if err := writeContainerFile(container, c.Rootfs); err != nil {
		return -1, err
	}

	var (
		console          string
		master           *os.File
		err              error
		inPipe           io.WriteCloser
		outPipe, errPipe io.ReadCloser
	)

	if container.Tty {
		log.Printf("setting up master and console")
		master, console, err = createMasterAndConsole()
		if err != nil {
			return -1, err
		}
	}
	// master is nil here when the container has no tty.
	c.Terminal = NewTerm(pipes, master)

	// create a pipe so that we can synchronize with the namespaced process and
	// pass the veth name to the child
	r, w, err := os.Pipe()
	if err != nil {
		return -1, err
	}
	// Release both ends on every return path. On the success path they are
	// also closed explicitly below; closing an *os.File a second time only
	// yields an error, which is ignored here.
	defer r.Close()
	defer w.Close()
	system.UsetCloseOnExec(r.Fd())

	args := append([]string{c.Entrypoint}, c.Arguments...)
	createCommand(c, container, console, "/nsinit.logs", r.Fd(), args)
	command := c

	if !container.Tty {
		log.Printf("opening pipes on command")
		if inPipe, err = command.StdinPipe(); err != nil {
			return -1, err
		}
		if outPipe, err = command.StdoutPipe(); err != nil {
			return -1, err
		}
		if errPipe, err = command.StderrPipe(); err != nil {
			return -1, err
		}
	}

	log.Printf("staring init")
	if err := command.Start(); err != nil {
		return -1, err
	}

	// abort tears down the already started child: it is killed and then
	// reaped so that it neither lingers blocked on the pipe nor stays behind
	// as a zombie. The original cause is what gets reported.
	abort := func(cause error) (int, error) {
		command.Process.Kill()
		command.Wait()
		return -1, cause
	}

	log.Printf("writting state file")
	if err := writePidFile(c.Rootfs, command.Process.Pid); err != nil {
		return abort(err)
	}
	defer deletePidFile(c.Rootfs)

	// Do this before syncing with child so that no children
	// can escape the cgroup
	if container.Cgroups != nil {
		log.Printf("setting up cgroups")
		if err := container.Cgroups.Apply(command.Process.Pid); err != nil {
			return abort(err)
		}
	}

	if container.Network != nil {
		log.Printf("creating veth pair")
		vethPair, err := initializeContainerVeth(container.Network.Bridge, container.Network.Mtu, command.Process.Pid)
		if err != nil {
			// The child has been started and must not be left behind.
			return abort(err)
		}
		log.Printf("sending %s as veth pair name", vethPair)
		sendVethName(w, vethPair)
	}

	// Sync with child
	log.Printf("closing sync pipes")
	w.Close()
	r.Close()

	if container.Tty {
		log.Printf("starting copy for tty")
		go io.Copy(pipes.Stdout, master)
		if pipes.Stdin != nil {
			go io.Copy(master, pipes.Stdin)
		}

		/*
			state, err := setupWindow(master)
			if err != nil {
				command.Process.Kill()
				return -1, err
			}
			defer term.RestoreTerminal(uintptr(syscall.Stdin), state)
		*/
	} else {
		log.Printf("starting copy for std pipes")
		if pipes.Stdin != nil {
			go func() {
				// Closing inPipe once the source is drained closes the
				// child's stdin.
				defer inPipe.Close()
				io.Copy(inPipe, pipes.Stdin)
			}()
		}
		go io.Copy(pipes.Stdout, outPipe)
		go io.Copy(pipes.Stderr, errPipe)
	}

	if startCallback != nil {
		startCallback(c)
	}

	log.Printf("waiting on process")
	if err := command.Wait(); err != nil {
		// A non-zero exit is not a driver failure; only other errors are.
		if _, ok := err.(*exec.ExitError); !ok {
			return -1, err
		}
	}
	log.Printf("process ended")
	return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil
}
// Kill delivers the signal numbered sig to the command's process.
//
// It returns an error when the command has no process (it was never started)
// or when the signal cannot be delivered.
func (d *driver) Kill(p *execdriver.Command, sig int) error {
	if p.Process == nil {
		return errors.New("container process has not been started")
	}
	// Honour the requested signal instead of always sending SIGKILL.
	return p.Process.Signal(syscall.Signal(sig))
}
// Restore is not implemented by this driver; it always returns
// ErrNotSupported and does not use c.
func (d *driver) Restore(c *execdriver.Command) error {
return ErrNotSupported
}
// Info is not implemented by this driver; it returns nil for every id.
//
// NOTE(review): callers that invoke methods on the result will fail on the
// nil value — confirm how execdriver.Info is consumed.
func (d *driver) Info(id string) execdriver.Info {
return nil
}
// Name returns the driver name and version joined by a dash.
func (d *driver) Name() string {
	return DriverName + "-" + Version
}
// GetPidsForContainer is not implemented by this driver; it always returns a
// nil slice and ErrNotSupported, regardless of id.
func (d *driver) GetPidsForContainer(id string) ([]int, error) {
return nil, ErrNotSupported
}
// writeContainerFile serializes container as JSON into container.json inside
// rootfs, replacing any existing file. It returns the marshalling or write
// error, if any.
func writeContainerFile(container *libcontainer.Container, rootfs string) error {
	data, err := json.Marshal(container)
	if err != nil {
		return err
	}
	// The file is plain data, so it is created without execute bits.
	return ioutil.WriteFile(filepath.Join(rootfs, "container.json"), data, 0644)
}
// getEnv returns the value assigned to key in env, a list of "NAME=value"
// entries, or the empty string when key is not present.
//
// The first entry whose name equals key wins. Only the first "=" separates
// the name from the value, so values may themselves contain "=". Entries
// without any "=" carry no value and are skipped.
func getEnv(key string, env []string) string {
	for _, pair := range env {
		parts := strings.SplitN(pair, "=", 2)
		if len(parts) == 2 && parts[0] == key {
			return parts[1]
		}
	}
	return ""
}
// sendVethName writes the veth interface name to pipe and returns any write
// error. It does not close pipe: the caller closes the write end afterwards
// so that the reader stops waiting for more data.
func sendVethName(pipe io.Writer, name string) error {
	_, err := fmt.Fprint(pipe, name)
	return err
}
// initializeContainerVeth creates a veth pair and configures the host end of
// it: the host end is attached to bridge, given the requested mtu and brought
// up. The other end is then moved into the namespace of the process nspid.
//
// It returns the name of the end that was moved, which the container needs to
// finish configuring the interface from inside its namespace. On any failure
// it returns an empty name and the error; a pair that was already created is
// not removed.
func initializeContainerVeth(bridge string, mtu, nspid int) (string, error) {
	hostSide, containerSide, err := createVethPair()
	if err != nil {
		return "", err
	}
	log.Printf("veth pair created %s <> %s", hostSide, containerSide)

	// Host-side configuration, applied in order; the first failure aborts.
	hostSetup := []func() error{
		func() error { return network.SetInterfaceMaster(hostSide, bridge) },
		func() error { return network.SetMtu(hostSide, mtu) },
		func() error { return network.InterfaceUp(hostSide) },
	}
	for _, step := range hostSetup {
		if stepErr := step(); stepErr != nil {
			return "", stepErr
		}
	}

	log.Printf("setting %s inside %d namespace", containerSide, nspid)
	err = network.SetInterfaceInNamespacePid(containerSide, nspid)
	if err != nil {
		return "", err
	}
	return containerSide, nil
}
// setupWindow applies the window size of this process's stdin to master and
// then switches stdin to raw mode, returning whatever term.SetRawTerminal
// returns. It returns a nil state and the error when the size cannot be read
// or applied.
func setupWindow(master *os.File) (*term.State, error) {
	stdinFd := os.Stdin.Fd()

	size, err := term.GetWinsize(stdinFd)
	if err != nil {
		return nil, err
	}

	err = term.SetWinsize(master.Fd(), size)
	if err != nil {
		return nil, err
	}

	return term.SetRawTerminal(stdinFd)
}
// createMasterAndConsole opens /dev/ptmx on the host and retrieves the pts
// name to use as the pty slave inside the container.
//
// It returns the open master, which the caller owns and must close, and the
// console path. On failure it returns a nil master, an empty path and the
// error; a master that was already opened is closed first so the descriptor
// is not leaked.
func createMasterAndConsole() (*os.File, string, error) {
	master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
	if err != nil {
		return nil, "", err
	}
	console, err := system.Ptsname(master)
	if err != nil {
		master.Close()
		return nil, "", err
	}
	if err := system.Unlockpt(master); err != nil {
		master.Close()
		return nil, "", err
	}
	return master, console, nil
}
// createVethPair generates two random interface names with the "dock" prefix
// and creates a veth pair using them.
//
// It returns both names on success. On any failure both names are empty and
// err is set, so callers never see a half-populated result.
func createVethPair() (name1 string, name2 string, err error) {
	first, err := utils.GenerateRandomName("dock", 4)
	if err != nil {
		return "", "", err
	}
	second, err := utils.GenerateRandomName("dock", 4)
	if err != nil {
		return "", "", err
	}
	if err := network.CreateVethPair(first, second); err != nil {
		return "", "", err
	}
	return first, second, nil
}
// writePidFile writes the namespaced process's pid, as decimal text, to
// .nspid in the container's rootfs, replacing any existing file. It returns
// the write error, if any.
func writePidFile(rootfs string, pid int) error {
	// The file is plain data, so it is created without execute bits.
	return ioutil.WriteFile(filepath.Join(rootfs, ".nspid"), []byte(fmt.Sprint(pid)), 0644)
}
// deletePidFile removes the .nspid file from the container's rootfs and
// returns the error reported by os.Remove, if any.
func deletePidFile(rootfs string) error {
	pidPath := filepath.Join(rootfs, ".nspid")
	return os.Remove(pidPath)
}
// createCommand configures c in place so that it executes the nsinit binary
// found on PATH with the "init" sub-command followed by args.
//
// It sets c's path and argument vector (console, sync pipe descriptor and log
// file are passed as flags), the clone flags derived from the container's
// namespaces, the container's environment, and the rootfs as the working
// directory. Nothing is returned.
//
// NOTE: the error from exec.LookPath is discarded; when nsinit is not found
// the command path is left empty.
func createCommand(c *execdriver.Command, container *libcontainer.Container,
	console, logFile string, pipe uintptr, args []string) {
	nsinitPath, _ := exec.LookPath("nsinit")

	initArgs := []string{
		nsinitPath,
		"-console", console,
		"-pipe", fmt.Sprint(pipe),
		"-log", logFile,
		"init",
	}
	initArgs = append(initArgs, args...)

	c.Path = nsinitPath
	c.Args = initArgs

	cloneFlags := nsinit.GetNamespaceFlags(container.Namespaces)
	c.SysProcAttr = &syscall.SysProcAttr{
		Cloneflags: uintptr(cloneFlags),
	}

	c.Env = container.Env
	c.Dir = c.Rootfs
}
// createContainer builds the libcontainer configuration for c on top of the
// default template.
//
// It copies the hostname (read from the HOSTNAME entry of c.Env), tty flag,
// user, working directory and environment from c, and appends
// "container=docker" to the environment. When c has network settings they
// are carried over with the address rendered as "ip/prefixlen". A privileged
// command gets a nil capability list, and resource limits, when present, are
// applied to the template's cgroup.
func createContainer(c *execdriver.Command) *libcontainer.Container {
	container := getDefaultTemplate()

	container.Hostname = getEnv("HOSTNAME", c.Env)
	container.Tty = c.Tty
	container.User = c.User
	container.WorkingDir = c.WorkingDir

	// Build the environment in a fresh slice: appending directly to c.Env
	// could write into spare capacity of the caller's backing array.
	env := make([]string, 0, len(c.Env)+1)
	env = append(env, c.Env...)
	container.Env = append(env, "container=docker")

	if c.Network != nil {
		container.Network = &libcontainer.Network{
			Mtu:     c.Network.Mtu,
			Address: fmt.Sprintf("%s/%d", c.Network.IPAddress, c.Network.IPPrefixLen),
			Gateway: c.Network.Gateway,
			Bridge:  c.Network.Bridge,
		}
	}
	if c.Privileged {
		container.Capabilities = nil
	}
	if c.Resources != nil {
		container.Cgroups.CpuShares = c.Resources.CpuShares
		container.Cgroups.Memory = c.Resources.Memory
		container.Cgroups.MemorySwap = c.Resources.MemorySwap
	}
	return container
}

View file

@ -0,0 +1,26 @@
package namespaces
import (
"github.com/dotcloud/docker/execdriver"
"github.com/dotcloud/docker/pkg/term"
"os"
)
// NsinitTerm is the terminal handle for a command run by this driver. It
// wraps the pty master file used by Close and Resize.
type NsinitTerm struct {
// master is the pty master; Resize treats a nil master as "nothing to do".
master *os.File
}
// NewTerm returns an NsinitTerm wrapping master. The pipes argument is
// accepted but not used.
func NewTerm(pipes *execdriver.Pipes, master *os.File) *NsinitTerm {
	return &NsinitTerm{master: master}
}
// Close closes the pty master and returns the close error, if any. When the
// terminal has no master there is nothing to release and nil is returned,
// matching how Resize treats a nil master.
func (t *NsinitTerm) Close() error {
	if t.master == nil {
		return nil
	}
	return t.master.Close()
}
// Resize sets the window size of the pty master to h rows by w columns, each
// converted to uint16. It is a no-op returning nil when there is no master.
func (t *NsinitTerm) Resize(h, w int) error {
	if t.master == nil {
		return nil
	}
	size := &term.Winsize{Height: uint16(h), Width: uint16(w)}
	return term.SetWinsize(t.master.Fd(), size)
}

View file

@ -227,7 +227,7 @@ func createCommand(container *libcontainer.Container, console, logFile string, p
"init"}, args...)...)
command.SysProcAttr = &syscall.SysProcAttr{
Cloneflags: uintptr(getNamespaceFlags(container.Namespaces)),
Cloneflags: uintptr(GetNamespaceFlags(container.Namespaces)),
}
command.Env = container.Env
return command

View file

@ -28,7 +28,7 @@ var namespaceFileMap = map[libcontainer.Namespace]string{
// getNamespaceFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare, and setns
func getNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) {
func GetNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) {
for _, ns := range namespaces {
flag |= namespaceMap[ns]
}

View file

@ -7,7 +7,8 @@ import (
"github.com/dotcloud/docker/dockerversion"
"github.com/dotcloud/docker/engine"
"github.com/dotcloud/docker/execdriver"
"github.com/dotcloud/docker/execdriver/lxc"
_ "github.com/dotcloud/docker/execdriver/lxc"
"github.com/dotcloud/docker/execdriver/namespaces"
"github.com/dotcloud/docker/graphdriver"
"github.com/dotcloud/docker/graphdriver/aufs"
_ "github.com/dotcloud/docker/graphdriver/btrfs"
@ -703,7 +704,7 @@ func NewRuntimeFromDirectory(config *DaemonConfig, eng *engine.Engine) (*Runtime
sysInfo := sysinfo.New(false)
ed, err := lxc.NewDriver(config.Root, sysInfo.AppArmor)
ed, err := namespaces.NewDriver()
if err != nil {
return nil, err
}