Merge pull request #3580 from crosbymichael/extract-lxc

Move LXC into a sub pkg and provide initial execution driver interface
This commit is contained in:
Guillaume J. Charmes 2014-01-17 18:10:10 -08:00
commit fec97e72db
20 changed files with 1057 additions and 615 deletions

6
api.go
View file

@ -657,16 +657,16 @@ func postContainersCreate(srv *Server, version float64, w http.ResponseWriter, r
for scanner.Scan() {
out.Warnings = append(out.Warnings, scanner.Text())
}
if job.GetenvInt("Memory") > 0 && !srv.runtime.capabilities.MemoryLimit {
if job.GetenvInt("Memory") > 0 && !srv.runtime.sysInfo.MemoryLimit {
log.Println("WARNING: Your kernel does not support memory limit capabilities. Limitation discarded.")
out.Warnings = append(out.Warnings, "Your kernel does not support memory limit capabilities. Limitation discarded.")
}
if job.GetenvInt("Memory") > 0 && !srv.runtime.capabilities.SwapLimit {
if job.GetenvInt("Memory") > 0 && !srv.runtime.sysInfo.SwapLimit {
log.Println("WARNING: Your kernel does not support swap limit capabilities. Limitation discarded.")
out.Warnings = append(out.Warnings, "Your kernel does not support memory swap capabilities. Limitation discarded.")
}
if !job.GetenvBool("NetworkDisabled") && srv.runtime.capabilities.IPv4ForwardingDisabled {
if !job.GetenvBool("NetworkDisabled") && srv.runtime.sysInfo.IPv4ForwardingDisabled {
log.Println("Warning: IPv4 forwarding is disabled.")
out.Warnings = append(out.Warnings, "IPv4 forwarding is disabled.")
}

View file

@ -12,8 +12,13 @@ import (
"strings"
)
// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
type Values struct {
Memory int64 `json:"memory"`
MemorySwap int64 `json:"memory_swap"`
CpuShares int64 `json:"cpu_shares"`
}
// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
func FindCgroupMountpoint(subsystem string) (string, error) {
mounts, err := mount.GetMounts()
if err != nil {

View file

@ -12,6 +12,7 @@ import (
"github.com/dotcloud/docker/auth"
"github.com/dotcloud/docker/engine"
flag "github.com/dotcloud/docker/pkg/mflag"
"github.com/dotcloud/docker/pkg/sysinfo"
"github.com/dotcloud/docker/pkg/term"
"github.com/dotcloud/docker/registry"
"github.com/dotcloud/docker/utils"
@ -470,7 +471,7 @@ func (cli *DockerCli) CmdInfo(args ...string) error {
fmt.Fprintf(cli.out, "Debug mode (client): %v\n", os.Getenv("DEBUG") != "")
fmt.Fprintf(cli.out, "Fds: %d\n", remoteInfo.GetInt("NFd"))
fmt.Fprintf(cli.out, "Goroutines: %d\n", remoteInfo.GetInt("NGoroutines"))
fmt.Fprintf(cli.out, "LXC Version: %s\n", remoteInfo.Get("LXCVersion"))
fmt.Fprintf(cli.out, "Execution Driver: %s\n", remoteInfo.Get("ExecutionDriver"))
fmt.Fprintf(cli.out, "EventsListeners: %d\n", remoteInfo.GetInt("NEventsListener"))
fmt.Fprintf(cli.out, "Kernel Version: %s\n", remoteInfo.Get("KernelVersion"))
@ -1745,14 +1746,14 @@ func (cli *DockerCli) CmdTag(args ...string) error {
}
//FIXME Only used in tests
func ParseRun(args []string, capabilities *Capabilities) (*Config, *HostConfig, *flag.FlagSet, error) {
func ParseRun(args []string, sysInfo *sysinfo.SysInfo) (*Config, *HostConfig, *flag.FlagSet, error) {
cmd := flag.NewFlagSet("run", flag.ContinueOnError)
cmd.SetOutput(ioutil.Discard)
cmd.Usage = nil
return parseRun(cmd, args, capabilities)
return parseRun(cmd, args, sysInfo)
}
func parseRun(cmd *flag.FlagSet, args []string, capabilities *Capabilities) (*Config, *HostConfig, *flag.FlagSet, error) {
func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config, *HostConfig, *flag.FlagSet, error) {
var (
// FIXME: use utils.ListOpts for attach and volumes?
flAttach = NewListOpts(ValidateAttach)
@ -1802,7 +1803,7 @@ func parseRun(cmd *flag.FlagSet, args []string, capabilities *Capabilities) (*Co
}
// Check if the kernel supports memory limit cgroup.
if capabilities != nil && *flMemoryString != "" && !capabilities.MemoryLimit {
if sysInfo != nil && *flMemoryString != "" && !sysInfo.MemoryLimit {
*flMemoryString = ""
}
@ -1934,7 +1935,7 @@ func parseRun(cmd *flag.FlagSet, args []string, capabilities *Capabilities) (*Co
PublishAllPorts: *flPublishAll,
}
if capabilities != nil && flMemory > 0 && !capabilities.SwapLimit {
if sysInfo != nil && flMemory > 0 && !sysInfo.SwapLimit {
//fmt.Fprintf(stdout, "WARNING: Your kernel does not support swap limit capabilities. Limitation discarded.\n")
config.MemorySwap = -1
}

View file

@ -1,11 +1,12 @@
package docker
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"github.com/dotcloud/docker/archive"
"github.com/dotcloud/docker/cgroups"
"github.com/dotcloud/docker/execdriver"
"github.com/dotcloud/docker/graphdriver"
"github.com/dotcloud/docker/mount"
"github.com/dotcloud/docker/pkg/term"
@ -16,9 +17,7 @@ import (
"log"
"net"
"os"
"os/exec"
"path"
"strconv"
"strings"
"sync"
"syscall"
@ -55,7 +54,7 @@ type Container struct {
Name string
Driver string
cmd *exec.Cmd
process *execdriver.Process
stdout *utils.WriteBroadcaster
stderr *utils.WriteBroadcaster
stdin io.ReadCloser
@ -235,10 +234,6 @@ func (container *Container) Inject(file io.Reader, pth string) error {
return nil
}
func (container *Container) Cmd() *exec.Cmd {
return container.cmd
}
func (container *Container) When() time.Time {
return container.Created
}
@ -305,23 +300,14 @@ func (container *Container) generateEnvConfig(env []string) error {
return nil
}
func (container *Container) generateLXCConfig() error {
fo, err := os.Create(container.lxcConfigPath())
if err != nil {
return err
}
defer fo.Close()
return LxcTemplateCompiled.Execute(fo, container)
}
func (container *Container) startPty() error {
func (container *Container) setupPty() error {
ptyMaster, ptySlave, err := pty.Open()
if err != nil {
return err
}
container.ptyMaster = ptyMaster
container.cmd.Stdout = ptySlave
container.cmd.Stderr = ptySlave
container.process.Stdout = ptySlave
container.process.Stderr = ptySlave
// Copy the PTYs to our broadcasters
go func() {
@ -333,8 +319,8 @@ func (container *Container) startPty() error {
// stdin
if container.Config.OpenStdin {
container.cmd.Stdin = ptySlave
container.cmd.SysProcAttr.Setctty = true
container.process.Stdin = ptySlave
container.process.SysProcAttr.Setctty = true
go func() {
defer container.stdin.Close()
utils.Debugf("startPty: begin of stdin pipe")
@ -342,18 +328,14 @@ func (container *Container) startPty() error {
utils.Debugf("startPty: end of stdin pipe")
}()
}
if err := container.cmd.Start(); err != nil {
return err
}
ptySlave.Close()
return nil
}
func (container *Container) start() error {
container.cmd.Stdout = container.stdout
container.cmd.Stderr = container.stderr
func (container *Container) setupStd() error {
container.process.Stdout = container.stdout
container.process.Stderr = container.stderr
if container.Config.OpenStdin {
stdin, err := container.cmd.StdinPipe()
stdin, err := container.process.StdinPipe()
if err != nil {
return err
}
@ -364,7 +346,7 @@ func (container *Container) start() error {
utils.Debugf("start: end of stdin pipe")
}()
}
return container.cmd.Start()
return nil
}
func (container *Container) Attach(stdin io.ReadCloser, stdinCloser io.Closer, stdout io.Writer, stderr io.Writer) chan error {
@ -384,12 +366,14 @@ func (container *Container) Attach(stdin io.ReadCloser, stdinCloser io.Closer, s
if container.Config.StdinOnce && !container.Config.Tty {
defer cStdin.Close()
} else {
if cStdout != nil {
defer cStdout.Close()
}
if cStderr != nil {
defer cStderr.Close()
}
defer func() {
if cStdout != nil {
cStdout.Close()
}
if cStderr != nil {
cStderr.Close()
}
}()
}
if container.Config.Tty {
_, err = utils.CopyEscapable(cStdin, stdin)
@ -485,12 +469,15 @@ func (container *Container) Attach(stdin io.ReadCloser, stdinCloser io.Closer, s
}
return utils.Go(func() error {
if cStdout != nil {
defer cStdout.Close()
}
if cStderr != nil {
defer cStderr.Close()
}
defer func() {
if cStdout != nil {
cStdout.Close()
}
if cStderr != nil {
cStderr.Close()
}
}()
// FIXME: how to clean up the stdin goroutine without the unwanted side effect
// of closing the passed stdin? Add an intermediary io.Pipe?
for i := 0; i < nJobs; i += 1 {
@ -532,16 +519,16 @@ func (container *Container) Start() (err error) {
}
// Make sure the config is compatible with the current kernel
if container.Config.Memory > 0 && !container.runtime.capabilities.MemoryLimit {
if container.Config.Memory > 0 && !container.runtime.sysInfo.MemoryLimit {
log.Printf("WARNING: Your kernel does not support memory limit capabilities. Limitation discarded.\n")
container.Config.Memory = 0
}
if container.Config.Memory > 0 && !container.runtime.capabilities.SwapLimit {
if container.Config.Memory > 0 && !container.runtime.sysInfo.SwapLimit {
log.Printf("WARNING: Your kernel does not support swap limit capabilities. Limitation discarded.\n")
container.Config.MemorySwap = -1
}
if container.runtime.capabilities.IPv4ForwardingDisabled {
if container.runtime.sysInfo.IPv4ForwardingDisabled {
log.Printf("WARNING: IPv4 forwarding is disabled. Networking will not work")
}
@ -559,38 +546,6 @@ func (container *Container) Start() (err error) {
return err
}
if err := container.generateLXCConfig(); err != nil {
return err
}
var lxcStart string = "lxc-start"
if container.hostConfig.Privileged && container.runtime.capabilities.AppArmor {
lxcStart = path.Join(container.runtime.config.Root, "lxc-start-unconfined")
}
params := []string{
lxcStart,
"-n", container.ID,
"-f", container.lxcConfigPath(),
"--",
"/.dockerinit",
}
// Networking
if !container.Config.NetworkDisabled {
network := container.NetworkSettings
params = append(params,
"-g", network.Gateway,
"-i", fmt.Sprintf("%s/%d", network.IPAddress, network.IPPrefixLen),
"-mtu", strconv.Itoa(container.runtime.config.Mtu),
)
}
// User
if container.Config.User != "" {
params = append(params, "-u", container.Config.User)
}
// Setup environment
env := []string{
"HOME=/",
@ -602,10 +557,6 @@ func (container *Container) Start() (err error) {
env = append(env, "TERM=xterm")
}
if container.hostConfig.Privileged {
params = append(params, "-privileged")
}
// Init any links between the parent and children
runtime := container.runtime
@ -653,37 +604,12 @@ func (container *Container) Start() (err error) {
return err
}
var workingDir string
if container.Config.WorkingDir != "" {
workingDir := path.Clean(container.Config.WorkingDir)
utils.Debugf("[working dir] working dir is %s", workingDir)
workingDir = path.Clean(container.Config.WorkingDir)
if err := os.MkdirAll(path.Join(container.RootfsPath(), workingDir), 0755); err != nil {
return nil
}
params = append(params,
"-w", workingDir,
)
}
// Program
params = append(params, "--", container.Path)
params = append(params, container.Args...)
if RootIsShared() {
// lxc-start really needs / to be non-shared, or all kinds of stuff break
// when lxc-start unmount things and those unmounts propagate to the main
// mount namespace.
// What we really want is to clone into a new namespace and then
// mount / MS_REC|MS_SLAVE, but since we can't really clone or fork
// without exec in go we have to do this horrible shell hack...
shellString :=
"mount --make-rslave /; exec " +
utils.ShellQuoteArguments(params)
params = []string{
"unshare", "-m", "--", "/bin/sh", "-c", shellString,
}
}
root := container.RootfsPath()
@ -713,7 +639,6 @@ func (container *Container) Start() (err error) {
}
// Mount user specified volumes
for r, v := range container.Volumes {
mountAs := "ro"
if container.VolumesRW[r] {
@ -725,7 +650,48 @@ func (container *Container) Start() (err error) {
}
}
container.cmd = exec.Command(params[0], params[1:]...)
var (
en *execdriver.Network
driverConfig []string
)
if !container.Config.NetworkDisabled {
network := container.NetworkSettings
en = &execdriver.Network{
Gateway: network.Gateway,
Bridge: network.Bridge,
IPAddress: network.IPAddress,
IPPrefixLen: network.IPPrefixLen,
Mtu: container.runtime.config.Mtu,
}
}
if lxcConf := container.hostConfig.LxcConf; lxcConf != nil {
for _, pair := range lxcConf {
driverConfig = append(driverConfig, fmt.Sprintf("%s = %s", pair.Key, pair.Value))
}
}
cgroupValues := &cgroups.Values{
Memory: container.Config.Memory,
MemorySwap: container.Config.MemorySwap,
CpuShares: container.Config.CpuShares,
}
container.process = &execdriver.Process{
ID: container.ID,
Privileged: container.hostConfig.Privileged,
Rootfs: root,
InitPath: "/.dockerinit",
Entrypoint: container.Path,
Arguments: container.Args,
WorkingDir: workingDir,
Network: en,
Tty: container.Config.Tty,
User: container.Config.User,
Config: driverConfig,
Cgroups: cgroupValues,
}
container.process.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
// Setup logging of stdout and stderr to disk
if err := container.runtime.LogToDisk(container.stdout, container.logPath("json"), "stdout"); err != nil {
@ -734,59 +700,47 @@ func (container *Container) Start() (err error) {
if err := container.runtime.LogToDisk(container.stderr, container.logPath("json"), "stderr"); err != nil {
return err
}
container.cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
if container.Config.Tty {
err = container.startPty()
} else {
err = container.start()
}
if err != nil {
return err
}
// FIXME: save state on disk *first*, then converge
// this way disk state is used as a journal, eg. we can restore after crash etc.
container.State.SetRunning(container.cmd.Process.Pid)
// Init the lock
container.waitLock = make(chan struct{})
container.ToDisk()
go container.monitor()
// Setuping pipes and/or Pty
var setup func() error
if container.Config.Tty {
setup = container.setupPty
} else {
setup = container.setupStd
}
if err := setup(); err != nil {
return err
}
defer utils.Debugf("Container running: %v", container.State.IsRunning())
// We wait for the container to be fully running.
// Timeout after 5 seconds. In case of broken pipe, just retry.
// Note: The container can run and finish correctly before
// the end of this loop
for now := time.Now(); time.Since(now) < 5*time.Second; {
// If the container dies while waiting for it, just return
if !container.State.IsRunning() {
return nil
}
output, err := exec.Command("lxc-info", "-s", "-n", container.ID).CombinedOutput()
if err != nil {
utils.Debugf("Error with lxc-info: %s (%s)", err, output)
output, err = exec.Command("lxc-info", "-s", "-n", container.ID).CombinedOutput()
if err != nil {
utils.Debugf("Second Error with lxc-info: %s (%s)", err, output)
return err
callbackLock := make(chan struct{})
callback := func(process *execdriver.Process) {
container.State.SetRunning(process.Pid())
if process.Tty {
// The callback is called after the process Start()
// so we are in the parent process. In TTY mode, stdin/out/err is the PtySlace
// which we close here.
if c, ok := process.Stdout.(io.Closer); ok {
c.Close()
}
}
if strings.Contains(string(output), "RUNNING") {
return nil
if err := container.ToDisk(); err != nil {
utils.Debugf("%s", err)
}
utils.Debugf("Waiting for the container to start (running: %v): %s", container.State.IsRunning(), bytes.TrimSpace(output))
time.Sleep(50 * time.Millisecond)
close(callbackLock)
}
if container.State.IsRunning() {
return ErrContainerStartTimeout
// We use a callback here instead of a goroutine and an chan for
// syncronization purposes
cErr := utils.Go(func() error { return container.monitor(callback) })
// Start should not return until the process is actually running
select {
case <-callbackLock:
case err := <-cErr:
return err
}
return ErrContainerStart
return nil
}
func (container *Container) getBindMap() (map[string]BindMap, error) {
@ -1159,48 +1113,24 @@ func (container *Container) releaseNetwork() {
container.NetworkSettings = &NetworkSettings{}
}
// FIXME: replace this with a control socket within dockerinit
func (container *Container) waitLxc() error {
for {
output, err := exec.Command("lxc-info", "-n", container.ID).CombinedOutput()
if err != nil {
return err
}
if !strings.Contains(string(output), "RUNNING") {
return nil
}
time.Sleep(500 * time.Millisecond)
}
}
func (container *Container) monitor(callback execdriver.StartCallback) error {
var (
err error
exitCode int
)
func (container *Container) monitor() {
// Wait for the program to exit
// If the command does not exist, try to wait via lxc
// (This probably happens only for ghost containers, i.e. containers that were running when Docker started)
if container.cmd == nil {
utils.Debugf("monitor: waiting for container %s using waitLxc", container.ID)
if err := container.waitLxc(); err != nil {
utils.Errorf("monitor: while waiting for container %s, waitLxc had a problem: %s", container.ID, err)
}
if container.process == nil {
// This happends when you have a GHOST container with lxc
err = container.runtime.WaitGhost(container)
} else {
utils.Debugf("monitor: waiting for container %s using cmd.Wait", container.ID)
if err := container.cmd.Wait(); err != nil {
// Since non-zero exit status and signal terminations will cause err to be non-nil,
// we have to actually discard it. Still, log it anyway, just in case.
utils.Debugf("monitor: cmd.Wait reported exit status %s for container %s", err, container.ID)
exitCode, err = container.runtime.Run(container, callback)
}
if err != nil {
if container.runtime != nil && container.runtime.srv != nil {
container.runtime.srv.LogEvent("die", container.ID, container.runtime.repositories.ImageName(container.Image))
}
}
utils.Debugf("monitor: container %s finished", container.ID)
exitCode := -1
if container.cmd != nil {
exitCode = container.cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
}
if container.runtime != nil && container.runtime.srv != nil {
container.runtime.srv.LogEvent("die", container.ID, container.runtime.repositories.ImageName(container.Image))
}
// Cleanup
container.cleanup()
@ -1210,21 +1140,20 @@ func (container *Container) monitor() {
container.stdin, container.stdinPipe = io.Pipe()
}
// Report status back
container.State.SetStopped(exitCode)
// Release the lock
close(container.waitLock)
if err := container.ToDisk(); err != nil {
// FIXME: there is a race condition here which causes this to fail during the unit tests.
// If another goroutine was waiting for Wait() to return before removing the container's root
// from the filesystem... At this point it may already have done so.
// This is because State.setStopped() has already been called, and has caused Wait()
// to return.
// FIXME: why are we serializing running state to disk in the first place?
//log.Printf("%s: Failed to dump configuration to the disk: %s", container.ID, err)
}
// FIXME: there is a race condition here which causes this to fail during the unit tests.
// If another goroutine was waiting for Wait() to return before removing the container's root
// from the filesystem... At this point it may already have done so.
// This is because State.setStopped() has already been called, and has caused Wait()
// to return.
// FIXME: why are we serializing running state to disk in the first place?
//log.Printf("%s: Failed to dump configuration to the disk: %s", container.ID, err)
container.ToDisk()
return err
}
func (container *Container) cleanup() {
@ -1267,13 +1196,7 @@ func (container *Container) kill(sig int) error {
if !container.State.IsRunning() {
return nil
}
if output, err := exec.Command("lxc-kill", "-n", container.ID, strconv.Itoa(sig)).CombinedOutput(); err != nil {
log.Printf("error killing container %s (%s, %s)", utils.TruncateID(container.ID), output, err)
return err
}
return nil
return container.runtime.Kill(container, sig)
}
func (container *Container) Kill() error {
@ -1288,11 +1211,11 @@ func (container *Container) Kill() error {
// 2. Wait for the process to die, in last resort, try to kill the process directly
if err := container.WaitTimeout(10 * time.Second); err != nil {
if container.cmd == nil {
if container.process == nil {
return fmt.Errorf("lxc-kill failed, impossible to kill the container %s", utils.TruncateID(container.ID))
}
log.Printf("Container %s failed to exit within 10 seconds of lxc-kill %s - trying direct SIGKILL", "SIGKILL", utils.TruncateID(container.ID))
if err := container.cmd.Process.Kill(); err != nil {
if err := container.runtime.Kill(container, 9); err != nil {
return err
}
}
@ -1463,10 +1386,6 @@ func (container *Container) EnvConfigPath() (string, error) {
return p, nil
}
func (container *Container) lxcConfigPath() string {
return path.Join(container.root, "config.lxc")
}
// This method must be exported to be used from the lxc template
func (container *Container) RootfsPath() string {
return container.rootfs

2
execdriver/MAINTAINERS Normal file
View file

@ -0,0 +1,2 @@
Michael Crosby <michael@crosbymichael.com> (@crosbymichael)
Guillaume Charmes <guillaume@dotcloud.com> (@creack)

View file

@ -0,0 +1,87 @@
package chroot
import (
"fmt"
"github.com/dotcloud/docker/execdriver"
"github.com/dotcloud/docker/mount"
"os"
"os/exec"
)
const (
DriverName = "chroot"
Version = "0.1"
)
func init() {
execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error {
if err := mount.ForceMount("proc", "proc", "proc", ""); err != nil {
return err
}
defer mount.ForceUnmount("proc")
cmd := exec.Command(args.Args[0], args.Args[1:]...)
cmd.Stderr = os.Stderr
cmd.Stdout = os.Stdout
cmd.Stdin = os.Stdin
return cmd.Run()
})
}
type driver struct {
}
func NewDriver() (*driver, error) {
return &driver{}, nil
}
func (d *driver) Run(c *execdriver.Process, startCallback execdriver.StartCallback) (int, error) {
params := []string{
"chroot",
c.Rootfs,
"/.dockerinit",
"-driver",
DriverName,
}
params = append(params, c.Entrypoint)
params = append(params, c.Arguments...)
var (
name = params[0]
arg = params[1:]
)
aname, err := exec.LookPath(name)
if err != nil {
aname = name
}
c.Path = aname
c.Args = append([]string{name}, arg...)
if err := c.Start(); err != nil {
return -1, err
}
if startCallback != nil {
startCallback(c)
}
err = c.Wait()
return c.GetExitCode(), err
}
func (d *driver) Kill(p *execdriver.Process, sig int) error {
return p.Process.Kill()
}
func (d *driver) Wait(id string) error {
panic("Not Implemented")
}
func (d *driver) Info(id string) execdriver.Info {
panic("Not implemented")
}
func (d *driver) Name() string {
return fmt.Sprintf("%s-%s", DriverName, Version)
}

115
execdriver/driver.go Normal file
View file

@ -0,0 +1,115 @@
package execdriver
import (
"errors"
"github.com/dotcloud/docker/cgroups"
"os/exec"
"syscall"
)
var (
ErrNotRunning = errors.New("Process could not be started")
ErrWaitTimeoutReached = errors.New("Wait timeout reached")
ErrDriverAlreadyRegistered = errors.New("A driver already registered this docker init function")
ErrDriverNotFound = errors.New("The requested docker init has not been found")
)
var dockerInitFcts map[string]InitFunc
type (
StartCallback func(*Process)
InitFunc func(i *InitArgs) error
)
func RegisterInitFunc(name string, fct InitFunc) error {
if dockerInitFcts == nil {
dockerInitFcts = make(map[string]InitFunc)
}
if _, ok := dockerInitFcts[name]; ok {
return ErrDriverAlreadyRegistered
}
dockerInitFcts[name] = fct
return nil
}
func GetInitFunc(name string) (InitFunc, error) {
fct, ok := dockerInitFcts[name]
if !ok {
return nil, ErrDriverNotFound
}
return fct, nil
}
// Args provided to the init function for a driver
type InitArgs struct {
User string
Gateway string
Ip string
WorkDir string
Privileged bool
Env []string
Args []string
Mtu int
Driver string
}
// Driver specific information based on
// processes registered with the driver
type Info interface {
IsRunning() bool
}
type Driver interface {
Run(c *Process, startCallback StartCallback) (int, error) // Run executes the process and blocks until the process exits and returns the exit code
Kill(c *Process, sig int) error
Wait(id string) error // Wait on an out of process...process - lxc ghosts TODO: Rename to reattach, reconnect
Name() string // Driver name
Info(id string) Info // "temporary" hack (until we move state from core to plugins)
}
// Network settings of the container
type Network struct {
Gateway string `json:"gateway"`
IPAddress string `json:"ip"`
Bridge string `json:"bridge"`
IPPrefixLen int `json:"ip_prefix_len"`
Mtu int `json:"mtu"`
}
// Process wrapps an os/exec.Cmd to add more metadata
// TODO: Rename to Command
type Process struct {
exec.Cmd
ID string `json:"id"`
Privileged bool `json:"privileged"`
User string `json:"user"`
Rootfs string `json:"rootfs"` // root fs of the container
InitPath string `json:"initpath"` // dockerinit
Entrypoint string `json:"entrypoint"`
Arguments []string `json:"arguments"`
WorkingDir string `json:"working_dir"`
ConfigPath string `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver
Tty bool `json:"tty"`
Network *Network `json:"network"` // if network is nil then networking is disabled
Config []string `json:"config"` // generic values that specific drivers can consume
Cgroups *cgroups.Values `json:"cgroups"`
}
// Return the pid of the process
// If the process is nil -1 will be returned
func (c *Process) Pid() int {
if c.Process == nil {
return -1
}
return c.Process.Pid
}
// Return the exit code of the process
// if the process has not exited -1 will be returned
func (c *Process) GetExitCode() int {
if c.ProcessState == nil {
return -1
}
return c.ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
}

323
execdriver/lxc/driver.go Normal file
View file

@ -0,0 +1,323 @@
package lxc
import (
"fmt"
"github.com/dotcloud/docker/execdriver"
"github.com/dotcloud/docker/utils"
"io/ioutil"
"log"
"os"
"os/exec"
"path"
"strconv"
"strings"
"syscall"
"time"
)
const DriverName = "lxc"
func init() {
execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error {
if err := setupHostname(args); err != nil {
return err
}
if err := setupNetworking(args); err != nil {
return err
}
if err := setupCapabilities(args); err != nil {
return err
}
if err := setupWorkingDirectory(args); err != nil {
return err
}
if err := changeUser(args); err != nil {
return err
}
path, err := exec.LookPath(args.Args[0])
if err != nil {
log.Printf("Unable to locate %v", args.Args[0])
os.Exit(127)
}
if err := syscall.Exec(path, args.Args, os.Environ()); err != nil {
return fmt.Errorf("dockerinit unable to execute %s - %s", path, err)
}
panic("Unreachable")
})
}
type driver struct {
root string // root path for the driver to use
apparmor bool
sharedRoot bool
}
func NewDriver(root string, apparmor bool) (*driver, error) {
// setup unconfined symlink
if err := linkLxcStart(root); err != nil {
return nil, err
}
return &driver{
apparmor: apparmor,
root: root,
sharedRoot: rootIsShared(),
}, nil
}
func (d *driver) Name() string {
version := d.version()
return fmt.Sprintf("%s-%s", DriverName, version)
}
func (d *driver) Run(c *execdriver.Process, startCallback execdriver.StartCallback) (int, error) {
configPath, err := d.generateLXCConfig(c)
if err != nil {
return -1, err
}
params := []string{
"lxc-start",
"-n", c.ID,
"-f", configPath,
"--",
c.InitPath,
"-driver",
DriverName,
}
if c.Network != nil {
params = append(params,
"-g", c.Network.Gateway,
"-i", fmt.Sprintf("%s/%d", c.Network.IPAddress, c.Network.IPPrefixLen),
"-mtu", strconv.Itoa(c.Network.Mtu),
)
}
if c.User != "" {
params = append(params, "-u", c.User)
}
if c.Privileged {
if d.apparmor {
params[0] = path.Join(d.root, "lxc-start-unconfined")
}
params = append(params, "-privileged")
}
if c.WorkingDir != "" {
params = append(params, "-w", c.WorkingDir)
}
if d.sharedRoot {
// lxc-start really needs / to be non-shared, or all kinds of stuff break
// when lxc-start unmount things and those unmounts propagate to the main
// mount namespace.
// What we really want is to clone into a new namespace and then
// mount / MS_REC|MS_SLAVE, but since we can't really clone or fork
// without exec in go we have to do this horrible shell hack...
shellString :=
"mount --make-rslave /; exec " +
utils.ShellQuoteArguments(params)
params = []string{
"unshare", "-m", "--", "/bin/sh", "-c", shellString,
}
}
params = append(params, "--", c.Entrypoint)
params = append(params, c.Arguments...)
var (
name = params[0]
arg = params[1:]
)
aname, err := exec.LookPath(name)
if err != nil {
aname = name
}
c.Path = aname
c.Args = append([]string{name}, arg...)
if err := c.Start(); err != nil {
return -1, err
}
var (
waitErr error
waitLock = make(chan struct{})
)
go func() {
if err := c.Wait(); err != nil {
waitErr = err
}
close(waitLock)
}()
// Poll lxc for RUNNING status
if err := d.waitForStart(c, waitLock); err != nil {
return -1, err
}
if startCallback != nil {
startCallback(c)
}
<-waitLock
return c.GetExitCode(), waitErr
}
func (d *driver) Kill(c *execdriver.Process, sig int) error {
return d.kill(c, sig)
}
func (d *driver) Wait(id string) error {
for {
output, err := exec.Command("lxc-info", "-n", id).CombinedOutput()
if err != nil {
return err
}
if !strings.Contains(string(output), "RUNNING") {
return nil
}
time.Sleep(500 * time.Millisecond)
}
}
func (d *driver) version() string {
version := ""
if output, err := exec.Command("lxc-version").CombinedOutput(); err == nil {
outputStr := string(output)
if len(strings.SplitN(outputStr, ":", 2)) == 2 {
version = strings.TrimSpace(strings.SplitN(outputStr, ":", 2)[1])
}
}
return version
}
func (d *driver) kill(c *execdriver.Process, sig int) error {
output, err := exec.Command("lxc-kill", "-n", c.ID, strconv.Itoa(sig)).CombinedOutput()
if err != nil {
return fmt.Errorf("Err: %s Output: %s", err, output)
}
return nil
}
func (d *driver) waitForStart(c *execdriver.Process, waitLock chan struct{}) error {
var (
err error
output []byte
)
// We wait for the container to be fully running.
// Timeout after 5 seconds. In case of broken pipe, just retry.
// Note: The container can run and finish correctly before
// the end of this loop
for now := time.Now(); time.Since(now) < 5*time.Second; {
select {
case <-waitLock:
// If the process dies while waiting for it, just return
return nil
if c.ProcessState != nil && c.ProcessState.Exited() {
return nil
}
default:
}
output, err = d.getInfo(c.ID)
if err != nil {
output, err = d.getInfo(c.ID)
if err != nil {
return err
}
}
if strings.Contains(string(output), "RUNNING") {
return nil
}
time.Sleep(50 * time.Millisecond)
}
return execdriver.ErrNotRunning
}
func (d *driver) getInfo(id string) ([]byte, error) {
return exec.Command("lxc-info", "-s", "-n", id).CombinedOutput()
}
type info struct {
ID string
driver *driver
}
func (i *info) IsRunning() bool {
var running bool
output, err := i.driver.getInfo(i.ID)
if err != nil {
panic(err)
}
if strings.Contains(string(output), "RUNNING") {
running = true
}
return running
}
func (d *driver) Info(id string) execdriver.Info {
return &info{
ID: id,
driver: d,
}
}
func linkLxcStart(root string) error {
sourcePath, err := exec.LookPath("lxc-start")
if err != nil {
return err
}
targetPath := path.Join(root, "lxc-start-unconfined")
if _, err := os.Lstat(targetPath); err != nil && !os.IsNotExist(err) {
return err
} else if err == nil {
if err := os.Remove(targetPath); err != nil {
return err
}
}
return os.Symlink(sourcePath, targetPath)
}
// TODO: This can be moved to the mountinfo reader in the mount pkg
func rootIsShared() bool {
if data, err := ioutil.ReadFile("/proc/self/mountinfo"); err == nil {
for _, line := range strings.Split(string(data), "\n") {
cols := strings.Split(line, " ")
if len(cols) >= 6 && cols[4] == "/" {
return strings.HasPrefix(cols[6], "shared")
}
}
}
// No idea, probably safe to assume so
return true
}
func (d *driver) generateLXCConfig(p *execdriver.Process) (string, error) {
root := path.Join(d.root, "containers", p.ID, "config.lxc")
fo, err := os.Create(root)
if err != nil {
return "", err
}
defer fo.Close()
if err := LxcTemplateCompiled.Execute(fo, struct {
*execdriver.Process
AppArmor bool
}{
Process: p,
AppArmor: d.apparmor,
}); err != nil {
return "", err
}
return root, nil
}

153
execdriver/lxc/init.go Normal file
View file

@ -0,0 +1,153 @@
package lxc
import (
"fmt"
"github.com/dotcloud/docker/execdriver"
"github.com/dotcloud/docker/pkg/netlink"
"github.com/dotcloud/docker/utils"
"github.com/syndtr/gocapability/capability"
"net"
"os"
"strconv"
"strings"
"syscall"
)
func setupHostname(args *execdriver.InitArgs) error {
hostname := getEnv(args, "HOSTNAME")
if hostname == "" {
return nil
}
return setHostname(hostname)
}
// Setup networking
func setupNetworking(args *execdriver.InitArgs) error {
if args.Ip != "" {
// eth0
iface, err := net.InterfaceByName("eth0")
if err != nil {
return fmt.Errorf("Unable to set up networking: %v", err)
}
ip, ipNet, err := net.ParseCIDR(args.Ip)
if err != nil {
return fmt.Errorf("Unable to set up networking: %v", err)
}
if err := netlink.NetworkLinkAddIp(iface, ip, ipNet); err != nil {
return fmt.Errorf("Unable to set up networking: %v", err)
}
if err := netlink.NetworkSetMTU(iface, args.Mtu); err != nil {
return fmt.Errorf("Unable to set MTU: %v", err)
}
if err := netlink.NetworkLinkUp(iface); err != nil {
return fmt.Errorf("Unable to set up networking: %v", err)
}
// loopback
iface, err = net.InterfaceByName("lo")
if err != nil {
return fmt.Errorf("Unable to set up networking: %v", err)
}
if err := netlink.NetworkLinkUp(iface); err != nil {
return fmt.Errorf("Unable to set up networking: %v", err)
}
}
if args.Gateway != "" {
gw := net.ParseIP(args.Gateway)
if gw == nil {
return fmt.Errorf("Unable to set up networking, %s is not a valid gateway IP", args.Gateway)
}
if err := netlink.AddDefaultGw(gw); err != nil {
return fmt.Errorf("Unable to set up networking: %v", err)
}
}
return nil
}
// Setup working directory
func setupWorkingDirectory(args *execdriver.InitArgs) error {
if args.WorkDir == "" {
return nil
}
if err := syscall.Chdir(args.WorkDir); err != nil {
return fmt.Errorf("Unable to change dir to %v: %v", args.WorkDir, err)
}
return nil
}
// Takes care of dropping privileges to the desired user
func changeUser(args *execdriver.InitArgs) error {
if args.User == "" {
return nil
}
userent, err := utils.UserLookup(args.User)
if err != nil {
return fmt.Errorf("Unable to find user %v: %v", args.User, err)
}
uid, err := strconv.Atoi(userent.Uid)
if err != nil {
return fmt.Errorf("Invalid uid: %v", userent.Uid)
}
gid, err := strconv.Atoi(userent.Gid)
if err != nil {
return fmt.Errorf("Invalid gid: %v", userent.Gid)
}
if err := syscall.Setgid(gid); err != nil {
return fmt.Errorf("setgid failed: %v", err)
}
if err := syscall.Setuid(uid); err != nil {
return fmt.Errorf("setuid failed: %v", err)
}
return nil
}
func setupCapabilities(args *execdriver.InitArgs) error {
if args.Privileged {
return nil
}
drop := []capability.Cap{
capability.CAP_SETPCAP,
capability.CAP_SYS_MODULE,
capability.CAP_SYS_RAWIO,
capability.CAP_SYS_PACCT,
capability.CAP_SYS_ADMIN,
capability.CAP_SYS_NICE,
capability.CAP_SYS_RESOURCE,
capability.CAP_SYS_TIME,
capability.CAP_SYS_TTY_CONFIG,
capability.CAP_MKNOD,
capability.CAP_AUDIT_WRITE,
capability.CAP_AUDIT_CONTROL,
capability.CAP_MAC_OVERRIDE,
capability.CAP_MAC_ADMIN,
}
c, err := capability.NewPid(os.Getpid())
if err != nil {
return err
}
c.Unset(capability.CAPS|capability.BOUNDS, drop...)
if err := c.Apply(capability.CAPS | capability.BOUNDS); err != nil {
return err
}
return nil
}
func getEnv(args *execdriver.InitArgs, key string) string {
for _, kv := range args.Env {
parts := strings.SplitN(kv, "=", 2)
if parts[0] == key && len(parts) == 2 {
return parts[1]
}
}
return ""
}

View file

@ -1,4 +1,4 @@
package sysinit
package lxc
func setHostname(hostname string) error {
panic("Not supported on darwin")

View file

@ -1,4 +1,4 @@
package sysinit
package lxc
import (
"syscall"

View file

@ -1,23 +1,24 @@
package docker
package lxc
import (
"github.com/dotcloud/docker/cgroups"
"strings"
"text/template"
)
const LxcTemplate = `
{{if .Config.NetworkDisabled}}
# network is disabled (-n=false)
lxc.network.type = empty
{{else}}
{{if .Network}}
# network configuration
lxc.network.type = veth
lxc.network.link = {{.NetworkSettings.Bridge}}
lxc.network.link = {{.Network.Bridge}}
lxc.network.name = eth0
{{else}}
# network is disabled (-n=false)
lxc.network.type = empty
{{end}}
# root filesystem
{{$ROOTFS := .RootfsPath}}
{{$ROOTFS := .Rootfs}}
lxc.rootfs = {{$ROOTFS}}
# use a dedicated pts for the container (and limit the number of pseudo terminal
@ -30,8 +31,8 @@ lxc.console = none
# no controlling tty at all
lxc.tty = 1
{{if (getHostConfig .).Privileged}}
lxc.cgroup.devices.allow = a
{{if .Privileged}}
lxc.cgroup.devices.allow = a
{{else}}
# no implicit access to devices
lxc.cgroup.devices.deny = a
@ -81,8 +82,8 @@ lxc.mount.entry = sysfs {{escapeFstabSpaces $ROOTFS}}/sys sysfs nosuid,nodev,noe
lxc.mount.entry = devpts {{escapeFstabSpaces $ROOTFS}}/dev/pts devpts newinstance,ptmxmode=0666,nosuid,noexec 0 0
lxc.mount.entry = shm {{escapeFstabSpaces $ROOTFS}}/dev/shm tmpfs size=65536k,nosuid,nodev,noexec 0 0
{{if (getHostConfig .).Privileged}}
{{if (getCapabilities .).AppArmor}}
{{if .Privileged}}
{{if .AppArmor}}
lxc.aa_profile = unconfined
{{else}}
#lxc.aa_profile = unconfined
@ -90,20 +91,22 @@ lxc.aa_profile = unconfined
{{end}}
# limits
{{if .Config.Memory}}
lxc.cgroup.memory.limit_in_bytes = {{.Config.Memory}}
lxc.cgroup.memory.soft_limit_in_bytes = {{.Config.Memory}}
{{with $memSwap := getMemorySwap .Config}}
{{if .Cgroups}}
{{if .Cgroups.Memory}}
lxc.cgroup.memory.limit_in_bytes = {{.Cgroups.Memory}}
lxc.cgroup.memory.soft_limit_in_bytes = {{.Cgroups.Memory}}
{{with $memSwap := getMemorySwap .Cgroups}}
lxc.cgroup.memory.memsw.limit_in_bytes = {{$memSwap}}
{{end}}
{{end}}
{{if .Config.CpuShares}}
lxc.cgroup.cpu.shares = {{.Config.CpuShares}}
{{if .Cgroups.CpuShares}}
lxc.cgroup.cpu.shares = {{.Cgroups.CpuShares}}
{{end}}
{{end}}
{{if (getHostConfig .).LxcConf}}
{{range $pair := (getHostConfig .).LxcConf}}
{{$pair.Key}} = {{$pair.Value}}
{{if .Config}}
{{range $value := .Config}}
{{$value}}
{{end}}
{{end}}
`
@ -116,29 +119,19 @@ func escapeFstabSpaces(field string) string {
return strings.Replace(field, " ", "\\040", -1)
}
func getMemorySwap(config *Config) int64 {
func getMemorySwap(v *cgroups.Values) int64 {
// By default, MemorySwap is set to twice the size of RAM.
// If you want to omit MemorySwap, set it to `-1'.
if config.MemorySwap < 0 {
if v.MemorySwap < 0 {
return 0
}
return config.Memory * 2
}
func getHostConfig(container *Container) *HostConfig {
return container.hostConfig
}
func getCapabilities(container *Container) *Capabilities {
return container.runtime.capabilities
return v.Memory * 2
}
func init() {
var err error
funcMap := template.FuncMap{
"getMemorySwap": getMemorySwap,
"getHostConfig": getHostConfig,
"getCapabilities": getCapabilities,
"escapeFstabSpaces": escapeFstabSpaces,
}
LxcTemplateCompiled, err = template.New("lxc").Funcs(funcMap).Parse(LxcTemplate)

View file

@ -1,11 +1,14 @@
package docker
package lxc
import (
"bufio"
"fmt"
"github.com/dotcloud/docker/cgroups"
"github.com/dotcloud/docker/execdriver"
"io/ioutil"
"math/rand"
"os"
"path"
"strings"
"testing"
"time"
@ -17,32 +20,39 @@ func TestLXCConfig(t *testing.T) {
t.Fatal(err)
}
defer os.RemoveAll(root)
os.MkdirAll(path.Join(root, "containers", "1"), 0777)
// Memory is allocated randomly for testing
rand.Seed(time.Now().UTC().UnixNano())
memMin := 33554432
memMax := 536870912
mem := memMin + rand.Intn(memMax-memMin)
// CPU shares as well
cpuMin := 100
cpuMax := 10000
cpu := cpuMin + rand.Intn(cpuMax-cpuMin)
container := &Container{
root: root,
Config: &Config{
Memory: int64(mem),
CpuShares: int64(cpu),
NetworkDisabled: true,
},
hostConfig: &HostConfig{
Privileged: false,
},
}
if err := container.generateLXCConfig(); err != nil {
var (
memMin = 33554432
memMax = 536870912
mem = memMin + rand.Intn(memMax-memMin)
cpuMin = 100
cpuMax = 10000
cpu = cpuMin + rand.Intn(cpuMax-cpuMin)
)
driver, err := NewDriver(root, false)
if err != nil {
t.Fatal(err)
}
grepFile(t, container.lxcConfigPath(),
process := &execdriver.Process{
ID: "1",
Cgroups: &cgroups.Values{
Memory: int64(mem),
CpuShares: int64(cpu),
},
}
p, err := driver.generateLXCConfig(process)
if err != nil {
t.Fatal(err)
}
grepFile(t, p,
fmt.Sprintf("lxc.cgroup.memory.limit_in_bytes = %d", mem))
grepFile(t, container.lxcConfigPath(),
grepFile(t, p,
fmt.Sprintf("lxc.cgroup.memory.memsw.limit_in_bytes = %d", mem*2))
}
@ -52,31 +62,29 @@ func TestCustomLxcConfig(t *testing.T) {
t.Fatal(err)
}
defer os.RemoveAll(root)
container := &Container{
root: root,
Config: &Config{
Hostname: "foobar",
NetworkDisabled: true,
},
hostConfig: &HostConfig{
Privileged: false,
LxcConf: []KeyValuePair{
{
Key: "lxc.utsname",
Value: "docker",
},
{
Key: "lxc.cgroup.cpuset.cpus",
Value: "0,1",
},
},
},
}
if err := container.generateLXCConfig(); err != nil {
os.MkdirAll(path.Join(root, "containers", "1"), 0777)
driver, err := NewDriver(root, false)
if err != nil {
t.Fatal(err)
}
grepFile(t, container.lxcConfigPath(), "lxc.utsname = docker")
grepFile(t, container.lxcConfigPath(), "lxc.cgroup.cpuset.cpus = 0,1")
process := &execdriver.Process{
ID: "1",
Privileged: false,
Config: []string{
"lxc.utsname = docker",
"lxc.cgroup.cpuset.cpus = 0,1",
},
}
p, err := driver.generateLXCConfig(process)
if err != nil {
t.Fatal(err)
}
grepFile(t, p, "lxc.utsname = docker")
grepFile(t, p, "lxc.cgroup.cpuset.cpus = 0,1")
}
func grepFile(t *testing.T, path string, pattern string) {

View file

@ -38,7 +38,6 @@ func mkRuntime(f utils.Fataler) *docker.Runtime {
if err != nil {
f.Fatal(err)
}
r.UpdateCapabilities(true)
return r
}

View file

@ -25,27 +25,37 @@ func Mounted(mountpoint string) (bool, error) {
return false, nil
}
// Mount the specified options at the target path
// Mount the specified options at the target path only if
// the target is not mounted
// Options must be specified as fstab style
func Mount(device, target, mType, options string) error {
if mounted, err := Mounted(target); err != nil || mounted {
return err
}
return ForceMount(device, target, mType, options)
}
// Mount the specified options at the target path
// reguardless if the target is mounted or not
// Options must be specified as fstab style
func ForceMount(device, target, mType, options string) error {
flag, data := parseOptions(options)
if err := mount(device, target, mType, uintptr(flag), data); err != nil {
return err
}
return nil
}
// Unmount the target only if it is mounted
func Unmount(target string) (err error) {
func Unmount(target string) error {
if mounted, err := Mounted(target); err != nil || !mounted {
return err
}
return ForceUnmount(target)
}
// Unmount the target reguardless if it is mounted or not
func ForceUnmount(target string) (err error) {
// Simple retry logic for unmount
for i := 0; i < 10; i++ {
if err = unmount(target, 0); err == nil {

55
pkg/sysinfo/sysinfo.go Normal file
View file

@ -0,0 +1,55 @@
package sysinfo
import (
"github.com/dotcloud/docker/cgroups"
"github.com/dotcloud/docker/utils"
"io/ioutil"
"log"
"os"
"path"
)
type SysInfo struct {
MemoryLimit bool
SwapLimit bool
IPv4ForwardingDisabled bool
AppArmor bool
}
func New(quiet bool) *SysInfo {
sysInfo := &SysInfo{}
if cgroupMemoryMountpoint, err := cgroups.FindCgroupMountpoint("memory"); err != nil {
if !quiet {
log.Printf("WARNING: %s\n", err)
}
} else {
_, err1 := ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.limit_in_bytes"))
_, err2 := ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.soft_limit_in_bytes"))
sysInfo.MemoryLimit = err1 == nil && err2 == nil
if !sysInfo.MemoryLimit && !quiet {
log.Printf("WARNING: Your kernel does not support cgroup memory limit.")
}
_, err = ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.memsw.limit_in_bytes"))
sysInfo.SwapLimit = err == nil
if !sysInfo.SwapLimit && !quiet {
log.Printf("WARNING: Your kernel does not support cgroup swap limit.")
}
}
content, err3 := ioutil.ReadFile("/proc/sys/net/ipv4/ip_forward")
sysInfo.IPv4ForwardingDisabled = err3 != nil || len(content) == 0 || content[0] != '1'
if sysInfo.IPv4ForwardingDisabled && !quiet {
log.Printf("WARNING: IPv4 forwarding is disabled.")
}
// Check if AppArmor seems to be enabled on this system.
if _, err := os.Stat("/sys/kernel/security/apparmor"); os.IsNotExist(err) {
utils.Debugf("/sys/kernel/security/apparmor not found; assuming AppArmor is not enabled.")
sysInfo.AppArmor = false
} else {
utils.Debugf("/sys/kernel/security/apparmor found; assuming AppArmor is enabled.")
sysInfo.AppArmor = true
}
return sysInfo
}

View file

@ -4,18 +4,19 @@ import (
"container/list"
"fmt"
"github.com/dotcloud/docker/archive"
"github.com/dotcloud/docker/cgroups"
"github.com/dotcloud/docker/execdriver"
"github.com/dotcloud/docker/execdriver/chroot"
"github.com/dotcloud/docker/execdriver/lxc"
"github.com/dotcloud/docker/graphdriver"
"github.com/dotcloud/docker/graphdriver/aufs"
_ "github.com/dotcloud/docker/graphdriver/devmapper"
_ "github.com/dotcloud/docker/graphdriver/vfs"
"github.com/dotcloud/docker/pkg/graphdb"
"github.com/dotcloud/docker/pkg/sysinfo"
"github.com/dotcloud/docker/utils"
"io"
"io/ioutil"
"log"
"os"
"os/exec"
"path"
"regexp"
"sort"
@ -35,13 +36,6 @@ var (
validContainerNamePattern = regexp.MustCompile(`^/?` + validContainerNameChars + `+$`)
)
type Capabilities struct {
MemoryLimit bool
SwapLimit bool
IPv4ForwardingDisabled bool
AppArmor bool
}
type Runtime struct {
repository string
sysInitPath string
@ -50,12 +44,13 @@ type Runtime struct {
graph *Graph
repositories *TagStore
idIndex *utils.TruncIndex
capabilities *Capabilities
sysInfo *sysinfo.SysInfo
volumes *Graph
srv *Server
config *DaemonConfig
containerGraph *graphdb.Database
driver graphdriver.Driver
execDriver execdriver.Driver
}
// List returns an array of all containers registered in the runtime.
@ -160,11 +155,9 @@ func (runtime *Runtime) Register(container *Container) error {
// if so, then we need to restart monitor and init a new lock
// If the container is supposed to be running, make sure of it
if container.State.IsRunning() {
output, err := exec.Command("lxc-info", "-n", container.ID).CombinedOutput()
if err != nil {
return err
}
if !strings.Contains(string(output), "RUNNING") {
info := runtime.execDriver.Info(container.ID)
if !info.IsRunning() {
utils.Debugf("Container %s was supposed to be running but is not.", container.ID)
if runtime.config.AutoRestart {
utils.Debugf("Restarting")
@ -188,7 +181,7 @@ func (runtime *Runtime) Register(container *Container) error {
}
container.waitLock = make(chan struct{})
go container.monitor()
go container.monitor(nil)
}
}
return nil
@ -331,43 +324,6 @@ func (runtime *Runtime) restore() error {
return nil
}
// FIXME: comment please!
func (runtime *Runtime) UpdateCapabilities(quiet bool) {
if cgroupMemoryMountpoint, err := cgroups.FindCgroupMountpoint("memory"); err != nil {
if !quiet {
log.Printf("WARNING: %s\n", err)
}
} else {
_, err1 := ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.limit_in_bytes"))
_, err2 := ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.soft_limit_in_bytes"))
runtime.capabilities.MemoryLimit = err1 == nil && err2 == nil
if !runtime.capabilities.MemoryLimit && !quiet {
log.Printf("WARNING: Your kernel does not support cgroup memory limit.")
}
_, err = ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.memsw.limit_in_bytes"))
runtime.capabilities.SwapLimit = err == nil
if !runtime.capabilities.SwapLimit && !quiet {
log.Printf("WARNING: Your kernel does not support cgroup swap limit.")
}
}
content, err3 := ioutil.ReadFile("/proc/sys/net/ipv4/ip_forward")
runtime.capabilities.IPv4ForwardingDisabled = err3 != nil || len(content) == 0 || content[0] != '1'
if runtime.capabilities.IPv4ForwardingDisabled && !quiet {
log.Printf("WARNING: IPv4 forwarding is disabled.")
}
// Check if AppArmor seems to be enabled on this system.
if _, err := os.Stat("/sys/kernel/security/apparmor"); os.IsNotExist(err) {
utils.Debugf("/sys/kernel/security/apparmor not found; assuming AppArmor is not enabled.")
runtime.capabilities.AppArmor = false
} else {
utils.Debugf("/sys/kernel/security/apparmor found; assuming AppArmor is enabled.")
runtime.capabilities.AppArmor = true
}
}
// Create creates a new container from the given configuration with a given name.
func (runtime *Runtime) Create(config *Config, name string) (*Container, []string, error) {
// Lookup image
@ -646,7 +602,6 @@ func NewRuntime(config *DaemonConfig) (*Runtime, error) {
if err != nil {
return nil, err
}
runtime.UpdateCapabilities(false)
return runtime, nil
}
@ -675,10 +630,6 @@ func NewRuntimeFromDirectory(config *DaemonConfig) (*Runtime, error) {
}
}
utils.Debugf("Escaping AppArmor confinement")
if err := linkLxcStart(config.Root); err != nil {
return nil, err
}
utils.Debugf("Creating images graph")
g, err := NewGraph(path.Join(config.Root, "graph"), driver)
if err != nil {
@ -735,6 +686,26 @@ func NewRuntimeFromDirectory(config *DaemonConfig) (*Runtime, error) {
sysInitPath = localCopy
}
sysInfo := sysinfo.New(false)
/*
temporarilly disabled.
*/
if false {
var ed execdriver.Driver
if driver := os.Getenv("EXEC_DRIVER"); driver == "lxc" {
ed, err = lxc.NewDriver(config.Root, sysInfo.AppArmor)
} else {
ed, err = chroot.NewDriver()
}
if ed != nil {
}
}
ed, err := lxc.NewDriver(config.Root, sysInfo.AppArmor)
if err != nil {
return nil, err
}
runtime := &Runtime{
repository: runtimeRepo,
containers: list.New(),
@ -742,12 +713,13 @@ func NewRuntimeFromDirectory(config *DaemonConfig) (*Runtime, error) {
graph: g,
repositories: repositories,
idIndex: utils.NewTruncIndex(),
capabilities: &Capabilities{},
sysInfo: sysInfo,
volumes: volumes,
config: config,
containerGraph: graph,
driver: driver,
sysInitPath: sysInitPath,
execDriver: ed,
}
if err := runtime.restore(); err != nil {
@ -829,6 +801,18 @@ func (runtime *Runtime) Diff(container *Container) (archive.Archive, error) {
return archive.ExportChanges(cDir, changes)
}
func (runtime *Runtime) Run(c *Container, startCallback execdriver.StartCallback) (int, error) {
return runtime.execDriver.Run(c.process, startCallback)
}
func (runtime *Runtime) Kill(c *Container, sig int) error {
return runtime.execDriver.Kill(c.process, sig)
}
func (runtime *Runtime) WaitGhost(c *Container) error {
return runtime.execDriver.Wait(c.ID)
}
// Nuke kills all containers then removes all content
// from the content root, including images, volumes and
// container filesystems.
@ -848,23 +832,6 @@ func (runtime *Runtime) Nuke() error {
return os.RemoveAll(runtime.config.Root)
}
func linkLxcStart(root string) error {
sourcePath, err := exec.LookPath("lxc-start")
if err != nil {
return err
}
targetPath := path.Join(root, "lxc-start-unconfined")
if _, err := os.Lstat(targetPath); err != nil && !os.IsNotExist(err) {
return err
} else if err == nil {
if err := os.Remove(targetPath); err != nil {
return err
}
}
return os.Symlink(sourcePath, targetPath)
}
// FIXME: this is a convenience function for integration tests
// which need direct access to runtime.graph.
// Once the tests switch to using engine and jobs, this method

View file

@ -516,7 +516,7 @@ func (srv *Server) ImageInsert(job *engine.Job) engine.Status {
}
defer file.Body.Close()
config, _, _, err := ParseRun([]string{img.ID, "echo", "insert", url, path}, srv.runtime.capabilities)
config, _, _, err := ParseRun([]string{img.ID, "echo", "insert", url, path}, srv.runtime.sysInfo)
if err != nil {
job.Error(err)
return engine.StatusErr
@ -661,13 +661,6 @@ func (srv *Server) DockerInfo(job *engine.Job) engine.Status {
} else {
imgcount = len(images)
}
lxcVersion := ""
if output, err := exec.Command("lxc-version").CombinedOutput(); err == nil {
outputStr := string(output)
if len(strings.SplitN(outputStr, ":", 2)) == 2 {
lxcVersion = strings.TrimSpace(strings.SplitN(string(output), ":", 2)[1])
}
}
kernelVersion := "<unknown>"
if kv, err := utils.GetKernelVersion(); err == nil {
kernelVersion = kv.String()
@ -685,13 +678,13 @@ func (srv *Server) DockerInfo(job *engine.Job) engine.Status {
v.SetInt("Images", imgcount)
v.Set("Driver", srv.runtime.driver.String())
v.SetJson("DriverStatus", srv.runtime.driver.Status())
v.SetBool("MemoryLimit", srv.runtime.capabilities.MemoryLimit)
v.SetBool("SwapLimit", srv.runtime.capabilities.SwapLimit)
v.SetBool("IPv4Forwarding", !srv.runtime.capabilities.IPv4ForwardingDisabled)
v.SetBool("MemoryLimit", srv.runtime.sysInfo.MemoryLimit)
v.SetBool("SwapLimit", srv.runtime.sysInfo.SwapLimit)
v.SetBool("IPv4Forwarding", !srv.runtime.sysInfo.IPv4ForwardingDisabled)
v.SetBool("Debug", os.Getenv("DEBUG") != "")
v.SetInt("NFd", utils.GetTotalUsedFds())
v.SetInt("NGoroutines", runtime.NumGoroutine())
v.Set("LXCVersion", lxcVersion)
v.Set("ExecutionDriver", srv.runtime.execDriver.Name())
v.SetInt("NEventsListener", len(srv.events))
v.Set("KernelVersion", kernelVersion)
v.Set("IndexServerAddress", auth.IndexServerAddress())
@ -1477,10 +1470,10 @@ func (srv *Server) ContainerCreate(job *engine.Job) engine.Status {
job.Errorf("Minimum memory limit allowed is 512k")
return engine.StatusErr
}
if config.Memory > 0 && !srv.runtime.capabilities.MemoryLimit {
if config.Memory > 0 && !srv.runtime.sysInfo.MemoryLimit {
config.Memory = 0
}
if config.Memory > 0 && !srv.runtime.capabilities.SwapLimit {
if config.Memory > 0 && !srv.runtime.sysInfo.SwapLimit {
config.MemorySwap = -1
}
container, buildWarnings, err := srv.runtime.Create(&config, name)

View file

@ -4,163 +4,19 @@ import (
"encoding/json"
"flag"
"fmt"
"github.com/dotcloud/docker/pkg/netlink"
"github.com/dotcloud/docker/utils"
"github.com/syndtr/gocapability/capability"
"github.com/dotcloud/docker/execdriver"
_ "github.com/dotcloud/docker/execdriver/chroot"
_ "github.com/dotcloud/docker/execdriver/lxc"
"io/ioutil"
"log"
"net"
"os"
"os/exec"
"strconv"
"strings"
"syscall"
)
type DockerInitArgs struct {
user string
gateway string
ip string
workDir string
privileged bool
env []string
args []string
mtu int
}
func setupHostname(args *DockerInitArgs) error {
hostname := getEnv(args, "HOSTNAME")
if hostname == "" {
return nil
}
return setHostname(hostname)
}
// Setup networking
func setupNetworking(args *DockerInitArgs) error {
if args.ip != "" {
// eth0
iface, err := net.InterfaceByName("eth0")
if err != nil {
return fmt.Errorf("Unable to set up networking: %v", err)
}
ip, ipNet, err := net.ParseCIDR(args.ip)
if err != nil {
return fmt.Errorf("Unable to set up networking: %v", err)
}
if err := netlink.NetworkLinkAddIp(iface, ip, ipNet); err != nil {
return fmt.Errorf("Unable to set up networking: %v", err)
}
if err := netlink.NetworkSetMTU(iface, args.mtu); err != nil {
return fmt.Errorf("Unable to set MTU: %v", err)
}
if err := netlink.NetworkLinkUp(iface); err != nil {
return fmt.Errorf("Unable to set up networking: %v", err)
}
// loopback
iface, err = net.InterfaceByName("lo")
if err != nil {
return fmt.Errorf("Unable to set up networking: %v", err)
}
if err := netlink.NetworkLinkUp(iface); err != nil {
return fmt.Errorf("Unable to set up networking: %v", err)
}
}
if args.gateway != "" {
gw := net.ParseIP(args.gateway)
if gw == nil {
return fmt.Errorf("Unable to set up networking, %s is not a valid gateway IP", args.gateway)
}
if err := netlink.AddDefaultGw(gw); err != nil {
return fmt.Errorf("Unable to set up networking: %v", err)
}
}
return nil
}
// Setup working directory
func setupWorkingDirectory(args *DockerInitArgs) error {
if args.workDir == "" {
return nil
}
if err := syscall.Chdir(args.workDir); err != nil {
return fmt.Errorf("Unable to change dir to %v: %v", args.workDir, err)
}
return nil
}
// Takes care of dropping privileges to the desired user
func changeUser(args *DockerInitArgs) error {
if args.user == "" {
return nil
}
userent, err := utils.UserLookup(args.user)
if err != nil {
return fmt.Errorf("Unable to find user %v: %v", args.user, err)
}
uid, err := strconv.Atoi(userent.Uid)
if err != nil {
return fmt.Errorf("Invalid uid: %v", userent.Uid)
}
gid, err := strconv.Atoi(userent.Gid)
if err != nil {
return fmt.Errorf("Invalid gid: %v", userent.Gid)
}
if err := syscall.Setgid(gid); err != nil {
return fmt.Errorf("setgid failed: %v", err)
}
if err := syscall.Setuid(uid); err != nil {
return fmt.Errorf("setuid failed: %v", err)
}
return nil
}
func setupCapabilities(args *DockerInitArgs) error {
if args.privileged {
return nil
}
drop := []capability.Cap{
capability.CAP_SETPCAP,
capability.CAP_SYS_MODULE,
capability.CAP_SYS_RAWIO,
capability.CAP_SYS_PACCT,
capability.CAP_SYS_ADMIN,
capability.CAP_SYS_NICE,
capability.CAP_SYS_RESOURCE,
capability.CAP_SYS_TIME,
capability.CAP_SYS_TTY_CONFIG,
capability.CAP_MKNOD,
capability.CAP_AUDIT_WRITE,
capability.CAP_AUDIT_CONTROL,
capability.CAP_MAC_OVERRIDE,
capability.CAP_MAC_ADMIN,
}
c, err := capability.NewPid(os.Getpid())
if err != nil {
return err
}
c.Unset(capability.CAPS|capability.BOUNDS, drop...)
if err := c.Apply(capability.CAPS | capability.BOUNDS); err != nil {
return err
}
return nil
}
// Clear environment pollution introduced by lxc-start
func setupEnv(args *DockerInitArgs) {
func setupEnv(args *execdriver.InitArgs) {
os.Clearenv()
for _, kv := range args.env {
for _, kv := range args.Env {
parts := strings.SplitN(kv, "=", 2)
if len(parts) == 1 {
parts = append(parts, "")
@ -169,50 +25,19 @@ func setupEnv(args *DockerInitArgs) {
}
}
func getEnv(args *DockerInitArgs, key string) string {
for _, kv := range args.env {
parts := strings.SplitN(kv, "=", 2)
if parts[0] == key && len(parts) == 2 {
return parts[1]
}
}
return ""
}
func executeProgram(args *DockerInitArgs) error {
func executeProgram(args *execdriver.InitArgs) error {
setupEnv(args)
if err := setupHostname(args); err != nil {
return err
}
if err := setupNetworking(args); err != nil {
return err
}
if err := setupCapabilities(args); err != nil {
return err
}
if err := setupWorkingDirectory(args); err != nil {
return err
}
if err := changeUser(args); err != nil {
return err
}
path, err := exec.LookPath(args.args[0])
dockerInitFct, err := execdriver.GetInitFunc(args.Driver)
if err != nil {
log.Printf("Unable to locate %v", args.args[0])
os.Exit(127)
panic(err)
}
return dockerInitFct(args)
if args.Driver == "lxc" {
// Will never reach
} else if args.Driver == "chroot" {
}
if err := syscall.Exec(path, args.args, os.Environ()); err != nil {
return fmt.Errorf("dockerinit unable to execute %s - %s", path, err)
}
// Will never reach here
return nil
}
@ -232,6 +57,7 @@ func SysInit() {
workDir := flag.String("w", "", "workdir")
privileged := flag.Bool("privileged", false, "privileged mode")
mtu := flag.Int("mtu", 1500, "interface mtu")
driver := flag.String("driver", "", "exec driver")
flag.Parse()
// Get env
@ -247,15 +73,16 @@ func SysInit() {
// Propagate the plugin-specific container env variable
env = append(env, "container="+os.Getenv("container"))
args := &DockerInitArgs{
user: *user,
gateway: *gateway,
ip: *ip,
workDir: *workDir,
privileged: *privileged,
env: env,
args: flag.Args(),
mtu: *mtu,
args := &execdriver.InitArgs{
User: *user,
Gateway: *gateway,
Ip: *ip,
WorkDir: *workDir,
Privileged: *privileged,
Env: env,
Args: flag.Args(),
Mtu: *mtu,
Driver: *driver,
}
if err := executeProgram(args); err != nil {

View file

@ -5,7 +5,6 @@ import (
"github.com/dotcloud/docker/archive"
"github.com/dotcloud/docker/pkg/namesgenerator"
"github.com/dotcloud/docker/utils"
"io/ioutil"
"strconv"
"strings"
)
@ -328,20 +327,6 @@ func parseLink(rawLink string) (map[string]string, error) {
return utils.PartParser("name:alias", rawLink)
}
func RootIsShared() bool {
if data, err := ioutil.ReadFile("/proc/self/mountinfo"); err == nil {
for _, line := range strings.Split(string(data), "\n") {
cols := strings.Split(line, " ")
if len(cols) >= 6 && cols[4] == "/" {
return strings.HasPrefix(cols[6], "shared")
}
}
}
// No idea, probably safe to assume so
return true
}
type checker struct {
runtime *Runtime
}