Browse Source

Add ability to pause/unpause containers via cgroups freeze

This patch adds pause/unpause support for containers to the command
line, API, and exec drivers. It is implemented using the cgroups
freezer support in libcontainer and lxc-freeze/lxc-unfreeze for LXC.

Co-Authored-By: Eric Windisch <ewindisch@docker.com>
Co-Authored-By: Chris Alfonso <calfonso@redhat.com>
Docker-DCO-1.1-Signed-off-by: Ian Main <imain@redhat.com> (github: imain)
Ian Main 11 years ago
parent
commit
b054569cde

+ 48 - 0
api/client/commands.go

@@ -65,6 +65,7 @@ func (cli *DockerCli) CmdHelp(args ...string) error {
 		{"login", "Register or Login to the docker registry server"},
 		{"logs", "Fetch the logs of a container"},
 		{"port", "Lookup the public-facing port which is NAT-ed to PRIVATE_PORT"},
+		{"pause", "Pause all processes within a container"},
 		{"ps", "List containers"},
 		{"pull", "Pull an image or a repository from the docker registry server"},
 		{"push", "Push an image or a repository to the docker registry server"},
@@ -78,6 +79,7 @@ func (cli *DockerCli) CmdHelp(args ...string) error {
 		{"stop", "Stop a running container"},
 		{"tag", "Tag an image into a repository"},
 		{"top", "Lookup the running processes of a container"},
+		{"unpause", "Unpause a paused container"},
 		{"version", "Show the docker version information"},
 		{"wait", "Block until a container stops, then print its exit code"},
 	} {
@@ -648,6 +650,52 @@ func (cli *DockerCli) CmdStart(args ...string) error {
 	return nil
 }
 
+func (cli *DockerCli) CmdUnpause(args ...string) error {
+	cmd := cli.Subcmd("unpause", "CONTAINER", "Unpause all processes within a container")
+	if err := cmd.Parse(args); err != nil {
+		return nil
+	}
+
+	if cmd.NArg() != 1 {
+		cmd.Usage()
+		return nil
+	}
+
+	var encounteredError error
+	for _, name := range cmd.Args() {
+		if _, _, err := readBody(cli.call("POST", fmt.Sprintf("/containers/%s/unpause", name), nil, false)); err != nil {
+			fmt.Fprintf(cli.err, "%s\n", err)
+			encounteredError = fmt.Errorf("Error: failed to unpause container named %s", name)
+		} else {
+			fmt.Fprintf(cli.out, "%s\n", name)
+		}
+	}
+	return encounteredError
+}
+
+func (cli *DockerCli) CmdPause(args ...string) error {
+	cmd := cli.Subcmd("pause", "CONTAINER", "Pause all processes within a container")
+	if err := cmd.Parse(args); err != nil {
+		return nil
+	}
+
+	if cmd.NArg() != 1 {
+		cmd.Usage()
+		return nil
+	}
+
+	var encounteredError error
+	for _, name := range cmd.Args() {
+		if _, _, err := readBody(cli.call("POST", fmt.Sprintf("/containers/%s/pause", name), nil, false)); err != nil {
+			fmt.Fprintf(cli.err, "%s\n", err)
+			encounteredError = fmt.Errorf("Error: failed to pause container named %s", name)
+		} else {
+			fmt.Fprintf(cli.out, "%s\n", name)
+		}
+	}
+	return encounteredError
+}
+
 func (cli *DockerCli) CmdInspect(args ...string) error {
 	cmd := cli.Subcmd("inspect", "CONTAINER|IMAGE [CONTAINER|IMAGE...]", "Return low-level information on a container/image")
 	tmplStr := cmd.String([]string{"f", "#format", "-format"}, "", "Format the output using the given go template.")

+ 32 - 0
api/server/server.go

@@ -165,6 +165,36 @@ func postContainersKill(eng *engine.Engine, version version.Version, w http.Resp
 	return nil
 }
 
+// postContainersPause serves the /containers/{name}/pause route by running
+// the engine's "pause" job for the container named in vars. It writes
+// 204 No Content on success; a nil vars map, a form-parsing failure, or a
+// job failure is returned as an error instead.
+func postContainersPause(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
+	if vars == nil {
+		return fmt.Errorf("Missing parameter")
+	}
+	if formErr := parseForm(r); formErr != nil {
+		return formErr
+	}
+	if runErr := eng.Job("pause", vars["name"]).Run(); runErr != nil {
+		return runErr
+	}
+	w.WriteHeader(http.StatusNoContent)
+	return nil
+}
+
+// postContainersUnpause serves the /containers/{name}/unpause route by
+// running the engine's "unpause" job for the container named in vars. It
+// writes 204 No Content on success; a nil vars map, a form-parsing failure,
+// or a job failure is returned as an error instead.
+func postContainersUnpause(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
+	if vars == nil {
+		return fmt.Errorf("Missing parameter")
+	}
+	if formErr := parseForm(r); formErr != nil {
+		return formErr
+	}
+	if runErr := eng.Job("unpause", vars["name"]).Run(); runErr != nil {
+		return runErr
+	}
+	w.WriteHeader(http.StatusNoContent)
+	return nil
+}
+
 func getContainersExport(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
 	if vars == nil {
 		return fmt.Errorf("Missing parameter")
@@ -1087,6 +1117,8 @@ func createRouter(eng *engine.Engine, logging, enableCors bool, dockerVersion st
 			"/images/{name:.*}/tag":         postImagesTag,
 			"/containers/create":            postContainersCreate,
 			"/containers/{name:.*}/kill":    postContainersKill,
+			"/containers/{name:.*}/pause":   postContainersPause,
+			"/containers/{name:.*}/unpause": postContainersUnpause,
 			"/containers/{name:.*}/restart": postContainersRestart,
 			"/containers/{name:.*}/start":   postContainersStart,
 			"/containers/{name:.*}/stop":    postContainersStop,

+ 25 - 0
daemon/container.go

@@ -544,6 +544,26 @@ func (container *Container) KillSig(sig int) error {
 	return container.daemon.Kill(container, sig)
 }
 
+// Pause asks the daemon to pause this container. It fails without calling
+// the daemon when the container is already paused or is not running.
+func (container *Container) Pause() error {
+	// Cases are evaluated top to bottom, so "already paused" wins over
+	// "not running" when both would apply.
+	switch {
+	case container.State.IsPaused():
+		return fmt.Errorf("Container %s is already paused", container.ID)
+	case !container.State.IsRunning():
+		return fmt.Errorf("Container %s is not running", container.ID)
+	}
+	return container.daemon.Pause(container)
+}
+
+// Unpause asks the daemon to resume this container. It fails without
+// calling the daemon when the container is not running or is not paused.
+func (container *Container) Unpause() error {
+	// Check the running state first: a stopped container should be reported
+	// as "not running" rather than the less helpful "not paused".
+	if !container.State.IsRunning() {
+		return fmt.Errorf("Container %s is not running", container.ID)
+	}
+	if !container.State.IsPaused() {
+		return fmt.Errorf("Container %s is not paused", container.ID)
+	}
+	return container.daemon.Unpause(container)
+}
+
 func (container *Container) Kill() error {
 	if !container.State.IsRunning() {
 		return nil
@@ -574,6 +594,11 @@ func (container *Container) Stop(seconds int) error {
 		return nil
 	}
 
+	// We could unpause the container for them rather than returning this error
+	if container.State.IsPaused() {
+		return fmt.Errorf("Container %s is paused. Unpause the container before stopping", container.ID)
+	}
+
 	// 1. Send a SIGTERM
 	if err := container.KillSig(15); err != nil {
 		log.Print("Failed to send SIGTERM to the process, force killing")

+ 18 - 0
daemon/daemon.go

@@ -1014,6 +1014,24 @@ func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback e
 	return daemon.execDriver.Run(c.command, pipes, startCallback)
 }
 
+// Pause pauses the container's command through the exec driver and, only if
+// the driver reports success, marks the container state as paused.
+func (daemon *Daemon) Pause(c *Container) error {
+	if driverErr := daemon.execDriver.Pause(c.command); driverErr != nil {
+		return driverErr
+	}
+	c.State.SetPaused()
+	return nil
+}
+
+// Unpause resumes the container's command through the exec driver and, only
+// if the driver reports success, clears the paused flag on the container
+// state.
+func (daemon *Daemon) Unpause(c *Container) error {
+	if driverErr := daemon.execDriver.Unpause(c.command); driverErr != nil {
+		return driverErr
+	}
+	c.State.SetUnpaused()
+	return nil
+}
+
 func (daemon *Daemon) Kill(c *Container, sig int) error {
 	return daemon.execDriver.Kill(c.command, sig)
 }

+ 2 - 0
daemon/execdriver/driver.go

@@ -83,6 +83,8 @@ type TtyTerminal interface {
 type Driver interface {
 	Run(c *Command, pipes *Pipes, startCallback StartCallback) (int, error) // Run executes the process and blocks until the process exits and returns the exit code
 	Kill(c *Command, sig int) error
+	Pause(c *Command) error
+	Unpause(c *Command) error
 	Name() string                                 // Driver name
 	Info(id string) Info                          // "temporary" hack (until we move state from core to plugins)
 	GetPidsForContainer(id string) ([]int, error) // Returns a list of pids for the given container.

+ 24 - 0
daemon/execdriver/lxc/driver.go

@@ -218,6 +218,30 @@ func (d *driver) Kill(c *execdriver.Command, sig int) error {
 	return KillLxc(c.ID, sig)
 }
 
+// Pause freezes the container by running `lxc-freeze -n <id>`. If the
+// lxc-freeze binary cannot be found on PATH the lookup error is returned;
+// if the command fails, the error includes its combined output.
+func (d *driver) Pause(c *execdriver.Command) error {
+	if _, lookErr := exec.LookPath("lxc-freeze"); lookErr != nil {
+		return lookErr
+	}
+
+	out, runErr := exec.Command("lxc-freeze", "-n", c.ID).CombinedOutput()
+	if runErr != nil {
+		return fmt.Errorf("Err: %s Output: %s", runErr, out)
+	}
+	return nil
+}
+
+// Unpause thaws the container by running `lxc-unfreeze -n <id>`. If the
+// lxc-unfreeze binary cannot be found on PATH the lookup error is returned;
+// if the command fails, the error includes its combined output.
+func (d *driver) Unpause(c *execdriver.Command) error {
+	if _, lookErr := exec.LookPath("lxc-unfreeze"); lookErr != nil {
+		return lookErr
+	}
+
+	out, runErr := exec.Command("lxc-unfreeze", "-n", c.ID).CombinedOutput()
+	if runErr != nil {
+		return fmt.Errorf("Err: %s Output: %s", runErr, out)
+	}
+	return nil
+}
+
 func (d *driver) Terminate(c *execdriver.Command) error {
 	return KillLxc(c.ID, 9)
 }

+ 11 - 0
daemon/execdriver/native/configuration/parse.go

@@ -27,6 +27,7 @@ var actions = map[string]Action{
 	"cgroups.memory_reservation": memoryReservation, // set the memory reservation
 	"cgroups.memory_swap":        memorySwap,        // set the memory swap limit
 	"cgroups.cpuset.cpus":        cpusetCpus,        // set the cpus used
+	"cgroups.freezer":            freezer,           // set the frozen/thaw state
 
 	"systemd.slice": systemdSlice, // set parent Slice used for systemd unit
 
@@ -35,6 +36,16 @@ var actions = map[string]Action{
 	"fs.readonly": readonlyFs, // make the rootfs of the container read only
 }
 
+// freezer is the action for the "cgroups.freezer" option: it stores value
+// verbatim in the container's cgroup Freezer field. It returns an error when
+// the container has no cgroups configuration. The context argument is unused.
+func freezer(container *libcontainer.Container, context interface{}, value string) error {
+	if container.Cgroups != nil {
+		container.Cgroups.Freezer = value
+		return nil
+	}
+	return fmt.Errorf("cannot set cgroups when they are disabled")
+}
+
 func cpusetCpus(container *libcontainer.Container, context interface{}, value string) error {
 	if container.Cgroups == nil {
 		return fmt.Errorf("cannot set cgroups when they are disabled")

+ 26 - 0
daemon/execdriver/native/driver.go

@@ -145,6 +145,32 @@ func (d *driver) Kill(p *execdriver.Command, sig int) error {
 	return syscall.Kill(p.Process.Pid, syscall.Signal(sig))
 }
 
+// Pause freezes the container by setting its cgroup Freezer value to
+// "FROZEN" and re-applying the cgroup configuration for the command's
+// process, through systemd when systemd.UseSystemd() reports true and
+// through the fs cgroup backend otherwise. It returns an error if the
+// container is not tracked as active or if applying the cgroups fails.
+func (d *driver) Pause(c *execdriver.Command) error {
+	// Guard against an unknown ID: dereferencing a missing map entry would
+	// otherwise panic instead of reporting a usable error.
+	active, ok := d.activeContainers[c.ID]
+	if !ok {
+		return fmt.Errorf("active container for %s does not exist", c.ID)
+	}
+	active.container.Cgroups.Freezer = "FROZEN"
+	pid := c.Process.Pid
+
+	if systemd.UseSystemd() {
+		_, err := systemd.Apply(active.container.Cgroups, pid)
+		return err
+	}
+	_, err := fs.Apply(active.container.Cgroups, pid)
+	return err
+}
+
+// Unpause thaws the container by setting its cgroup Freezer value to
+// "THAWED" and re-applying the cgroup configuration for the command's
+// process, through systemd when systemd.UseSystemd() reports true and
+// through the fs cgroup backend otherwise. It returns an error if the
+// container is not tracked as active or if applying the cgroups fails.
+func (d *driver) Unpause(c *execdriver.Command) error {
+	// Guard against an unknown ID: dereferencing a missing map entry would
+	// otherwise panic instead of reporting a usable error.
+	active, ok := d.activeContainers[c.ID]
+	if !ok {
+		return fmt.Errorf("active container for %s does not exist", c.ID)
+	}
+	active.container.Cgroups.Freezer = "THAWED"
+	pid := c.Process.Pid
+
+	if systemd.UseSystemd() {
+		_, err := systemd.Apply(active.container.Cgroups, pid)
+		return err
+	}
+	_, err := fs.Apply(active.container.Cgroups, pid)
+	return err
+}
+
 func (d *driver) Terminate(p *execdriver.Command) error {
 	// lets check the start time for the process
 	started, err := d.readStartTime(p)

+ 24 - 0
daemon/state.go

@@ -11,6 +11,7 @@ import (
 type State struct {
 	sync.RWMutex
 	Running    bool
+	Paused     bool
 	Pid        int
 	ExitCode   int
 	StartedAt  time.Time
@@ -23,6 +24,9 @@ func (s *State) String() string {
 	defer s.RUnlock()
 
 	if s.Running {
+		if s.Paused {
+			return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
+		}
 		return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
 	}
 	if s.FinishedAt.IsZero() {
@@ -50,6 +54,7 @@ func (s *State) SetRunning(pid int) {
 	defer s.Unlock()
 
 	s.Running = true
+	s.Paused = false
 	s.ExitCode = 0
 	s.Pid = pid
 	s.StartedAt = time.Now().UTC()
@@ -64,3 +69,22 @@ func (s *State) SetStopped(exitCode int) {
 	s.FinishedAt = time.Now().UTC()
 	s.ExitCode = exitCode
 }
+
+// SetPaused sets the Paused flag while holding the state's write lock.
+func (s *State) SetPaused() {
+	s.Lock()
+	s.Paused = true
+	s.Unlock()
+}
+
+// SetUnpaused clears the Paused flag while holding the state's write lock.
+func (s *State) SetUnpaused() {
+	s.Lock()
+	s.Paused = false
+	s.Unlock()
+}
+
+// IsPaused reports the current value of the Paused flag, read under the
+// state's read lock.
+func (s *State) IsPaused() bool {
+	s.RLock()
+	paused := s.Paused
+	s.RUnlock()
+	return paused
+}

+ 38 - 0
server/server.go

@@ -125,6 +125,8 @@ func InitServer(job *engine.Job) engine.Status {
 		"restart":          srv.ContainerRestart,
 		"start":            srv.ContainerStart,
 		"kill":             srv.ContainerKill,
+		"pause":            srv.ContainerPause,
+		"unpause":          srv.ContainerUnpause,
 		"wait":             srv.ContainerWait,
 		"tag":              srv.ImageTag, // FIXME merge with "image_tag"
 		"resize":           srv.ContainerResize,
@@ -168,6 +170,42 @@ func InitServer(job *engine.Job) engine.Status {
 	return engine.StatusOK
 }
 
+func (srv *Server) ContainerPause(job *engine.Job) engine.Status {
+	if n := len(job.Args); n < 1 || n > 2 {
+		return job.Errorf("Usage: %s CONTAINER", job.Name)
+	}
+	var (
+		name = job.Args[0]
+	)
+
+	if container := srv.daemon.Get(name); container != nil {
+		if err := container.Pause(); err != nil {
+			return job.Errorf("Cannot pause container %s: %s", name, err)
+		}
+	} else {
+		return job.Errorf("No such container: %s", name)
+	}
+	return engine.StatusOK
+}
+
+func (srv *Server) ContainerUnpause(job *engine.Job) engine.Status {
+	if n := len(job.Args); n < 1 || n > 2 {
+		return job.Errorf("Usage: %s CONTAINER", job.Name)
+	}
+	var (
+		name = job.Args[0]
+	)
+
+	if container := srv.daemon.Get(name); container != nil {
+		if err := container.Unpause(); err != nil {
+			return job.Errorf("Cannot unpause container %s: %s", name, err)
+		}
+	} else {
+		return job.Errorf("No such container: %s", name)
+	}
+	return engine.StatusOK
+}
+
 // ContainerKill send signal to the container
 // If no signal is given (sig 0), then Kill with SIGKILL and wait
 // for the container to exit.