Initial implementation of containerd Checkpoint API.

Signed-off-by: boucher <rboucher@gmail.com>
This commit is contained in:
boucher 2016-05-12 10:52:00 -04:00
parent e345d67c4e
commit d8fef66b03
43 changed files with 659 additions and 38 deletions

View file

@ -57,12 +57,17 @@ RUN apt-get update && apt-get install -y \
libapparmor-dev \
libcap-dev \
libltdl-dev \
libnl-3-dev \
libprotobuf-c0-dev \
libprotobuf-dev \
libsqlite3-dev \
libsystemd-journal-dev \
libtool \
mercurial \
net-tools \
pkg-config \
protobuf-compiler \
protobuf-c-compiler \
python-dev \
python-mock \
python-pip \
@ -145,6 +150,14 @@ RUN git clone https://github.com/golang/lint.git /go/src/github.com/golang/lint
&& (cd /go/src/github.com/golang/lint && git checkout -q $GO_LINT_COMMIT) \
&& go install -v github.com/golang/lint/golint
# Install CRIU for checkpoint/restore support
ENV CRIU_VERSION 2.2
RUN mkdir -p /usr/src/criu \
&& curl -sSL https://github.com/xemul/criu/archive/v${CRIU_VERSION}.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1 \
&& cd /usr/src/criu \
&& make \
&& make install-criu
# Install two versions of the registry. The first is an older version that
# only supports schema1 manifests. The second is a newer version that supports
# both. This allows integration-cli tests to cover push/pull with both schema1

View file

@ -0,0 +1,12 @@
// +build experimental
package checkpoint
import "github.com/docker/docker/api/types"
// Backend for Checkpoint
type Backend interface {
CheckpointCreate(container string, config types.CheckpointCreateOptions) error
CheckpointDelete(container string, checkpointID string) error
CheckpointList(container string) ([]types.Checkpoint, error)
}

View file

@ -0,0 +1,28 @@
package checkpoint
import (
"github.com/docker/docker/api/server/httputils"
"github.com/docker/docker/api/server/router"
)
// checkpointRouter is a router to talk with the checkpoint controller
type checkpointRouter struct {
backend Backend
decoder httputils.ContainerDecoder
routes []router.Route
}
// NewRouter initializes a new checkpoint router
func NewRouter(b Backend, decoder httputils.ContainerDecoder) router.Router {
r := &checkpointRouter{
backend: b,
decoder: decoder,
}
r.initRoutes()
return r
}
// Routes returns the available routers to the checkpoint controller
func (r *checkpointRouter) Routes() []router.Route {
return r.routes
}

View file

@ -0,0 +1,15 @@
// +build experimental
package checkpoint
import (
"github.com/docker/docker/api/server/router"
)
func (r *checkpointRouter) initRoutes() {
r.routes = []router.Route{
router.NewGetRoute("/containers/{name:.*}/checkpoints", r.getContainerCheckpoints),
router.NewPostRoute("/containers/{name:.*}/checkpoints", r.postContainerCheckpoint),
router.NewDeleteRoute("/containers/{name:.*}/checkpoints/{checkpoint:.*}", r.deleteContainerCheckpoint),
}
}

View file

@ -0,0 +1,8 @@
// +build !experimental
package checkpoint
func (r *checkpointRouter) initRoutes() {}
// Backend is empty so that the package can compile in non-experimental
type Backend interface{}

View file

@ -0,0 +1,60 @@
// +build experimental
package checkpoint
import (
"encoding/json"
"net/http"
"github.com/docker/docker/api/server/httputils"
"github.com/docker/docker/api/types"
"golang.org/x/net/context"
)
func (s *checkpointRouter) postContainerCheckpoint(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
if err := httputils.ParseForm(r); err != nil {
return err
}
var options types.CheckpointCreateOptions
decoder := json.NewDecoder(r.Body)
if err := decoder.Decode(&options); err != nil {
return err
}
err := s.backend.CheckpointCreate(vars["name"], options)
if err != nil {
return err
}
w.WriteHeader(http.StatusNoContent)
return nil
}
func (s *checkpointRouter) getContainerCheckpoints(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
if err := httputils.ParseForm(r); err != nil {
return err
}
checkpoints, err := s.backend.CheckpointList(vars["name"])
if err != nil {
return err
}
return httputils.WriteJSON(w, http.StatusOK, checkpoints)
}
func (s *checkpointRouter) deleteContainerCheckpoint(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
if err := httputils.ParseForm(r); err != nil {
return err
}
err := s.backend.CheckpointDelete(vars["name"], vars["checkpoint"])
if err != nil {
return err
}
w.WriteHeader(http.StatusNoContent)
return nil
}

View file

@ -39,7 +39,7 @@ type stateBackend interface {
ContainerResize(name string, height, width int) error
ContainerRestart(name string, seconds int) error
ContainerRm(name string, config *types.ContainerRmConfig) error
ContainerStart(name string, hostConfig *container.HostConfig, validateHostname bool) error
ContainerStart(name string, hostConfig *container.HostConfig, validateHostname bool, checkpoint string) error
ContainerStop(name string, seconds int) error
ContainerUnpause(name string) error
ContainerUpdate(name string, hostConfig *container.HostConfig, validateHostname bool) (types.ContainerUpdateResponse, error)

View file

@ -151,10 +151,16 @@ func (s *containerRouter) postContainersStart(ctx context.Context, w http.Respon
hostConfig = c
}
validateHostname := versions.GreaterThanOrEqualTo(version, "1.24")
if err := s.backend.ContainerStart(vars["name"], hostConfig, validateHostname); err != nil {
if err := httputils.ParseForm(r); err != nil {
return err
}
checkpoint := r.Form.Get("checkpoint")
validateHostname := versions.GreaterThanOrEqualTo(version, "1.24")
if err := s.backend.ContainerStart(vars["name"], hostConfig, validateHostname, checkpoint); err != nil {
return err
}
w.WriteHeader(http.StatusNoContent)
return nil
}

View file

@ -124,12 +124,19 @@ type Backend interface {
// ContainerKill stops the container execution abruptly.
ContainerKill(containerID string, sig uint64) error
// ContainerStart starts a new container
ContainerStart(containerID string, hostConfig *container.HostConfig, validateHostname bool) error
ContainerStart(containerID string, hostConfig *container.HostConfig, validateHostname bool, checkpoint string) error
// ContainerWait stops processing until the given container is stopped.
ContainerWait(containerID string, timeout time.Duration) (int, error)
// ContainerUpdateCmdOnBuild updates container.Path and container.Args
ContainerUpdateCmdOnBuild(containerID string, cmd []string) error
// CheckpointCreate checkpoints a running container
CheckpointCreate(container string, config types.CheckpointCreateOptions) error
// CheckpointDelete deletes a container's checkpoint
CheckpointDelete(container string, checkpoint string) error
// CheckpointList lists the available checkpoints for a container
CheckpointList(container string) ([]types.Checkpoint, error)
// ContainerCopy copies/extracts a source FileInfo to a destination path inside a container
// specified by a container object.
// TODO: make an Extract method instead of passing `decompress`

View file

@ -555,7 +555,7 @@ func (b *Builder) run(cID string) (err error) {
}
}()
if err := b.docker.ContainerStart(cID, nil, true); err != nil {
if err := b.docker.ContainerStart(cID, nil, true, ""); err != nil {
return err
}

View file

@ -0,0 +1,12 @@
// +build !experimental
package checkpoint
import (
"github.com/docker/docker/cli/command"
"github.com/spf13/cobra"
)
// NewCheckpointCommand returns a cobra command for `checkpoint` subcommands
func NewCheckpointCommand(rootCmd *cobra.Command, dockerCli *command.DockerCli) {
}

View file

@ -0,0 +1,31 @@
// +build experimental
package checkpoint
import (
"fmt"
"github.com/spf13/cobra"
"github.com/docker/docker/cli"
"github.com/docker/docker/cli/command"
)
// NewCheckpointCommand returns a cobra command for `checkpoint` subcommands
func NewCheckpointCommand(rootCmd *cobra.Command, dockerCli *command.DockerCli) {
cmd := &cobra.Command{
Use: "checkpoint",
Short: "Manage Container Checkpoints",
Args: cli.NoArgs,
Run: func(cmd *cobra.Command, args []string) {
fmt.Fprintf(dockerCli.Err(), "\n"+cmd.UsageString())
},
}
cmd.AddCommand(
newCreateCommand(dockerCli),
newListCommand(dockerCli),
newRemoveCommand(dockerCli),
)
rootCmd.AddCommand(cmd)
}

View file

@ -0,0 +1,54 @@
// +build experimental
package checkpoint
import (
"golang.org/x/net/context"
"github.com/docker/docker/api/types"
"github.com/docker/docker/cli"
"github.com/docker/docker/cli/command"
"github.com/spf13/cobra"
)
type createOptions struct {
container string
checkpoint string
leaveRunning bool
}
func newCreateCommand(dockerCli *command.DockerCli) *cobra.Command {
var opts createOptions
cmd := &cobra.Command{
Use: "create CONTAINER CHECKPOINT",
Short: "Create a checkpoint from a running container",
Args: cli.ExactArgs(2),
RunE: func(cmd *cobra.Command, args []string) error {
opts.container = args[0]
opts.checkpoint = args[1]
return runCreate(dockerCli, opts)
},
}
flags := cmd.Flags()
flags.BoolVar(&opts.leaveRunning, "leave-running", false, "leave the container running after checkpoing")
return cmd
}
func runCreate(dockerCli *command.DockerCli, opts createOptions) error {
client := dockerCli.Client()
checkpointOpts := types.CheckpointCreateOptions{
CheckpointID: opts.checkpoint,
Exit: !opts.leaveRunning,
}
err := client.CheckpointCreate(context.Background(), opts.container, checkpointOpts)
if err != nil {
return err
}
return nil
}

View file

@ -0,0 +1,47 @@
// +build experimental
package checkpoint
import (
"fmt"
"text/tabwriter"
"golang.org/x/net/context"
"github.com/docker/docker/cli"
"github.com/docker/docker/cli/command"
"github.com/spf13/cobra"
)
func newListCommand(dockerCli *command.DockerCli) *cobra.Command {
return &cobra.Command{
Use: "ls CONTAINER",
Aliases: []string{"list"},
Short: "List checkpoints for a container",
Args: cli.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
return runList(dockerCli, args[0])
},
}
}
func runList(dockerCli *command.DockerCli, container string) error {
client := dockerCli.Client()
checkpoints, err := client.CheckpointList(context.Background(), container)
if err != nil {
return err
}
w := tabwriter.NewWriter(dockerCli.Out(), 20, 1, 3, ' ', 0)
fmt.Fprintf(w, "CHECKPOINT NAME")
fmt.Fprintf(w, "\n")
for _, checkpoint := range checkpoints {
fmt.Fprintf(w, "%s\t", checkpoint.Name)
fmt.Fprint(w, "\n")
}
w.Flush()
return nil
}

View file

@ -0,0 +1,28 @@
// +build experimental
package checkpoint
import (
"golang.org/x/net/context"
"github.com/docker/docker/cli"
"github.com/docker/docker/cli/command"
"github.com/spf13/cobra"
)
func newRemoveCommand(dockerCli *command.DockerCli) *cobra.Command {
return &cobra.Command{
Use: "rm CONTAINER CHECKPOINT",
Aliases: []string{"remove"},
Short: "Remove a checkpoint",
Args: cli.ExactArgs(2),
RunE: func(cmd *cobra.Command, args []string) error {
return runRemove(dockerCli, args[0], args[1])
},
}
}
func runRemove(dockerCli *command.DockerCli, container string, checkpoint string) error {
client := dockerCli.Client()
return client.CheckpointDelete(context.Background(), container, checkpoint)
}

View file

@ -2,6 +2,7 @@ package commands
import (
"github.com/docker/docker/cli/command"
"github.com/docker/docker/cli/command/checkpoint"
"github.com/docker/docker/cli/command/container"
"github.com/docker/docker/cli/command/image"
"github.com/docker/docker/cli/command/network"
@ -67,5 +68,6 @@ func AddCommands(cmd *cobra.Command, dockerCli *command.DockerCli) {
volume.NewVolumeCommand(dockerCli),
system.NewInfoCommand(dockerCli),
)
checkpoint.NewCheckpointCommand(cmd, dockerCli)
plugin.NewPluginCommand(cmd, dockerCli)
}

View file

@ -20,6 +20,7 @@ type startOptions struct {
attach bool
openStdin bool
detachKeys string
checkpoint string
containers []string
}
@ -42,6 +43,9 @@ func NewStartCommand(dockerCli *command.DockerCli) *cobra.Command {
flags.BoolVarP(&opts.attach, "attach", "a", false, "Attach STDOUT/STDERR and forward signals")
flags.BoolVarP(&opts.openStdin, "interactive", "i", false, "Attach container's STDIN")
flags.StringVar(&opts.detachKeys, "detach-keys", "", "Override the key sequence for detaching a container")
addExperimentalStartFlags(flags, &opts)
return cmd
}
@ -105,9 +109,12 @@ func runStart(dockerCli *command.DockerCli, opts *startOptions) error {
// 3. We should open a channel for receiving status code of the container
// no matter it's detached, removed on daemon side(--rm) or exit normally.
statusChan, statusErr := waitExitOrRemoved(dockerCli, context.Background(), c.ID, c.HostConfig.AutoRemove)
startOptions := types.ContainerStartOptions{
CheckpointID: opts.checkpoint,
}
// 4. Start the container.
if err := dockerCli.Client().ContainerStart(ctx, c.ID, types.ContainerStartOptions{}); err != nil {
if err := dockerCli.Client().ContainerStart(ctx, c.ID, startOptions); err != nil {
cancelFun()
<-cErr
if c.HostConfig.AutoRemove && statusErr == nil {
@ -134,6 +141,16 @@ func runStart(dockerCli *command.DockerCli, opts *startOptions) error {
if status := <-statusChan; status != 0 {
return cli.StatusError{StatusCode: status}
}
} else if opts.checkpoint != "" {
if len(opts.containers) > 1 {
return fmt.Errorf("You cannot restore multiple containers at once.")
}
container := opts.containers[0]
startOptions := types.ContainerStartOptions{
CheckpointID: opts.checkpoint,
}
return dockerCli.Client().ContainerStart(ctx, container, startOptions)
} else {
// We're not going to attach to anything.
// Start as many containers as we want.

View file

@ -0,0 +1,8 @@
// +build !experimental
package container
import "github.com/spf13/pflag"
func addExperimentalStartFlags(flags *pflag.FlagSet, opts *startOptions) {
}

View file

@ -0,0 +1,9 @@
// +build experimental
package container
import "github.com/spf13/pflag"
func addExperimentalStartFlags(flags *pflag.FlagSet, opts *startOptions) {
flags.StringVar(&opts.checkpoint, "checkpoint", "", "Restore from this checkpoint")
}

View file

@ -409,7 +409,7 @@ func initRouter(s *apiserver.Server, d *daemon.Daemon, c *cluster.Cluster) {
if d.NetworkControllerEnabled() {
routers = append(routers, network.NewRouter(d, c))
}
routers = addExperimentalRouters(routers)
routers = addExperimentalRouters(routers, d, decoder)
s.InitRouter(utils.IsDebugEnabled(), routers...)
}

View file

@ -2,8 +2,12 @@
package main
import "github.com/docker/docker/api/server/router"
import (
"github.com/docker/docker/api/server/httputils"
"github.com/docker/docker/api/server/router"
"github.com/docker/docker/daemon"
)
func addExperimentalRouters(routers []router.Router) []router.Router {
func addExperimentalRouters(routers []router.Router, d *daemon.Daemon, decoder httputils.ContainerDecoder) []router.Router {
return routers
}

View file

@ -3,11 +3,14 @@
package main
import (
"github.com/docker/docker/api/server/httputils"
"github.com/docker/docker/api/server/router"
checkpointrouter "github.com/docker/docker/api/server/router/checkpoint"
pluginrouter "github.com/docker/docker/api/server/router/plugin"
"github.com/docker/docker/daemon"
"github.com/docker/docker/plugin"
)
func addExperimentalRouters(routers []router.Router) []router.Router {
return append(routers, pluginrouter.NewRouter(plugin.GetManager()))
func addExperimentalRouters(routers []router.Router, d *daemon.Daemon, decoder httputils.ContainerDecoder) []router.Router {
return append(routers, checkpointrouter.NewRouter(d, decoder), pluginrouter.NewRouter(plugin.GetManager()))
}

View file

@ -306,6 +306,11 @@ func (container *Container) ConfigPath() (string, error) {
return container.GetRootResourcePath(configFileName)
}
// CheckpointDir returns the directory checkpoints are stored in
func (container *Container) CheckpointDir() string {
return filepath.Join(container.Root, "checkpoints")
}
// StartLogger starts a new logger driver for the container.
func (container *Container) StartLogger(cfg containertypes.LogConfig) (logger.Logger, error) {
c, err := logger.GetLogDriver(cfg.Type)

82
daemon/checkpoint.go Normal file
View file

@ -0,0 +1,82 @@
package daemon
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"github.com/docker/docker/api/types"
)
// CheckpointCreate checkpoints the process running in a container with CRIU
func (daemon *Daemon) CheckpointCreate(name string, config types.CheckpointCreateOptions) error {
container, err := daemon.GetContainer(name)
if err != nil {
return err
}
if !container.IsRunning() {
return fmt.Errorf("Container %s not running", name)
}
err = daemon.containerd.CreateCheckpoint(container.ID, config.CheckpointID, container.CheckpointDir(), config.Exit)
if err != nil {
return fmt.Errorf("Cannot checkpoint container %s: %s", name, err)
}
daemon.LogContainerEvent(container, "checkpoint")
return nil
}
// CheckpointDelete deletes the specified checkpoint
func (daemon *Daemon) CheckpointDelete(name string, checkpoint string) error {
container, err := daemon.GetContainer(name)
if err != nil {
return err
}
checkpointDir := container.CheckpointDir()
return os.RemoveAll(filepath.Join(checkpointDir, checkpoint))
}
// CheckpointList deletes the specified checkpoint
func (daemon *Daemon) CheckpointList(name string) ([]types.Checkpoint, error) {
response := []types.Checkpoint{}
container, err := daemon.GetContainer(name)
if err != nil {
return response, err
}
checkpointDir := container.CheckpointDir()
if err := os.MkdirAll(checkpointDir, 0755); err != nil {
return nil, err
}
dirs, err := ioutil.ReadDir(checkpointDir)
if err != nil {
return nil, err
}
var out []types.Checkpoint
for _, d := range dirs {
if !d.IsDir() {
continue
}
path := filepath.Join(checkpointDir, d.Name(), "config.json")
data, err := ioutil.ReadFile(path)
if err != nil {
return nil, err
}
var cpt types.Checkpoint
if err := json.Unmarshal(data, &cpt); err != nil {
return nil, err
}
out = append(out, cpt)
}
return out, nil
}

View file

@ -24,7 +24,7 @@ type Backend interface {
SetupIngress(req clustertypes.NetworkCreateRequest, nodeIP string) error
PullImage(ctx context.Context, image, tag string, metaHeaders map[string][]string, authConfig *types.AuthConfig, outStream io.Writer) error
CreateManagedContainer(config types.ContainerCreateConfig, validateHostname bool) (types.ContainerCreateResponse, error)
ContainerStart(name string, hostConfig *container.HostConfig, validateHostname bool) error
ContainerStart(name string, hostConfig *container.HostConfig, validateHostname bool, checkpoint string) error
ContainerStop(name string, seconds int) error
ConnectContainerToNetwork(containerName, networkName string, endpointConfig *network.EndpointSettings) error
UpdateContainerServiceConfig(containerName string, serviceConfig *clustertypes.ServiceConfig) error

View file

@ -220,7 +220,7 @@ func (c *containerAdapter) create(ctx context.Context) error {
func (c *containerAdapter) start(ctx context.Context) error {
version := httputils.VersionFromContext(ctx)
validateHostname := versions.GreaterThanOrEqualTo(version, "1.24")
return c.backend.ContainerStart(c.container.name(), nil, validateHostname)
return c.backend.ContainerStart(c.container.name(), nil, validateHostname, "")
}
func (c *containerAdapter) inspect(ctx context.Context) (types.ContainerJSON, error) {

View file

@ -115,6 +115,9 @@ func (daemon *Daemon) create(params types.ContainerCreateConfig, managed bool) (
if err := idtools.MkdirAs(container.Root, 0700, rootUID, rootGID); err != nil {
return nil, err
}
if err := idtools.MkdirAs(container.CheckpointDir(), 0700, rootUID, rootGID); err != nil {
return nil, err
}
if err := daemon.setHostConfig(container, params.HostConfig); err != nil {
return nil, err

View file

@ -287,7 +287,7 @@ func (daemon *Daemon) restore() error {
// Make sure networks are available before starting
daemon.waitForNetworks(c)
if err := daemon.containerStart(c); err != nil {
if err := daemon.containerStart(c, ""); err != nil {
logrus.Errorf("Failed to start container %s: %s", c.ID, err)
}
close(chNotify)

View file

@ -28,7 +28,7 @@ func (daemon *Daemon) postRunProcessing(container *container.Container, e libcon
// Create a new servicing container, which will start, complete the update, and merge back the
// results if it succeeded, all as part of the below function call.
if err := daemon.containerd.Create((container.ID + "_servicing"), *spec, servicingOption); err != nil {
if err := daemon.containerd.Create((container.ID + "_servicing"), "", "", *spec, servicingOption); err != nil {
container.SetExitCode(-1)
return fmt.Errorf("Post-run update servicing failed: %s", err)
}

View file

@ -56,7 +56,7 @@ func (daemon *Daemon) containerRestart(container *container.Container, seconds i
}
}
if err := daemon.containerStart(container); err != nil {
if err := daemon.containerStart(container, ""); err != nil {
return err
}

View file

@ -19,7 +19,7 @@ import (
)
// ContainerStart starts a container.
func (daemon *Daemon) ContainerStart(name string, hostConfig *containertypes.HostConfig, validateHostname bool) error {
func (daemon *Daemon) ContainerStart(name string, hostConfig *containertypes.HostConfig, validateHostname bool, checkpoint string) error {
container, err := daemon.GetContainer(name)
if err != nil {
return err
@ -78,19 +78,19 @@ func (daemon *Daemon) ContainerStart(name string, hostConfig *containertypes.Hos
return err
}
return daemon.containerStart(container)
return daemon.containerStart(container, checkpoint)
}
// Start starts a container
func (daemon *Daemon) Start(container *container.Container) error {
return daemon.containerStart(container)
return daemon.containerStart(container, "")
}
// containerStart prepares the container to run by setting up everything the
// container needs, such as storage and networking, as well as links
// between containers. The container is left waiting for a signal to
// begin running.
func (daemon *Daemon) containerStart(container *container.Container) (err error) {
func (daemon *Daemon) containerStart(container *container.Container, checkpoint string) (err error) {
container.Lock()
defer container.Unlock()
@ -150,7 +150,7 @@ func (daemon *Daemon) containerStart(container *container.Container) (err error)
createOptions = append(createOptions, *copts...)
}
if err := daemon.containerd.Create(container.ID, *spec, createOptions...); err != nil {
if err := daemon.containerd.Create(container.ID, checkpoint, container.CheckpointDir(), *spec, createOptions...); err != nil {
errDesc := grpc.ErrorDesc(err)
logrus.Errorf("Create container failed with error: %s", errDesc)
// if we receive an internal error from the initial start of a container then lets

View file

@ -2,7 +2,7 @@
This page contains a list of features in the Docker engine which are
experimental. Experimental features are **not** ready for production. They are
provided for test and evaluation in your sandbox environments.
provided for test and evaluation in your sandbox environments.
The information below describes each feature and the GitHub pull requests and
issues associated with it. If necessary, links are provided to additional
@ -74,9 +74,10 @@ to build a Docker binary with the experimental features enabled:
* [External graphdriver plugins](plugins_graphdriver.md)
* [Macvlan and Ipvlan Network Drivers](vlan-networks.md)
* [Docker Stacks and Distributed Application Bundles](docker-stacks-and-bundles.md)
* [Checkpoint & Restore](checkpoint-restore.md)
## How to comment on an experimental feature
Each feature's documentation includes a list of proposal pull requests or PRs associated with the feature. If you want to comment on or suggest a change to a feature, please add it to the existing feature PR.
Each feature's documentation includes a list of proposal pull requests or PRs associated with the feature. If you want to comment on or suggest a change to a feature, please add it to the existing feature PR.
Issues or problems with a feature? Inquire for help on the `#docker` IRC channel or in on the [Docker Google group](https://groups.google.com/forum/#!forum/docker-user).
Issues or problems with a feature? Inquire for help on the `#docker` IRC channel or in on the [Docker Google group](https://groups.google.com/forum/#!forum/docker-user).

View file

@ -0,0 +1,75 @@
# Docker Checkpoint & Restore
Checkpoint & Restore is a new feature that allows you to freeze a running
container by checkpointing it, which turns its state into a collection of files
on disk. Later, the container can be restored from the point it was frozen.
This is accomplished using a tool called [CRIU](http://criu.org), which is an
external dependency of this feature. A good overview of the history of
checkpoint and restore in Docker is available in this
[Kubernetes blog post](http://blog.kubernetes.io/2015/07/how-did-quake-demo-from-dockercon-work.html).
## Installing CRIU
If you use a Debian system, you can add the CRIU PPA and install with apt-get
[from the criu launchpad](https://launchpad.net/~criu/+archive/ubuntu/ppa).
Alternatively, you can [build CRIU from source](http://criu.org/Installation).
You need at least version 2.0 of CRIU to run checkpoint/restore in Docker.
## Use cases for checkpoint & restore
This feature is currently focused on single-host use cases for checkpoint and
restore. Here are a few:
- Restarting the host machine without stopping/starting containers
- Speeding up the start time of slow start applications
- "Rewinding" processes to an earlier point in time
- "Forensic debugging" of running processes
Another primary use case of checkpoint & restore outside of Docker is the live
migration of a server from one machine to another. This is possible with the
current implementation, but not currently a priority (and so the workflow is
not optimized for the task).
## Using Checkpoint & Restore
A new top level commands `docker checkpoint` is introduced, with three subcommands:
- `create` (creates a new checkpoint)
- `ls` (lists existing checkpoints)
- `rm` (deletes an existing checkpoint)
Additionally, a `--checkpoint` flag is added to the container start command.
The options for checkpoint create:
Usage: docker checkpoint [OPTIONS] CONTAINER CHECKPOINT_ID
Checkpoint the specified container
--leave-running=false leave the container running after checkpoint
And to restore a container:
Usage: docker start --checkpoint CHECKPOINT_ID [OTHER OPTIONS] CONTAINER
A simple example of using checkpoint & restore on a container:
$ docker run --security-opt=seccomp:unconfined --name cr -d busybox /bin/sh -c 'i=0; while true; do echo $i; i=$(expr $i + 1); sleep 1; done'
> abc0123
$ docker checkpoint create cr checkpoint1
# <later>
$ docker start --checkpoint checkpoint1 cr
> abc0123
This process just logs an incrementing counter to stdout. If you `docker logs`
in between running/checkpoint/restoring you should see that the counter
increases while the process is running, stops while it's checkpointed, and
resumes from the point it left off once you restore.
Note that seccomp is only supported by CRIU in very up to date kernels.

View file

@ -10,6 +10,7 @@ import (
"github.com/docker/docker/pkg/homedir"
"github.com/docker/docker/pkg/integration/checker"
icmd "github.com/docker/docker/pkg/integration/cmd"
"github.com/docker/docker/utils"
"github.com/go-check/check"
)
@ -122,6 +123,12 @@ func (s *DockerSuite) TestHelpTextVerify(c *check.C) {
cmdsToTest = append(cmdsToTest, "network ls")
cmdsToTest = append(cmdsToTest, "network rm")
if utils.ExperimentalBuild() {
cmdsToTest = append(cmdsToTest, "checkpoint create")
cmdsToTest = append(cmdsToTest, "checkpoint ls")
cmdsToTest = append(cmdsToTest, "checkpoint rm")
}
// Divide the list of commands into go routines and run the func testcommand on the commands in parallel
// to save runtime of test

View file

@ -133,7 +133,7 @@ func (clnt *client) prepareBundleDir(uid, gid int) (string, error) {
return p, nil
}
func (clnt *client) Create(containerID string, spec Spec, options ...CreateOption) (err error) {
func (clnt *client) Create(containerID string, checkpoint string, checkpointDir string, spec Spec, options ...CreateOption) (err error) {
clnt.lock(containerID)
defer clnt.unlock(containerID)
@ -180,7 +180,7 @@ func (clnt *client) Create(containerID string, spec Spec, options ...CreateOptio
return err
}
return container.start()
return container.start(checkpoint, checkpointDir)
}
func (clnt *client) Signal(containerID string, sig int) error {
@ -625,3 +625,57 @@ func (en *exitNotifier) close() {
func (en *exitNotifier) wait() <-chan struct{} {
return en.c
}
func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error {
clnt.lock(containerID)
defer clnt.unlock(containerID)
if _, err := clnt.getContainer(containerID); err != nil {
return err
}
_, err := clnt.remote.apiClient.CreateCheckpoint(context.Background(), &containerd.CreateCheckpointRequest{
Id: containerID,
Checkpoint: &containerd.Checkpoint{
Name: checkpointID,
Exit: exit,
Tcp: true,
UnixSockets: true,
Shell: false,
EmptyNS: []string{"network"},
},
CheckpointDir: checkpointDir,
})
return err
}
func (clnt *client) DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error {
clnt.lock(containerID)
defer clnt.unlock(containerID)
if _, err := clnt.getContainer(containerID); err != nil {
return err
}
_, err := clnt.remote.apiClient.DeleteCheckpoint(context.Background(), &containerd.DeleteCheckpointRequest{
Id: containerID,
Name: checkpointID,
CheckpointDir: checkpointDir,
})
return err
}
func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) {
clnt.lock(containerID)
defer clnt.unlock(containerID)
if _, err := clnt.getContainer(containerID); err != nil {
return nil, err
}
resp, err := clnt.remote.apiClient.ListCheckpoint(context.Background(), &containerd.ListCheckpointRequest{
Id: containerID,
CheckpointDir: checkpointDir,
})
if err != nil {
return nil, err
}
return (*Checkpoints)(resp), nil
}

View file

@ -12,7 +12,7 @@ func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendly
return nil
}
func (clnt *client) Create(containerID string, spec Spec, options ...CreateOption) (err error) {
func (clnt *client) Create(containerID string, checkpoint string, checkpointDir string, spec Spec, options ...CreateOption) (err error) {
return nil
}

View file

@ -37,7 +37,7 @@ const defaultOwner = "docker"
// Create is the entrypoint to create a container from a spec, and if successfully
// created, start it too.
func (clnt *client) Create(containerID string, spec Spec, options ...CreateOption) error {
func (clnt *client) Create(containerID string, checkpoint string, checkpointDir string, spec Spec, options ...CreateOption) error {
logrus.Debugln("libcontainerd: client.Create() with spec", spec)
configuration := &hcsshim.ContainerConfig{
@ -435,3 +435,15 @@ func (clnt *client) UpdateResources(containerID string, resources Resources) err
// but we should return nil for enabling updating container
return nil
}
func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error {
return errors.New("Windows: Containers do not support checkpoints")
}
func (clnt *client) DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error {
return errors.New("Windows: Containers do not support checkpoints")
}
func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) {
return nil, errors.New("Windows: Containers do not support checkpoints")
}

View file

@ -86,7 +86,7 @@ func (ctr *container) spec() (*specs.Spec, error) {
return &spec, nil
}
func (ctr *container) start() error {
func (ctr *container) start(checkpoint string, checkpointDir string) error {
spec, err := ctr.spec()
if err != nil {
return nil
@ -97,11 +97,13 @@ func (ctr *container) start() error {
}
r := &containerd.CreateContainerRequest{
Id: ctr.containerID,
BundlePath: ctr.dir,
Stdin: ctr.fifo(syscall.Stdin),
Stdout: ctr.fifo(syscall.Stdout),
Stderr: ctr.fifo(syscall.Stderr),
Id: ctr.containerID,
BundlePath: ctr.dir,
Stdin: ctr.fifo(syscall.Stdin),
Stdout: ctr.fifo(syscall.Stdout),
Stderr: ctr.fifo(syscall.Stderr),
Checkpoint: checkpoint,
CheckpointDir: checkpointDir,
// check to see if we are running in ramdisk to disable pivot root
NoPivotRoot: os.Getenv("DOCKER_RAMDISK") != "",
Runtime: ctr.runtime,
@ -191,7 +193,7 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
defer ctr.client.unlock(ctr.containerID)
ctr.restarting = false
if err == nil {
if err = ctr.start(); err != nil {
if err = ctr.start("", ""); err != nil {
logrus.Errorf("libcontainerd: error restarting %v", err)
}
}

View file

@ -261,7 +261,7 @@ func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) err
ctr.restarting = false
ctr.client.deleteContainer(ctr.friendlyName)
if err == nil {
if err = ctr.client.Create(ctr.containerID, ctr.ociSpec, ctr.options...); err != nil {
if err = ctr.client.Create(ctr.containerID, "", "", ctr.ociSpec, ctr.options...); err != nil {
logrus.Errorf("libcontainerd: error restarting %v", err)
}
}

View file

@ -36,7 +36,7 @@ type Backend interface {
// Client provides access to containerd features.
type Client interface {
Create(containerID string, spec Spec, options ...CreateOption) error
Create(containerID string, checkpoint string, checkpointDir string, spec Spec, options ...CreateOption) error
Signal(containerID string, sig int) error
SignalProcess(containerID string, processFriendlyName string, sig int) error
AddProcess(ctx context.Context, containerID, processFriendlyName string, process Process) error
@ -48,6 +48,9 @@ type Client interface {
GetPidsForContainer(containerID string) ([]int, error)
Summary(containerID string) ([]Summary, error)
UpdateResources(containerID string, resources Resources) error
CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error
DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error
ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error)
}
// CreateOption allows to configure parameters of container creation.

View file

@ -53,3 +53,6 @@ type User specs.User
// Resources defines updatable container resource values.
type Resources containerd.UpdateResource
// Checkpoints contains the details of a checkpoint
type Checkpoints containerd.ListCheckpointResponse

View file

@ -37,3 +37,13 @@ type Resources struct{}
type ServicingOption struct {
IsServicing bool
}
// Checkpoint holds the details of a checkpoint (not supported in windows)
type Checkpoint struct {
Name string
}
// Checkpoints contains the details of a checkpoint
type Checkpoints struct {
Checkpoints []*Checkpoint
}

View file

@ -27,7 +27,7 @@ func (pm *Manager) enable(p *v2.Plugin, force bool) error {
}
p.RestartManager = restartmanager.New(container.RestartPolicy{Name: "always"}, 0)
if err := pm.containerdClient.Create(p.GetID(), libcontainerd.Spec(*spec), libcontainerd.WithRestartManager(p.RestartManager)); err != nil {
if err := pm.containerdClient.Create(p.GetID(), "", "", libcontainerd.Spec(*spec), libcontainerd.WithRestartManager(p.RestartManager)); err != nil {
if err := p.RestartManager.Cancel(); err != nil {
logrus.Errorf("enable: restartManager.Cancel failed due to %v", err)
}