Merge pull request #31273 from fabiokung/consistent-ro-view

No container locks on `docker ps`
Aaron Lehmann, 8 years ago
parent commit 56ad9bb1b4

+ 51 - 28
container/container.go

@@ -1,6 +1,7 @@
 package container
 
 import (
+	"bytes"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -14,8 +15,6 @@ import (
 	"syscall"
 	"time"
 
-	"golang.org/x/net/context"
-
 	"github.com/Sirupsen/logrus"
 	containertypes "github.com/docker/docker/api/types/container"
 	mounttypes "github.com/docker/docker/api/types/mount"
@@ -45,7 +44,7 @@ import (
 	"github.com/docker/libnetwork/options"
 	"github.com/docker/libnetwork/types"
 	agentexec "github.com/docker/swarmkit/agent/exec"
-	"github.com/opencontainers/selinux/go-selinux/label"
+	"golang.org/x/net/context"
 )
 
 const configFileName = "config.v2.json"
@@ -152,41 +151,51 @@ func (container *Container) FromDisk() error {
 		container.Platform = runtime.GOOS
 	}
 
-	if err := label.ReserveLabel(container.ProcessLabel); err != nil {
-		return err
-	}
 	return container.readHostConfig()
 }
 
-// ToDisk saves the container configuration on disk.
-func (container *Container) ToDisk() error {
+// toDisk saves the container configuration on disk and returns a deep copy.
+func (container *Container) toDisk() (*Container, error) {
+	var (
+		buf      bytes.Buffer
+		deepCopy Container
+	)
 	pth, err := container.ConfigPath()
 	if err != nil {
-		return err
+		return nil, err
 	}
 
-	jsonSource, err := ioutils.NewAtomicFileWriter(pth, 0644)
+	// Save container settings
+	f, err := ioutils.NewAtomicFileWriter(pth, 0644)
 	if err != nil {
-		return err
+		return nil, err
 	}
-	defer jsonSource.Close()
+	defer f.Close()
 
-	enc := json.NewEncoder(jsonSource)
+	w := io.MultiWriter(&buf, f)
+	if err := json.NewEncoder(w).Encode(container); err != nil {
+		return nil, err
+	}
 
-	// Save container settings
-	if err := enc.Encode(container); err != nil {
-		return err
+	if err := json.NewDecoder(&buf).Decode(&deepCopy); err != nil {
+		return nil, err
+	}
+	deepCopy.HostConfig, err = container.WriteHostConfig()
+	if err != nil {
+		return nil, err
 	}
 
-	return container.WriteHostConfig()
+	return &deepCopy, nil
 }
 
-// ToDiskLocking saves the container configuration on disk in a thread safe way.
-func (container *Container) ToDiskLocking() error {
-	container.Lock()
-	err := container.ToDisk()
-	container.Unlock()
-	return err
+// CheckpointTo makes the Container's current state visible to queries, and persists state.
+// Callers must hold a Container lock.
+func (container *Container) CheckpointTo(store ViewDB) error {
+	deepCopy, err := container.toDisk()
+	if err != nil {
+		return err
+	}
+	return store.Save(deepCopy)
 }
 
 // readHostConfig reads the host configuration from disk for the container.
@@ -218,20 +227,34 @@ func (container *Container) readHostConfig() error {
 	return nil
 }
 
-// WriteHostConfig saves the host configuration on disk for the container.
-func (container *Container) WriteHostConfig() error {
+// WriteHostConfig saves the host configuration on disk for the container,
+// and returns a deep copy of the saved object. Callers must hold a Container lock.
+func (container *Container) WriteHostConfig() (*containertypes.HostConfig, error) {
+	var (
+		buf      bytes.Buffer
+		deepCopy containertypes.HostConfig
+	)
+
 	pth, err := container.HostConfigPath()
 	if err != nil {
-		return err
+		return nil, err
 	}
 
 	f, err := ioutils.NewAtomicFileWriter(pth, 0644)
 	if err != nil {
-		return err
+		return nil, err
 	}
 	defer f.Close()
 
-	return json.NewEncoder(f).Encode(&container.HostConfig)
+	w := io.MultiWriter(&buf, f)
+	if err := json.NewEncoder(w).Encode(&container.HostConfig); err != nil {
+		return nil, err
+	}
+
+	if err := json.NewDecoder(&buf).Decode(&deepCopy); err != nil {
+		return nil, err
+	}
+	return &deepCopy, nil
 }
 
 // SetupWorkingDirectory sets up the container's working directory as set in container.Config.WorkingDir
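
The toDisk/WriteHostConfig rewrite above gets a deep copy as a side effect of persistence: the object is encoded once through an io.MultiWriter, feeding both the atomic file writer and an in-memory buffer, and the buffer is then decoded into a fresh value. Below is a minimal, self-contained sketch of that pattern using only the standard library; the config type and persistAndCopy are illustrative names, not Docker code.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"os"
	"path/filepath"
)

type config struct {
	Name   string
	Labels map[string]string
}

// persistAndCopy encodes c once so the same bytes land on disk and in an
// in-memory buffer, then decodes the buffer into a fresh value, yielding an
// independent deep copy of exactly what was persisted.
func persistAndCopy(path string, c *config) (*config, error) {
	f, err := os.Create(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	var buf bytes.Buffer
	if err := json.NewEncoder(io.MultiWriter(&buf, f)).Encode(c); err != nil {
		return nil, err
	}

	var dup config
	if err := json.NewDecoder(&buf).Decode(&dup); err != nil {
		return nil, err
	}
	return &dup, nil
}

func main() {
	orig := &config{Name: "c1", Labels: map[string]string{"app": "web"}}
	dup, err := persistAndCopy(filepath.Join(os.TempDir(), "config.v2.json"), orig)
	if err != nil {
		panic(err)
	}
	dup.Labels["app"] = "changed"
	fmt.Println(orig.Labels["app"], dup.Labels["app"]) // "web changed": no shared state
}

The JSON round trip guarantees the returned value shares no pointers with the live container, which is what makes it safe to hand to ViewDB.Save for lock-free readers.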

+ 20 - 9
container/container_unix.go

@@ -10,6 +10,7 @@ import (
 	"strings"
 
 	"github.com/Sirupsen/logrus"
+	"github.com/docker/docker/api/types"
 	containertypes "github.com/docker/docker/api/types/container"
 	mounttypes "github.com/docker/docker/api/types/mount"
 	"github.com/docker/docker/pkg/chrootarchive"
@@ -261,11 +262,8 @@ func (container *Container) ConfigMounts() []Mount {
 	return mounts
 }
 
-// UpdateContainer updates configuration of a container.
+// UpdateContainer updates configuration of a container. Callers must hold a Lock on the Container.
 func (container *Container) UpdateContainer(hostConfig *containertypes.HostConfig) error {
-	container.Lock()
-	defer container.Unlock()
-
 	// update resources of container
 	resources := hostConfig.Resources
 	cResources := &container.HostConfig.Resources
@@ -334,11 +332,6 @@ func (container *Container) UpdateContainer(hostConfig *containertypes.HostConfi
 		container.HostConfig.RestartPolicy = hostConfig.RestartPolicy
 	}
 
-	if err := container.ToDisk(); err != nil {
-		logrus.Errorf("Error saving updated container: %v", err)
-		return err
-	}
-
 	return nil
 }
 
@@ -462,3 +455,21 @@ func cleanResourcePath(path string) string {
 func (container *Container) EnableServiceDiscoveryOnDefaultNetwork() bool {
 	return false
 }
+
+// GetMountPoints gives a platform specific transformation to types.MountPoint. Callers must hold a Container lock.
+func (container *Container) GetMountPoints() []types.MountPoint {
+	mountPoints := make([]types.MountPoint, 0, len(container.MountPoints))
+	for _, m := range container.MountPoints {
+		mountPoints = append(mountPoints, types.MountPoint{
+			Type:        m.Type,
+			Name:        m.Name,
+			Source:      m.Path(),
+			Destination: m.Destination,
+			Driver:      m.Driver,
+			Mode:        m.Mode,
+			RW:          m.RW,
+			Propagation: m.Propagation,
+		})
+	}
+	return mountPoints
+}

+ 18 - 4
container/container_windows.go

@@ -7,6 +7,7 @@ import (
 	"os"
 	"path/filepath"
 
+	"github.com/docker/docker/api/types"
 	containertypes "github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/pkg/system"
 )
@@ -125,11 +126,8 @@ func (container *Container) TmpfsMounts() ([]Mount, error) {
 	return mounts, nil
 }
 
-// UpdateContainer updates configuration of a container
+// UpdateContainer updates configuration of a container. Callers must hold a Lock on the Container.
 func (container *Container) UpdateContainer(hostConfig *containertypes.HostConfig) error {
-	container.Lock()
-	defer container.Unlock()
-
 	resources := hostConfig.Resources
 	if resources.CPUShares != 0 ||
 		resources.Memory != 0 ||
@@ -194,3 +192,19 @@ func (container *Container) BuildHostnameFile() error {
 func (container *Container) EnableServiceDiscoveryOnDefaultNetwork() bool {
 	return true
 }
+
+// GetMountPoints gives a platform specific transformation to types.MountPoint. Callers must hold a Container lock.
+func (container *Container) GetMountPoints() []types.MountPoint {
+	mountPoints := make([]types.MountPoint, 0, len(container.MountPoints))
+	for _, m := range container.MountPoints {
+		mountPoints = append(mountPoints, types.MountPoint{
+			Type:        m.Type,
+			Name:        m.Name,
+			Source:      m.Path(),
+			Destination: m.Destination,
+			Driver:      m.Driver,
+			RW:          m.RW,
+		})
+	}
+	return mountPoints
+}

+ 4 - 3
container/health.go

@@ -13,9 +13,8 @@ type Health struct {
 
 // String returns a human-readable description of the health-check state
 func (s *Health) String() string {
-	// This happens when the container is being shutdown and the monitor has stopped
-	// or the monitor has yet to be setup.
-	if s.stop == nil {
+	// This happens when the monitor has yet to be setup.
+	if s.Status == "" {
 		return types.Unhealthy
 	}
 
@@ -44,6 +43,8 @@ func (s *Health) CloseMonitorChannel() {
 		logrus.Debug("CloseMonitorChannel: waiting for probe to stop")
 		close(s.stop)
 		s.stop = nil
+		// unhealthy when the monitor has stopped for compatibility reasons
+		s.Status = types.Unhealthy
 		logrus.Debug("CloseMonitorChannel done")
 	}
 }

+ 288 - 0
container/view.go

@@ -0,0 +1,288 @@
+package container
+
+import (
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/Sirupsen/logrus"
+	"github.com/docker/docker/api/types"
+	"github.com/docker/docker/api/types/network"
+	"github.com/docker/docker/pkg/registrar"
+	"github.com/docker/go-connections/nat"
+	"github.com/hashicorp/go-memdb"
+)
+
+const (
+	memdbTable   = "containers"
+	memdbIDIndex = "id"
+)
+
+// Snapshot is a read only view for Containers. It holds all information necessary to serve container queries in a
+// versioned ACID in-memory store.
+type Snapshot struct {
+	types.Container
+
+	// additional info queries need to filter on
+	// preserve nanosec resolution for queries
+	CreatedAt    time.Time
+	StartedAt    time.Time
+	Name         string
+	Pid          int
+	ExitCode     int
+	Running      bool
+	Paused       bool
+	Managed      bool
+	ExposedPorts nat.PortSet
+	PortBindings nat.PortSet
+	Health       string
+	HostConfig   struct {
+		Isolation string
+	}
+}
+
+// ViewDB provides an in-memory transactional (ACID) container Store
+type ViewDB interface {
+	Snapshot(nameIndex *registrar.Registrar) View
+	Save(*Container) error
+	Delete(*Container) error
+}
+
+// View can be used by readers to avoid locking
+type View interface {
+	All() ([]Snapshot, error)
+	Get(id string) (*Snapshot, error)
+}
+
+var schema = &memdb.DBSchema{
+	Tables: map[string]*memdb.TableSchema{
+		memdbTable: {
+			Name: memdbTable,
+			Indexes: map[string]*memdb.IndexSchema{
+				memdbIDIndex: {
+					Name:    memdbIDIndex,
+					Unique:  true,
+					Indexer: &containerByIDIndexer{},
+				},
+			},
+		},
+	},
+}
+
+type memDB struct {
+	store *memdb.MemDB
+}
+
+// NewViewDB provides the default implementation, with the default schema
+func NewViewDB() (ViewDB, error) {
+	store, err := memdb.NewMemDB(schema)
+	if err != nil {
+		return nil, err
+	}
+	return &memDB{store: store}, nil
+}
+
+// Snapshot provides a consistent read-only View of the database
+func (db *memDB) Snapshot(index *registrar.Registrar) View {
+	return &memdbView{
+		txn:       db.store.Txn(false),
+		nameIndex: index.GetAll(),
+	}
+}
+
+// Save atomically updates the in-memory store state for a Container.
+// Only read only (deep) copies of containers may be passed in.
+func (db *memDB) Save(c *Container) error {
+	txn := db.store.Txn(true)
+	defer txn.Commit()
+	return txn.Insert(memdbTable, c)
+}
+
+// Delete removes an item by ID
+func (db *memDB) Delete(c *Container) error {
+	txn := db.store.Txn(true)
+	defer txn.Commit()
+	return txn.Delete(memdbTable, NewBaseContainer(c.ID, c.Root))
+}
+
+type memdbView struct {
+	txn       *memdb.Txn
+	nameIndex map[string][]string
+}
+
+// All returns all items in this snapshot. Returned objects must never be modified.
+func (v *memdbView) All() ([]Snapshot, error) {
+	var all []Snapshot
+	iter, err := v.txn.Get(memdbTable, memdbIDIndex)
+	if err != nil {
+		return nil, err
+	}
+	for {
+		item := iter.Next()
+		if item == nil {
+			break
+		}
+		snapshot := v.transform(item.(*Container))
+		all = append(all, *snapshot)
+	}
+	return all, nil
+}
+
+// Get returns an item by id. Returned objects must never be modified.
+func (v *memdbView) Get(id string) (*Snapshot, error) {
+	s, err := v.txn.First(memdbTable, memdbIDIndex, id)
+	if err != nil {
+		return nil, err
+	}
+	return v.transform(s.(*Container)), nil
+}
+
+// transform maps a (deep) copied Container object to what queries need.
+// A lock on the Container is not held because these are immutable deep copies.
+func (v *memdbView) transform(container *Container) *Snapshot {
+	snapshot := &Snapshot{
+		Container: types.Container{
+			ID:      container.ID,
+			Names:   v.nameIndex[container.ID],
+			ImageID: container.ImageID.String(),
+			Ports:   []types.Port{},
+			Mounts:  container.GetMountPoints(),
+			State:   container.State.StateString(),
+			Status:  container.State.String(),
+			Created: container.Created.Unix(),
+		},
+		CreatedAt:    container.Created,
+		StartedAt:    container.StartedAt,
+		Name:         container.Name,
+		Pid:          container.Pid,
+		Managed:      container.Managed,
+		ExposedPorts: make(nat.PortSet),
+		PortBindings: make(nat.PortSet),
+		Health:       container.HealthString(),
+		Running:      container.Running,
+		Paused:       container.Paused,
+		ExitCode:     container.ExitCode(),
+	}
+
+	if snapshot.Names == nil {
+		// Dead containers will often have no name, so make sure the response isn't null
+		snapshot.Names = []string{}
+	}
+
+	if container.HostConfig != nil {
+		snapshot.Container.HostConfig.NetworkMode = string(container.HostConfig.NetworkMode)
+		snapshot.HostConfig.Isolation = string(container.HostConfig.Isolation)
+		for binding := range container.HostConfig.PortBindings {
+			snapshot.PortBindings[binding] = struct{}{}
+		}
+	}
+
+	if container.Config != nil {
+		snapshot.Image = container.Config.Image
+		snapshot.Labels = container.Config.Labels
+		for exposed := range container.Config.ExposedPorts {
+			snapshot.ExposedPorts[exposed] = struct{}{}
+		}
+	}
+
+	if len(container.Args) > 0 {
+		args := []string{}
+		for _, arg := range container.Args {
+			if strings.Contains(arg, " ") {
+				args = append(args, fmt.Sprintf("'%s'", arg))
+			} else {
+				args = append(args, arg)
+			}
+		}
+		argsAsString := strings.Join(args, " ")
+		snapshot.Command = fmt.Sprintf("%s %s", container.Path, argsAsString)
+	} else {
+		snapshot.Command = container.Path
+	}
+
+	snapshot.Ports = []types.Port{}
+	networks := make(map[string]*network.EndpointSettings)
+	if container.NetworkSettings != nil {
+		for name, netw := range container.NetworkSettings.Networks {
+			if netw == nil || netw.EndpointSettings == nil {
+				continue
+			}
+			networks[name] = &network.EndpointSettings{
+				EndpointID:          netw.EndpointID,
+				Gateway:             netw.Gateway,
+				IPAddress:           netw.IPAddress,
+				IPPrefixLen:         netw.IPPrefixLen,
+				IPv6Gateway:         netw.IPv6Gateway,
+				GlobalIPv6Address:   netw.GlobalIPv6Address,
+				GlobalIPv6PrefixLen: netw.GlobalIPv6PrefixLen,
+				MacAddress:          netw.MacAddress,
+				NetworkID:           netw.NetworkID,
+			}
+			if netw.IPAMConfig != nil {
+				networks[name].IPAMConfig = &network.EndpointIPAMConfig{
+					IPv4Address: netw.IPAMConfig.IPv4Address,
+					IPv6Address: netw.IPAMConfig.IPv6Address,
+				}
+			}
+		}
+		for port, bindings := range container.NetworkSettings.Ports {
+			p, err := nat.ParsePort(port.Port())
+			if err != nil {
+				logrus.Warnf("invalid port map %+v", err)
+				continue
+			}
+			if len(bindings) == 0 {
+				snapshot.Ports = append(snapshot.Ports, types.Port{
+					PrivatePort: uint16(p),
+					Type:        port.Proto(),
+				})
+				continue
+			}
+			for _, binding := range bindings {
+				h, err := nat.ParsePort(binding.HostPort)
+				if err != nil {
+					logrus.Warnf("invalid host port map %+v", err)
+					continue
+				}
+				snapshot.Ports = append(snapshot.Ports, types.Port{
+					PrivatePort: uint16(p),
+					PublicPort:  uint16(h),
+					Type:        port.Proto(),
+					IP:          binding.HostIP,
+				})
+			}
+		}
+	}
+	snapshot.NetworkSettings = &types.SummaryNetworkSettings{Networks: networks}
+
+	return snapshot
+}
+
+// containerByIDIndexer is used to extract the ID field from Container types.
+// memdb.StringFieldIndex can not be used since ID is a field from an embedded struct.
+type containerByIDIndexer struct{}
+
+// FromObject implements the memdb.SingleIndexer interface for Container objects
+func (e *containerByIDIndexer) FromObject(obj interface{}) (bool, []byte, error) {
+	c, ok := obj.(*Container)
+	if !ok {
+		return false, nil, fmt.Errorf("%T is not a Container", obj)
+	}
+	// Add the null character as a terminator
+	v := c.ID + "\x00"
+	return true, []byte(v), nil
+}
+
+// FromArgs implements the memdb.Indexer interface
+func (e *containerByIDIndexer) FromArgs(args ...interface{}) ([]byte, error) {
+	if len(args) != 1 {
+		return nil, fmt.Errorf("must provide only a single argument")
+	}
+	arg, ok := args[0].(string)
+	if !ok {
+		return nil, fmt.Errorf("argument must be a string: %#v", args[0])
+	}
+	// Add the null character as a terminator
+	arg += "\x00"
+	return []byte(arg), nil
+}
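
ViewDB above is backed by hashicorp/go-memdb, whose read transactions are MVCC snapshots over immutable radix trees; that property is what lets Snapshot() serve docker ps queries without taking container locks. A stand-alone sketch of that behavior, assuming only the vendored go-memdb package (the containers schema and row type here are illustrative, not the schema defined above):

package main

import (
	"fmt"

	memdb "github.com/hashicorp/go-memdb"
)

type row struct {
	ID    string
	State string
}

func main() {
	schema := &memdb.DBSchema{
		Tables: map[string]*memdb.TableSchema{
			"containers": {
				Name: "containers",
				Indexes: map[string]*memdb.IndexSchema{
					"id": {
						Name:    "id",
						Unique:  true,
						Indexer: &memdb.StringFieldIndex{Field: "ID"},
					},
				},
			},
		},
	}
	db, err := memdb.NewMemDB(schema)
	if err != nil {
		panic(err)
	}

	// Writer: insert the initial state.
	txn := db.Txn(true)
	if err := txn.Insert("containers", &row{ID: "c1", State: "running"}); err != nil {
		panic(err)
	}
	txn.Commit()

	// Reader: a read-only transaction, analogous to ViewDB.Snapshot above.
	read := db.Txn(false)

	// Writer: update the same row after the snapshot was taken.
	txn = db.Txn(true)
	if err := txn.Insert("containers", &row{ID: "c1", State: "exited"}); err != nil {
		panic(err)
	}
	txn.Commit()

	// The snapshot still sees the state from when it was created.
	raw, err := read.First("containers", "id", "c1")
	if err != nil {
		panic(err)
	}
	fmt.Println(raw.(*row).State) // prints "running"
}

The trailing "\x00" written by containerByIDIndexer mirrors what memdb's own StringFieldIndex appends, so an exact id lookup against the radix tree does not also match longer IDs that merely share the prefix.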

+ 106 - 0
container/view_test.go

@@ -0,0 +1,106 @@
+package container
+
+import (
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"testing"
+
+	containertypes "github.com/docker/docker/api/types/container"
+	"github.com/docker/docker/pkg/registrar"
+	"github.com/pborman/uuid"
+)
+
+var root string
+
+func TestMain(m *testing.M) {
+	var err error
+	root, err = ioutil.TempDir("", "docker-container-test-")
+	if err != nil {
+		panic(err)
+	}
+	defer os.RemoveAll(root)
+
+	os.Exit(m.Run())
+}
+
+func newContainer(t *testing.T) *Container {
+	var (
+		id    = uuid.New()
+		cRoot = filepath.Join(root, id)
+	)
+	if err := os.MkdirAll(cRoot, 0755); err != nil {
+		t.Fatal(err)
+	}
+	c := NewBaseContainer(id, cRoot)
+	c.HostConfig = &containertypes.HostConfig{}
+	return c
+}
+
+func TestViewSaveDelete(t *testing.T) {
+	db, err := NewViewDB()
+	if err != nil {
+		t.Fatal(err)
+	}
+	c := newContainer(t)
+	if err := c.CheckpointTo(db); err != nil {
+		t.Fatal(err)
+	}
+	if err := db.Delete(c); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestViewAll(t *testing.T) {
+	var (
+		db, _ = NewViewDB()
+		names = registrar.NewRegistrar()
+		one   = newContainer(t)
+		two   = newContainer(t)
+	)
+	one.Pid = 10
+	if err := one.CheckpointTo(db); err != nil {
+		t.Fatal(err)
+	}
+	two.Pid = 20
+	if err := two.CheckpointTo(db); err != nil {
+		t.Fatal(err)
+	}
+
+	all, err := db.Snapshot(names).All()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if l := len(all); l != 2 {
+		t.Fatalf("expected 2 items, got %d", l)
+	}
+	byID := make(map[string]Snapshot)
+	for i := range all {
+		byID[all[i].ID] = all[i]
+	}
+	if s, ok := byID[one.ID]; !ok || s.Pid != 10 {
+		t.Fatalf("expected something different with for id=%s: %v", one.ID, s)
+	}
+	if s, ok := byID[two.ID]; !ok || s.Pid != 20 {
+		t.Fatalf("expected something different with for id=%s: %v", two.ID, s)
+	}
+}
+
+func TestViewGet(t *testing.T) {
+	var (
+		db, _ = NewViewDB()
+		names = registrar.NewRegistrar()
+		one   = newContainer(t)
+	)
+	one.ImageID = "some-image-123"
+	if err := one.CheckpointTo(db); err != nil {
+		t.Fatal(err)
+	}
+	s, err := db.Snapshot(names).Get(one.ID)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if s == nil || s.ImageID != "some-image-123" {
+		t.Fatalf("expected ImageID=some-image-123. Got: %v", s)
+	}
+}

+ 13 - 3
daemon/container.go

@@ -18,6 +18,7 @@ import (
 	"github.com/docker/docker/pkg/truncindex"
 	"github.com/docker/docker/runconfig"
 	"github.com/docker/go-connections/nat"
+	"github.com/opencontainers/selinux/go-selinux/label"
 )
 
 // GetContainer looks for a container using the provided information, which could be
@@ -90,6 +91,9 @@ func (daemon *Daemon) load(id string) (*container.Container, error) {
 	if err := container.FromDisk(); err != nil {
 		return nil, err
 	}
+	if err := label.ReserveLabel(container.ProcessLabel); err != nil {
+		return nil, err
+	}
 
 	if container.ID != id {
 		return container, fmt.Errorf("Container %s is stored at %s", container.ID, id)
@@ -99,7 +103,7 @@ func (daemon *Daemon) load(id string) (*container.Container, error) {
 }
 
 // Register makes a container object usable by the daemon as <container.ID>
-func (daemon *Daemon) Register(c *container.Container) {
+func (daemon *Daemon) Register(c *container.Container) error {
 	// Attach to stdout and stderr
 	if c.Config.OpenStdin {
 		c.StreamConfig.NewInputPipes()
@@ -107,8 +111,14 @@ func (daemon *Daemon) Register(c *container.Container) {
 		c.StreamConfig.NewNopInputPipe()
 	}
 
+	// once in the memory store it is visible to other goroutines
+	// grab a Lock until it has been checkpointed to avoid races
+	c.Lock()
+	defer c.Unlock()
+
 	daemon.containers.Add(c.ID, c)
 	daemon.idIndex.Add(c.ID)
+	return c.CheckpointTo(daemon.containersReplica)
 }
 
 func (daemon *Daemon) newContainer(name string, platform string, config *containertypes.Config, hostConfig *containertypes.HostConfig, imgID image.ID, managed bool) (*container.Container, error) {
@@ -212,7 +222,7 @@ func (daemon *Daemon) setHostConfig(container *container.Container, hostConfig *
 
 	runconfig.SetDefaultNetModeIfBlank(hostConfig)
 	container.HostConfig = hostConfig
-	return container.ToDisk()
+	return container.CheckpointTo(daemon.containersReplica)
 }
 
 // verifyContainerSettings performs validation of the hostconfig and config
@@ -301,7 +311,7 @@ func (daemon *Daemon) verifyContainerSettings(hostConfig *containertypes.HostCon
 			return nil, fmt.Errorf("maximum retry count cannot be negative")
 		}
 	case "":
-	// do nothing
+		// do nothing
 	default:
 		return nil, fmt.Errorf("invalid restart policy '%s'", p.Name)
 	}
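
The locking comment in Register above captures the writer-side discipline this PR relies on: a container is mutated only while its lock is held, and the mutation ends with CheckpointTo, so the on-disk config and the in-memory replica are both current before the lock is released. A hedged sketch of a writer following that rule, compiled against the tree at this commit; markUnhealthy is a hypothetical helper, not part of this change.

package daemon

import (
	"github.com/docker/docker/api/types"
	"github.com/docker/docker/container"
)

// markUnhealthy is a hypothetical mutation shown only to illustrate the
// lock -> mutate -> CheckpointTo discipline established by this PR.
func markUnhealthy(c *container.Container, replica container.ViewDB) error {
	c.Lock()
	defer c.Unlock()

	if c.State != nil && c.State.Health != nil {
		c.State.Health.Status = types.Unhealthy
	}
	// Persist to disk and publish a fresh deep copy for lock-free readers.
	return c.CheckpointTo(replica)
}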

+ 9 - 10
daemon/container_operations.go

@@ -44,6 +44,7 @@ func (daemon *Daemon) getDNSSearchSettings(container *container.Container) []str
 
 	return nil
 }
+
 func (daemon *Daemon) buildSandboxOptions(container *container.Container) ([]libnetwork.SandboxOption, error) {
 	var (
 		sboxOptions []libnetwork.SandboxOption
@@ -568,7 +569,7 @@ func (daemon *Daemon) allocateNetwork(container *container.Container) error {
 
 	}
 
-	if err := container.WriteHostConfig(); err != nil {
+	if _, err := container.WriteHostConfig(); err != nil {
 		return err
 	}
 	networkActions.WithValues("allocate").UpdateSince(start)
@@ -1005,10 +1006,8 @@ func (daemon *Daemon) ConnectToNetwork(container *container.Container, idOrName
 			return err
 		}
 	}
-	if err := container.ToDisk(); err != nil {
-		return fmt.Errorf("Error saving container to disk: %v", err)
-	}
-	return nil
+
+	return container.CheckpointTo(daemon.containersReplica)
 }
 
 // DisconnectFromNetwork disconnects container from network n.
@@ -1044,16 +1043,16 @@ func (daemon *Daemon) DisconnectFromNetwork(container *container.Container, netw
 		return err
 	}
 
-	if err := container.ToDisk(); err != nil {
-		return fmt.Errorf("Error saving container to disk: %v", err)
+	if err := container.CheckpointTo(daemon.containersReplica); err != nil {
+		return err
 	}
 
 	if n != nil {
-		attributes := map[string]string{
+		daemon.LogNetworkEventWithAttributes(n, "disconnect", map[string]string{
 			"container": container.ID,
-		}
-		daemon.LogNetworkEventWithAttributes(n, "disconnect", attributes)
+		})
 	}
+
 	return nil
 }
 

+ 1 - 4
daemon/create.go

@@ -167,12 +167,9 @@ func (daemon *Daemon) create(params types.ContainerCreateConfig, managed bool) (
 	runconfig.SetDefaultNetModeIfBlank(container.HostConfig)
 
 	daemon.updateContainerNetworkSettings(container, endpointsConfigs)
-
-	if err := container.ToDisk(); err != nil {
-		logrus.Errorf("Error saving new container to disk: %v", err)
+	if err := daemon.Register(container); err != nil {
 		return nil, err
 	}
-	daemon.Register(container)
 	stateCtr.set(container.ID, "stopped")
 	daemon.LogContainerEvent(container, "create")
 	return container, nil

+ 28 - 7
daemon/daemon.go

@@ -83,6 +83,7 @@ type Daemon struct {
 	ID                    string
 	repository            string
 	containers            container.Store
+	containersReplica     container.ViewDB
 	execCommands          *exec.Store
 	downloadManager       *xfer.LayerDownloadManager
 	uploadManager         *xfer.LayerUploadManager
@@ -182,17 +183,20 @@ func (daemon *Daemon) restore() error {
 	activeSandboxes := make(map[string]interface{})
 	for id, c := range containers {
 		if err := daemon.registerName(c); err != nil {
-			logrus.Errorf("Failed to register container %s: %s", c.ID, err)
+			logrus.Errorf("Failed to register container name %s: %s", c.ID, err)
 			delete(containers, id)
 			continue
 		}
-		daemon.Register(c)
-
 		// verify that all volumes valid and have been migrated from the pre-1.7 layout
 		if err := daemon.verifyVolumesInfo(c); err != nil {
 			// don't skip the container due to error
 			logrus.Errorf("Failed to verify volumes for container '%s': %v", c.ID, err)
 		}
+		if err := daemon.Register(c); err != nil {
+			logrus.Errorf("Failed to register container %s: %s", c.ID, err)
+			delete(containers, id)
+			continue
+		}
 
 		// The LogConfig.Type is empty if the container was created before docker 1.12 with default log driver.
 		// We should rewrite it to use the daemon defaults.
@@ -212,7 +216,7 @@ func (daemon *Daemon) restore() error {
 		go func(c *container.Container) {
 			defer wg.Done()
 			daemon.backportMountSpec(c)
-			if err := c.ToDiskLocking(); err != nil {
+			if err := daemon.checkpointAndSave(c); err != nil {
 				logrus.WithError(err).WithField("container", c.ID).Error("error saving backported mountspec to disk")
 			}
 
@@ -271,6 +275,7 @@ func (daemon *Daemon) restore() error {
 				}
 			}
 
+			c.Lock()
 			if c.RemovalInProgress {
 				// We probably crashed in the middle of a removal, reset
 				// the flag.
@@ -281,10 +286,13 @@ func (daemon *Daemon) restore() error {
 				// be removed. So we put the container in the "dead"
 				// state and leave further processing up to them.
 				logrus.Debugf("Resetting RemovalInProgress flag from %v", c.ID)
-				c.ResetRemovalInProgress()
-				c.SetDead()
-				c.ToDisk()
+				c.RemovalInProgress = false
+				c.Dead = true
+				if err := c.CheckpointTo(daemon.containersReplica); err != nil {
+					logrus.Errorf("Failed to update container %s state: %v", c.ID, err)
+				}
 			}
+			c.Unlock()
 		}(c)
 	}
 	wg.Wait()
@@ -755,6 +763,9 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
 	d.ID = trustKey.PublicKey().KeyID()
 	d.repository = daemonRepo
 	d.containers = container.NewMemoryStore()
+	if d.containersReplica, err = container.NewViewDB(); err != nil {
+		return nil, err
+	}
 	d.execCommands = exec.NewStore()
 	d.trustKey = trustKey
 	d.idIndex = truncindex.NewTruncIndex([]string{})
@@ -1222,3 +1233,13 @@ func CreateDaemonRoot(config *config.Config) error {
 	}
 	return setupDaemonRoot(config, realRoot, idMappings.RootPair())
 }
+
+// checkpointAndSave grabs a container lock to safely call container.CheckpointTo
+func (daemon *Daemon) checkpointAndSave(container *container.Container) error {
+	container.Lock()
+	defer container.Unlock()
+	if err := container.CheckpointTo(daemon.containersReplica); err != nil {
+		return fmt.Errorf("Error saving container state: %v", err)
+	}
+	return nil
+}

+ 2 - 1
daemon/daemon_unix.go

@@ -1146,7 +1146,8 @@ func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *
 
 	// After we load all the links into the daemon
 	// set them to nil on the hostconfig
-	return container.WriteHostConfig()
+	_, err := container.WriteHostConfig()
+	return err
 }
 
 // conditionalMountOnStart is a platform specific helper function during the

+ 5 - 1
daemon/daemon_unix_test.go

@@ -284,7 +284,11 @@ func TestMigratePre17Volumes(t *testing.T) {
 	}
 	volumedrivers.Register(drv, volume.DefaultDriverName)
 
-	daemon := &Daemon{root: rootDir, repository: containerRoot, volumes: volStore}
+	daemon := &Daemon{
+		root:       rootDir,
+		repository: containerRoot,
+		volumes:    volStore,
+	}
 	err = ioutil.WriteFile(filepath.Join(containerRoot, cid, "config.v2.json"), config, 600)
 	if err != nil {
 		t.Fatal(err)

+ 5 - 2
daemon/delete.go

@@ -103,14 +103,16 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo
 	}
 
 	// Mark container dead. We don't want anybody to be restarting it.
-	container.SetDead()
+	container.Lock()
+	container.Dead = true
 
 	// Save container state to disk. So that if error happens before
 	// container meta file got removed from disk, then a restart of
 	// docker should not make a dead container alive.
-	if err := container.ToDiskLocking(); err != nil && !os.IsNotExist(err) {
+	if err := container.CheckpointTo(daemon.containersReplica); err != nil && !os.IsNotExist(err) {
 		logrus.Errorf("Error saving dying container to disk: %v", err)
 	}
+	container.Unlock()
 
 	// When container creation fails and `RWLayer` has not been created yet, we
 	// do not call `ReleaseRWLayer`
@@ -131,6 +133,7 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo
 	selinuxFreeLxcContexts(container.ProcessLabel)
 	daemon.idIndex.Delete(container.ID)
 	daemon.containers.Delete(container.ID)
+	daemon.containersReplica.Delete(container)
 	if e := daemon.removeMountPoints(container, removeVolume); e != nil {
 		logrus.Error(e)
 	}

+ 7 - 0
daemon/health.go

@@ -167,6 +167,13 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch
 		// Else we're starting or healthy. Stay in that state.
 	}
 
+	// replicate Health status changes
+	if err := c.CheckpointTo(d.containersReplica); err != nil {
+		// queries will be inconsistent until the next probe runs or other state mutations
+		// checkpoint the container
+		logrus.Errorf("Error replicating health state for container %s: %v", c.ID, err)
+	}
+
 	if oldStatus != h.Status {
 		d.LogContainerEvent(c, "health_status: "+h.Status)
 	}

+ 15 - 2
daemon/health_test.go

@@ -29,7 +29,13 @@ func TestNoneHealthcheck(t *testing.T) {
 		},
 		State: &container.State{},
 	}
-	daemon := &Daemon{}
+	store, err := container.NewViewDB()
+	if err != nil {
+		t.Fatal(err)
+	}
+	daemon := &Daemon{
+		containersReplica: store,
+	}
 
 	daemon.initHealthMonitor(c)
 	if c.State.Health != nil {
@@ -62,8 +68,15 @@ func TestHealthStates(t *testing.T) {
 			Image: "image_name",
 		},
 	}
+
+	store, err := container.NewViewDB()
+	if err != nil {
+		t.Fatal(err)
+	}
+
 	daemon := &Daemon{
-		EventsService: e,
+		EventsService:     e,
+		containersReplica: store,
 	}
 
 	c.Config.Healthcheck = &containertypes.HealthConfig{

+ 2 - 2
daemon/inspect.go

@@ -51,7 +51,7 @@ func (daemon *Daemon) ContainerInspectCurrent(name string, size bool) (*types.Co
 		}
 	}
 
-	mountPoints := addMountPoints(container)
+	mountPoints := container.GetMountPoints()
 	networkSettings := &types.NetworkSettings{
 		NetworkSettingsBase: types.NetworkSettingsBase{
 			Bridge:                 container.NetworkSettings.Bridge,
@@ -104,7 +104,7 @@ func (daemon *Daemon) containerInspect120(name string) (*v1p20.ContainerJSON, er
 		return nil, err
 	}
 
-	mountPoints := addMountPoints(container)
+	mountPoints := container.GetMountPoints()
 	config := &v1p20.ContainerConfig{
 		Config:          container.Config,
 		MacAddress:      container.Config.MacAddress,

+ 0 - 14
daemon/inspect_solaris.go

@@ -18,20 +18,6 @@ func (daemon *Daemon) containerInspectPre120(name string) (*v1p19.ContainerJSON,
 	return &v1p19.ContainerJSON{}, nil
 }
 
-func addMountPoints(container *container.Container) []types.MountPoint {
-	mountPoints := make([]types.MountPoint, 0, len(container.MountPoints))
-	for _, m := range container.MountPoints {
-		mountPoints = append(mountPoints, types.MountPoint{
-			Name:        m.Name,
-			Source:      m.Path(),
-			Destination: m.Destination,
-			Driver:      m.Driver,
-			RW:          m.RW,
-		})
-	}
-	return mountPoints
-}
-
 func inspectExecProcessConfig(e *exec.Config) *backend.ExecProcessConfig {
 	return &backend.ExecProcessConfig{
 		Tty:        e.Tty,

+ 0 - 17
daemon/inspect_unix.go

@@ -64,23 +64,6 @@ func (daemon *Daemon) containerInspectPre120(name string) (*v1p19.ContainerJSON,
 	}, nil
 }
 
-func addMountPoints(container *container.Container) []types.MountPoint {
-	mountPoints := make([]types.MountPoint, 0, len(container.MountPoints))
-	for _, m := range container.MountPoints {
-		mountPoints = append(mountPoints, types.MountPoint{
-			Type:        m.Type,
-			Name:        m.Name,
-			Source:      m.Path(),
-			Destination: m.Destination,
-			Driver:      m.Driver,
-			Mode:        m.Mode,
-			RW:          m.RW,
-			Propagation: m.Propagation,
-		})
-	}
-	return mountPoints
-}
-
 func inspectExecProcessConfig(e *exec.Config) *backend.ExecProcessConfig {
 	return &backend.ExecProcessConfig{
 		Tty:        e.Tty,

+ 0 - 15
daemon/inspect_windows.go

@@ -12,21 +12,6 @@ func setPlatformSpecificContainerFields(container *container.Container, contJSON
 	return contJSONBase
 }
 
-func addMountPoints(container *container.Container) []types.MountPoint {
-	mountPoints := make([]types.MountPoint, 0, len(container.MountPoints))
-	for _, m := range container.MountPoints {
-		mountPoints = append(mountPoints, types.MountPoint{
-			Type:        m.Type,
-			Name:        m.Name,
-			Source:      m.Path(),
-			Destination: m.Destination,
-			Driver:      m.Driver,
-			RW:          m.RW,
-		})
-	}
-	return mountPoints
-}
-
 // containerInspectPre120 get containers for pre 1.20 APIs.
 func (daemon *Daemon) containerInspectPre120(name string) (*types.ContainerJSON, error) {
 	return daemon.ContainerInspectCurrent(name, false)

+ 73 - 142
daemon/list.go

@@ -10,7 +10,6 @@ import (
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types/filters"
-	networktypes "github.com/docker/docker/api/types/network"
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/image"
 	"github.com/docker/docker/volume"
@@ -47,7 +46,7 @@ type iterationAction int
 
 // containerReducer represents a reducer for a container.
 // Returns the object to serialize by the api.
-type containerReducer func(*container.Container, *listContext) (*types.Container, error)
+type containerReducer func(*container.Snapshot, *listContext) (*types.Container, error)
 
 const (
 	// includeContainer is the action to include a container in the reducer.
@@ -83,9 +82,9 @@ type listContext struct {
 	exitAllowed []int
 
 	// beforeFilter is a filter to ignore containers that appear before the one given
-	beforeFilter *container.Container
+	beforeFilter *container.Snapshot
 	// sinceFilter is a filter to stop the filtering when the iterator arrive to the given container
-	sinceFilter *container.Container
+	sinceFilter *container.Snapshot
 
 	// taskFilter tells if we should filter based on whether a container is part of a task
 	taskFilter bool
@@ -101,21 +100,21 @@ type listContext struct {
 	*types.ContainerListOptions
 }
 
-// byContainerCreated is a temporary type used to sort a list of containers by creation time.
-type byContainerCreated []*container.Container
+// byCreatedDescending is a temporary type used to sort a list of containers by creation time.
+type byCreatedDescending []container.Snapshot
 
-func (r byContainerCreated) Len() int      { return len(r) }
-func (r byContainerCreated) Swap(i, j int) { r[i], r[j] = r[j], r[i] }
-func (r byContainerCreated) Less(i, j int) bool {
-	return r[i].Created.UnixNano() < r[j].Created.UnixNano()
+func (r byCreatedDescending) Len() int      { return len(r) }
+func (r byCreatedDescending) Swap(i, j int) { r[i], r[j] = r[j], r[i] }
+func (r byCreatedDescending) Less(i, j int) bool {
+	return r[j].CreatedAt.UnixNano() < r[i].CreatedAt.UnixNano()
 }
 
 // Containers returns the list of containers to show given the user's filtering.
 func (daemon *Daemon) Containers(config *types.ContainerListOptions) ([]*types.Container, error) {
-	return daemon.reduceContainers(config, daemon.transformContainer)
+	return daemon.reduceContainers(config, daemon.refreshImage)
 }
 
-func (daemon *Daemon) filterByNameIDMatches(ctx *listContext) []*container.Container {
+func (daemon *Daemon) filterByNameIDMatches(view container.View, ctx *listContext) ([]container.Snapshot, error) {
 	idSearch := false
 	names := ctx.filters.Get("name")
 	ids := ctx.filters.Get("id")
@@ -123,7 +122,9 @@ func (daemon *Daemon) filterByNameIDMatches(ctx *listContext) []*container.Conta
 		// if name or ID filters are not in use, return to
 		// standard behavior of walking the entire container
 		// list from the daemon's in-memory store
-		return daemon.List()
+		all, err := view.All()
+		sort.Sort(byCreatedDescending(all))
+		return all, err
 	}
 
 	// idSearch will determine if we limit name matching to the IDs
@@ -158,38 +159,46 @@ func (daemon *Daemon) filterByNameIDMatches(ctx *listContext) []*container.Conta
 		}
 	}
 
-	cntrs := make([]*container.Container, 0, len(matches))
+	cntrs := make([]container.Snapshot, 0, len(matches))
 	for id := range matches {
-		if c := daemon.containers.Get(id); c != nil {
-			cntrs = append(cntrs, c)
+		c, err := view.Get(id)
+		if err != nil {
+			return nil, err
+		}
+		if c != nil {
+			cntrs = append(cntrs, *c)
 		}
 	}
 
 	// Restore sort-order after filtering
 	// Created gives us nanosec resolution for sorting
-	sort.Sort(sort.Reverse(byContainerCreated(cntrs)))
+	sort.Sort(byCreatedDescending(cntrs))
 
-	return cntrs
+	return cntrs, nil
 }
 
 // reduceContainers parses the user's filtering options and generates the list of containers to return based on a reducer.
 func (daemon *Daemon) reduceContainers(config *types.ContainerListOptions, reducer containerReducer) ([]*types.Container, error) {
 	var (
+		view       = daemon.containersReplica.Snapshot(daemon.nameIndex)
 		containers = []*types.Container{}
 	)
 
-	ctx, err := daemon.foldFilter(config)
+	ctx, err := daemon.foldFilter(view, config)
 	if err != nil {
 		return nil, err
 	}
 
 	// fastpath to only look at a subset of containers if specific name
 	// or ID matches were provided by the user--otherwise we potentially
-	// end up locking and querying many more containers than intended
-	containerList := daemon.filterByNameIDMatches(ctx)
+	// end up querying many more containers than intended
+	containerList, err := daemon.filterByNameIDMatches(view, ctx)
+	if err != nil {
+		return nil, err
+	}
 
-	for _, container := range containerList {
-		t, err := daemon.reducePsContainer(container, ctx, reducer)
+	for i := range containerList {
+		t, err := daemon.reducePsContainer(&containerList[i], ctx, reducer)
 		if err != nil {
 			if err != errStopIteration {
 				return nil, err
@@ -206,23 +215,17 @@ func (daemon *Daemon) reduceContainers(config *types.ContainerListOptions, reduc
 }
 
 // reducePsContainer is the basic representation for a container as expected by the ps command.
-func (daemon *Daemon) reducePsContainer(container *container.Container, ctx *listContext, reducer containerReducer) (*types.Container, error) {
-	container.Lock()
-
+func (daemon *Daemon) reducePsContainer(container *container.Snapshot, ctx *listContext, reducer containerReducer) (*types.Container, error) {
 	// filter containers to return
-	action := includeContainerInList(container, ctx)
-	switch action {
+	switch includeContainerInList(container, ctx) {
 	case excludeContainer:
-		container.Unlock()
 		return nil, nil
 	case stopIteration:
-		container.Unlock()
 		return nil, errStopIteration
 	}
 
 	// transform internal container struct into api structs
 	newC, err := reducer(container, ctx)
-	container.Unlock()
 	if err != nil {
 		return nil, err
 	}
@@ -237,7 +240,7 @@ func (daemon *Daemon) reducePsContainer(container *container.Container, ctx *lis
 }
 
 // foldFilter generates the container filter based on the user's filtering options.
-func (daemon *Daemon) foldFilter(config *types.ContainerListOptions) (*listContext, error) {
+func (daemon *Daemon) foldFilter(view container.View, config *types.ContainerListOptions) (*listContext, error) {
 	psFilters := config.Filters
 
 	if err := psFilters.Validate(acceptedPsFilterTags); err != nil {
@@ -294,10 +297,10 @@ func (daemon *Daemon) foldFilter(config *types.ContainerListOptions) (*listConte
 		return nil, err
 	}
 
-	var beforeContFilter, sinceContFilter *container.Container
+	var beforeContFilter, sinceContFilter *container.Snapshot
 
 	err = psFilters.WalkValues("before", func(value string) error {
-		beforeContFilter, err = daemon.GetContainer(value)
+		beforeContFilter, err = view.Get(value)
 		return err
 	})
 	if err != nil {
@@ -305,7 +308,7 @@ func (daemon *Daemon) foldFilter(config *types.ContainerListOptions) (*listConte
 	}
 
 	err = psFilters.WalkValues("since", func(value string) error {
-		sinceContFilter, err = daemon.GetContainer(value)
+		sinceContFilter, err = view.Get(value)
 		return err
 	})
 	if err != nil {
@@ -383,7 +386,7 @@ func portOp(key string, filter map[nat.Port]bool) func(value string) error {
 
 // includeContainerInList decides whether a container should be included in the output or not based in the filter.
 // It also decides if the iteration should be stopped or not.
-func includeContainerInList(container *container.Container, ctx *listContext) iterationAction {
+func includeContainerInList(container *container.Snapshot, ctx *listContext) iterationAction {
 	// Do not include container if it's in the list before the filter container.
 	// Set the filter container to nil to include the rest of containers after this one.
 	if ctx.beforeFilter != nil {
@@ -422,7 +425,7 @@ func includeContainerInList(container *container.Container, ctx *listContext) it
 	}
 
 	// Do not include container if any of the labels don't match
-	if !ctx.filters.MatchKVList("label", container.Config.Labels) {
+	if !ctx.filters.MatchKVList("label", container.Labels) {
 		return excludeContainer
 	}
 
@@ -440,7 +443,7 @@ func includeContainerInList(container *container.Container, ctx *listContext) it
 	if len(ctx.exitAllowed) > 0 {
 		shouldSkip := true
 		for _, code := range ctx.exitAllowed {
-			if code == container.ExitCode() && !container.Running && !container.StartedAt.IsZero() {
+			if code == container.ExitCode && !container.Running && !container.StartedAt.IsZero() {
 				shouldSkip = false
 				break
 			}
@@ -451,28 +454,34 @@ func includeContainerInList(container *container.Container, ctx *listContext) it
 	}
 
 	// Do not include container if its status doesn't match the filter
-	if !ctx.filters.Match("status", container.State.StateString()) {
+	if !ctx.filters.Match("status", container.State) {
 		return excludeContainer
 	}
 
 	// Do not include container if its health doesn't match the filter
-	if !ctx.filters.ExactMatch("health", container.State.HealthString()) {
+	if !ctx.filters.ExactMatch("health", container.Health) {
 		return excludeContainer
 	}
 
 	if ctx.filters.Include("volume") {
-		volumesByName := make(map[string]*volume.MountPoint)
-		for _, m := range container.MountPoints {
+		volumesByName := make(map[string]types.MountPoint)
+		for _, m := range container.Mounts {
 			if m.Name != "" {
 				volumesByName[m.Name] = m
 			} else {
 				volumesByName[m.Source] = m
 			}
 		}
+		volumesByDestination := make(map[string]types.MountPoint)
+		for _, m := range container.Mounts {
+			if m.Destination != "" {
+				volumesByDestination[m.Destination] = m
+			}
+		}
 
 		volumeExist := fmt.Errorf("volume mounted in container")
 		err := ctx.filters.WalkValues("volume", func(value string) error {
-			if _, exist := container.MountPoints[value]; exist {
+			if _, exist := volumesByDestination[value]; exist {
 				return volumeExist
 			}
 			if _, exist := volumesByName[value]; exist {
@@ -489,19 +498,25 @@ func includeContainerInList(container *container.Container, ctx *listContext) it
 		if len(ctx.images) == 0 {
 			return excludeContainer
 		}
-		if !ctx.images[container.ImageID] {
+		if !ctx.images[image.ID(container.ImageID)] {
 			return excludeContainer
 		}
 	}
 
-	networkExist := fmt.Errorf("container part of network")
+	var (
+		networkExist = errors.New("container part of network")
+		noNetworks   = errors.New("container is not part of any networks")
+	)
 	if ctx.filters.Include("network") {
 		err := ctx.filters.WalkValues("network", func(value string) error {
+			if container.NetworkSettings == nil {
+				return noNetworks
+			}
 			if _, ok := container.NetworkSettings.Networks[value]; ok {
 				return networkExist
 			}
 			for _, nw := range container.NetworkSettings.Networks {
-				if nw.EndpointSettings == nil {
+				if nw == nil {
 					continue
 				}
 				if strings.HasPrefix(nw.NetworkID, value) {
@@ -518,7 +533,7 @@ func includeContainerInList(container *container.Container, ctx *listContext) it
 	if len(ctx.publish) > 0 {
 		shouldSkip := true
 		for port := range ctx.publish {
-			if _, ok := container.HostConfig.PortBindings[port]; ok {
+			if _, ok := container.PortBindings[port]; ok {
 				shouldSkip = false
 				break
 			}
@@ -531,7 +546,7 @@ func includeContainerInList(container *container.Container, ctx *listContext) it
 	if len(ctx.expose) > 0 {
 		shouldSkip := true
 		for port := range ctx.expose {
-			if _, ok := container.Config.ExposedPorts[port]; ok {
+			if _, ok := container.ExposedPorts[port]; ok {
 				shouldSkip = false
 				break
 			}
@@ -544,106 +559,22 @@ func includeContainerInList(container *container.Container, ctx *listContext) it
 	return includeContainer
 }
 
-// transformContainer generates the container type expected by the docker ps command.
-func (daemon *Daemon) transformContainer(container *container.Container, ctx *listContext) (*types.Container, error) {
-	newC := &types.Container{
-		ID:      container.ID,
-		Names:   ctx.names[container.ID],
-		ImageID: container.ImageID.String(),
-	}
-	if newC.Names == nil {
-		// Dead containers will often have no name, so make sure the response isn't null
-		newC.Names = []string{}
-	}
-
-	image := container.Config.Image // if possible keep the original ref
-	if image != container.ImageID.String() {
+// refreshImage checks if the Image ref still points to the correct ID, and updates the ref to the actual ID when it doesn't
+func (daemon *Daemon) refreshImage(s *container.Snapshot, ctx *listContext) (*types.Container, error) {
+	c := s.Container
+	image := s.Image // keep the original ref if still valid (hasn't changed)
+	if image != s.ImageID {
 		id, _, err := daemon.GetImageIDAndPlatform(image)
 		if _, isDNE := err.(ErrImageDoesNotExist); err != nil && !isDNE {
 			return nil, err
 		}
-		if err != nil || id != container.ImageID {
-			image = container.ImageID.String()
+		if err != nil || id.String() != s.ImageID {
+			// ref changed, we need to use original ID
+			image = s.ImageID
 		}
 	}
-	newC.Image = image
-
-	if len(container.Args) > 0 {
-		args := []string{}
-		for _, arg := range container.Args {
-			if strings.Contains(arg, " ") {
-				args = append(args, fmt.Sprintf("'%s'", arg))
-			} else {
-				args = append(args, arg)
-			}
-		}
-		argsAsString := strings.Join(args, " ")
-
-		newC.Command = fmt.Sprintf("%s %s", container.Path, argsAsString)
-	} else {
-		newC.Command = container.Path
-	}
-	newC.Created = container.Created.Unix()
-	newC.State = container.State.StateString()
-	newC.Status = container.State.String()
-	newC.HostConfig.NetworkMode = string(container.HostConfig.NetworkMode)
-	// copy networks to avoid races
-	networks := make(map[string]*networktypes.EndpointSettings)
-	for name, network := range container.NetworkSettings.Networks {
-		if network == nil || network.EndpointSettings == nil {
-			continue
-		}
-		networks[name] = &networktypes.EndpointSettings{
-			EndpointID:          network.EndpointID,
-			Gateway:             network.Gateway,
-			IPAddress:           network.IPAddress,
-			IPPrefixLen:         network.IPPrefixLen,
-			IPv6Gateway:         network.IPv6Gateway,
-			GlobalIPv6Address:   network.GlobalIPv6Address,
-			GlobalIPv6PrefixLen: network.GlobalIPv6PrefixLen,
-			MacAddress:          network.MacAddress,
-			NetworkID:           network.NetworkID,
-		}
-		if network.IPAMConfig != nil {
-			networks[name].IPAMConfig = &networktypes.EndpointIPAMConfig{
-				IPv4Address: network.IPAMConfig.IPv4Address,
-				IPv6Address: network.IPAMConfig.IPv6Address,
-			}
-		}
-	}
-	newC.NetworkSettings = &types.SummaryNetworkSettings{Networks: networks}
-
-	newC.Ports = []types.Port{}
-	for port, bindings := range container.NetworkSettings.Ports {
-		p, err := nat.ParsePort(port.Port())
-		if err != nil {
-			return nil, err
-		}
-		if len(bindings) == 0 {
-			newC.Ports = append(newC.Ports, types.Port{
-				PrivatePort: uint16(p),
-				Type:        port.Proto(),
-			})
-			continue
-		}
-		for _, binding := range bindings {
-			h, err := nat.ParsePort(binding.HostPort)
-			if err != nil {
-				return nil, err
-			}
-			newC.Ports = append(newC.Ports, types.Port{
-				PrivatePort: uint16(p),
-				PublicPort:  uint16(h),
-				Type:        port.Proto(),
-				IP:          binding.HostIP,
-			})
-		}
-	}
-
-	newC.Labels = container.Config.Labels
-	newC.Mounts = addMountPoints(container)
-
-	return newC, nil
+	c.Image = image
+	return &c, nil
 }
 
 // Volumes lists known volumes, using the filter to restrict the range
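
Taken together, the list.go changes replace per-container locking with reads over container.Snapshot values from an immutable view, which is the behavior the PR title advertises. A rough sketch of that read path under the same assumptions as the code above; listRunningNames is illustrative, not a real daemon method.

package daemon

// listRunningNames is a hypothetical reader following the same path as
// Containers/reduceContainers above: take an immutable snapshot, iterate
// deep copies, and never touch a container lock.
func (daemon *Daemon) listRunningNames() ([]string, error) {
	view := daemon.containersReplica.Snapshot(daemon.nameIndex)
	all, err := view.All()
	if err != nil {
		return nil, err
	}
	names := make([]string, 0, len(all))
	for i := range all {
		if all[i].Running {
			names = append(names, all[i].Name)
		}
	}
	return names, nil
}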

+ 1 - 1
daemon/list_unix.go

@@ -6,6 +6,6 @@ import "github.com/docker/docker/container"
 
 // excludeByIsolation is a platform specific helper function to support PS
 // filtering by Isolation. This is a Windows-only concept, so is a no-op on Unix.
-func excludeByIsolation(container *container.Container, ctx *listContext) iterationAction {
+func excludeByIsolation(container *container.Snapshot, ctx *listContext) iterationAction {
 	return includeContainer
 }

+ 1 - 1
daemon/list_windows.go

@@ -8,7 +8,7 @@ import (
 
 // excludeByIsolation is a platform specific helper function to support PS
 // filtering by Isolation. This is a Windows-only concept, so is a no-op on Unix.
-func excludeByIsolation(container *container.Container, ctx *listContext) iterationAction {
+func excludeByIsolation(container *container.Snapshot, ctx *listContext) iterationAction {
 	i := strings.ToLower(string(container.HostConfig.Isolation))
 	if i == "" {
 		i = "default"

+ 10 - 7
daemon/monitor.go

@@ -39,6 +39,9 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
 			return errors.New("Received StateOOM from libcontainerd on Windows. This should never happen.")
 		}
 		daemon.updateHealthMonitor(c)
+		if err := c.CheckpointTo(daemon.containersReplica); err != nil {
+			return err
+		}
 		daemon.LogContainerEvent(c, "oom")
 	case libcontainerd.StateExit:
 
@@ -90,7 +93,7 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
 		daemon.setStateCounter(c)
 
 		defer c.Unlock()
-		if err := c.ToDisk(); err != nil {
+		if err := c.CheckpointTo(daemon.containersReplica); err != nil {
 			return err
 		}
 		return daemon.postRunProcessing(c, e)
@@ -119,30 +122,30 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
 		c.HasBeenStartedBefore = true
 		daemon.setStateCounter(c)
 
-		if err := c.ToDisk(); err != nil {
+		daemon.initHealthMonitor(c)
+		if err := c.CheckpointTo(daemon.containersReplica); err != nil {
 			c.Reset(false)
 			return err
 		}
-		daemon.initHealthMonitor(c)
 
 		daemon.LogContainerEvent(c, "start")
 	case libcontainerd.StatePause:
 		// Container is already locked in this case
 		c.Paused = true
 		daemon.setStateCounter(c)
-		if err := c.ToDisk(); err != nil {
+		daemon.updateHealthMonitor(c)
+		if err := c.CheckpointTo(daemon.containersReplica); err != nil {
 			return err
 		}
-		daemon.updateHealthMonitor(c)
 		daemon.LogContainerEvent(c, "pause")
 	case libcontainerd.StateResume:
 		// Container is already locked in this case
 		c.Paused = false
 		daemon.setStateCounter(c)
-		if err := c.ToDisk(); err != nil {
+		daemon.updateHealthMonitor(c)
+		if err := c.CheckpointTo(daemon.containersReplica); err != nil {
 			return err
 		}
-		daemon.updateHealthMonitor(c)
 		daemon.LogContainerEvent(c, "unpause")
 	}
 	return nil

+ 0 - 4
daemon/names.go

@@ -30,10 +30,6 @@ func (daemon *Daemon) registerName(container *container.Container) error {
 			return err
 		}
 		container.Name = name
-
-		if err := container.ToDiskLocking(); err != nil {
-			logrus.Errorf("Error saving container name to disk: %v", err)
-		}
 	}
 	return daemon.nameIndex.Reserve(container.Name, container.ID)
 }

+ 2 - 2
daemon/rename.go

@@ -82,7 +82,7 @@ func (daemon *Daemon) ContainerRename(oldName, newName string) error {
 		daemon.nameIndex.Release(oldName + k)
 	}
 	daemon.releaseName(oldName)
-	if err = container.ToDisk(); err != nil {
+	if err = container.CheckpointTo(daemon.containersReplica); err != nil {
 		return err
 	}
 
@@ -99,7 +99,7 @@ func (daemon *Daemon) ContainerRename(oldName, newName string) error {
 		if err != nil {
 			container.Name = oldName
 			container.NetworkSettings.IsAnonymousEndpoint = oldIsAnonymousEndpoint
-			if e := container.ToDisk(); e != nil {
+			if e := container.CheckpointTo(daemon.containersReplica); e != nil {
 				logrus.Errorf("%s: Failed in writing to Disk on rename failure: %v", container.ID, e)
 			}
 		}

+ 1 - 1
daemon/restart.go

@@ -52,7 +52,7 @@ func (daemon *Daemon) containerRestart(container *container.Container, seconds i
 		container.HostConfig.AutoRemove = autoRemove
 		// containerStop will write HostConfig to disk, we shall restore AutoRemove
 		// in disk too
-		if toDiskErr := container.ToDiskLocking(); toDiskErr != nil {
+		if toDiskErr := daemon.checkpointAndSave(container); toDiskErr != nil {
 			logrus.Errorf("Write container to disk error: %v", toDiskErr)
 		}
 

+ 4 - 8
daemon/start.go

@@ -58,7 +58,7 @@ func (daemon *Daemon) ContainerStart(name string, hostConfig *containertypes.Hos
 				// if user has change the network mode on starting, clean up the
 				// old networks. It is a deprecated feature and has been removed in Docker 1.12
 				container.NetworkSettings.Networks = nil
-				if err := container.ToDisk(); err != nil {
+				if err := container.CheckpointTo(daemon.containersReplica); err != nil {
 					return err
 				}
 			}
@@ -86,11 +86,6 @@ func (daemon *Daemon) ContainerStart(name string, hostConfig *containertypes.Hos
 	return daemon.containerStart(container, checkpoint, checkpointDir, true)
 }
 
-// Start starts a container
-func (daemon *Daemon) Start(container *container.Container) error {
-	return daemon.containerStart(container, "", "", true)
-}
-
 // containerStart prepares the container to run by setting up everything the
 // container needs, such as storage and networking, as well as links
 // between containers. The container is left waiting for a signal to
@@ -117,8 +112,9 @@ func (daemon *Daemon) containerStart(container *container.Container, checkpoint
 			if container.ExitCode() == 0 {
 				container.SetExitCode(128)
 			}
-			container.ToDisk()
-
+			if err := container.CheckpointTo(daemon.containersReplica); err != nil {
+				logrus.Errorf("%s: failed saving state on start failure: %v", container.ID, err)
+			}
 			container.Reset(false)
 
 			daemon.Cleanup(container)

+ 2 - 1
daemon/start_unix.go

@@ -9,13 +9,14 @@ import (
 	"github.com/docker/docker/libcontainerd"
 )
 
+// getLibcontainerdCreateOptions callers must hold a lock on the container
 func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) ([]libcontainerd.CreateOption, error) {
 	createOptions := []libcontainerd.CreateOption{}
 
 	// Ensure a runtime has been assigned to this container
 	if container.HostConfig.Runtime == "" {
 		container.HostConfig.Runtime = daemon.configStore.GetDefaultRuntimeName()
-		container.ToDisk()
+		container.CheckpointTo(daemon.containersReplica)
 	}
 
 	rt := daemon.configStore.GetRuntime(container.HostConfig.Runtime)

+ 9 - 1
daemon/update.go

@@ -38,7 +38,7 @@ func (daemon *Daemon) update(name string, hostConfig *container.HostConfig) erro
 		if restoreConfig {
 			container.Lock()
 			container.HostConfig = &backupHostConfig
-			container.ToDisk()
+			container.CheckpointTo(daemon.containersReplica)
 			container.Unlock()
 		}
 	}()
@@ -47,10 +47,18 @@ func (daemon *Daemon) update(name string, hostConfig *container.HostConfig) erro
 		return errCannotUpdate(container.ID, fmt.Errorf("Container is marked for removal and cannot be \"update\"."))
 	}
 
+	container.Lock()
 	if err := container.UpdateContainer(hostConfig); err != nil {
 		restoreConfig = true
+		container.Unlock()
 		return errCannotUpdate(container.ID, err)
 	}
+	if err := container.CheckpointTo(daemon.containersReplica); err != nil {
+		restoreConfig = true
+		container.Unlock()
+		return errCannotUpdate(container.ID, err)
+	}
+	container.Unlock()
 
 	// if Restart Policy changed, we need to update container monitor
 	if hostConfig.RestartPolicy.Name != "" {

+ 3 - 1
daemon/volumes_unix.go

@@ -137,6 +137,9 @@ func migrateVolume(id, vfs string) error {
 // verifyVolumesInfo ports volumes configured for the containers pre docker 1.7.
 // It reads the container configuration and creates valid mount points for the old volumes.
 func (daemon *Daemon) verifyVolumesInfo(container *container.Container) error {
+	container.Lock()
+	defer container.Unlock()
+
 	// Inspect old structures only when we're upgrading from old versions
 	// to versions >= 1.7 and the MountPoints has not been populated with volumes data.
 	type volumes struct {
@@ -177,7 +180,6 @@ func (daemon *Daemon) verifyVolumesInfo(container *container.Container) error {
 				container.MountPoints[destination] = &m
 			}
 		}
-		return container.ToDisk()
 	}
 	return nil
 }

+ 1 - 1
integration-cli/docker_cli_daemon_test.go

@@ -2684,7 +2684,7 @@ func (s *DockerDaemonSuite) TestDaemonBackcompatPre17Volumes(c *check.C) {
 	`)
 
 	configPath := filepath.Join(d.Root, "containers", id, "config.v2.json")
-	err = ioutil.WriteFile(configPath, config, 600)
+	c.Assert(ioutil.WriteFile(configPath, config, 600), checker.IsNil)
 	d.Start(c)
 
 	out, err = d.Cmd("inspect", "--type=container", "--format={{ json .Mounts }}", id)

+ 6 - 2
integration-cli/environment/clean.go

@@ -4,6 +4,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"net/http"
+	"regexp"
 	"strings"
 
 	"github.com/docker/docker/api/types"
@@ -50,14 +51,17 @@ func getPausedContainers(t testingT, dockerBinary string) []string {
 	return strings.Fields(result.Combined())
 }
 
+var alreadyExists = regexp.MustCompile(`Error response from daemon: removal of container (\w+) is already in progress`)
+
 func deleteAllContainers(t testingT, dockerBinary string) {
 	containers := getAllContainers(t, dockerBinary)
 	if len(containers) > 0 {
 		result := icmd.RunCommand(dockerBinary, append([]string{"rm", "-fv"}, containers...)...)
 		if result.Error != nil {
 			// If the error is "No such container: ..." this means the container doesn't exists anymore,
-			// we can safely ignore that one.
-			if strings.Contains(result.Stderr(), "No such container") {
+			// or if it is "... removal of container ... is already in progress" it will be removed eventually.
+			// We can safely ignore those.
+			if strings.Contains(result.Stderr(), "No such container") || alreadyExists.MatchString(result.Stderr()) {
 				return
 			}
 			t.Fatalf("error removing containers %v : %v (%s)", containers, result.Error, result.Combined())