ソースを参照

daemon: stop checkpointing health probes to disk

The health status and probe log of containers are not mission-critical
data which must survive a crash. It is not worth prematurely wearing out
consumer-grade flash storage by overwriting and fsync()ing the container
config after every probe. Update only the live Container object and
the ViewDB replica on every container health probe instead. It will
eventually get checkpointed along with some other state (or config)
change. Running containers will not be checkpointed on daemon shutdown
when live-restore is enabled, but it does not matter: the health status
and probe log will be zeroed out when the daemon starts back up.

Signed-off-by: Cory Snider <csnider@mirantis.com>
Cory Snider 1 年間 前
コミット
97d32bb7d7
2 ファイル変更31 行追加2 行削除
  1. 26 0
      container/container.go
  2. 5 2
      daemon/health.go

+ 26 - 0
container/container.go

@@ -267,6 +267,32 @@ func (container *Container) WriteHostConfig() (*containertypes.HostConfig, error
 	return &deepCopy, nil
 	return &deepCopy, nil
 }
 }
 
 
+// CommitInMemory makes the Container's current state visible to queries by
+// saving a deep copy of it (built via a JSON encode/decode round trip) to
+// store. It does not persist state, and returns the first encode, decode or
+// Save error encountered. Callers must hold a Container lock.
+func (container *Container) CommitInMemory(store *ViewDB) error {
+	var buf bytes.Buffer
+	if err := json.NewEncoder(&buf).Encode(container); err != nil { // serialize the live object
+		return err
+	}
+
+	var deepCopy Container // detached copy; this, not the live object, is handed to store
+	if err := json.NewDecoder(&buf).Decode(&deepCopy); err != nil {
+		return err
+	}
+
+	buf.Reset() // drop any leftover bytes and reuse the buffer for the HostConfig round trip
+	if err := json.NewEncoder(&buf).Encode(container.HostConfig); err != nil { // NOTE(review): HostConfig is copied separately, presumably because it is excluded from Container's own JSON encoding — confirm
+		return err
+	}
+	if err := json.NewDecoder(&buf).Decode(&deepCopy.HostConfig); err != nil {
+		return err
+	}
+
+	return store.Save(&deepCopy) // no write to disk happens anywhere in this method
+}
+
 // SetupWorkingDirectory sets up the container's working directory as set in container.Config.WorkingDir
 // SetupWorkingDirectory sets up the container's working directory as set in container.Config.WorkingDir
 func (container *Container) SetupWorkingDirectory(rootIdentity idtools.Identity) error {
 func (container *Container) SetupWorkingDirectory(rootIdentity idtools.Identity) error {
 	if container.Config.WorkingDir == "" {
 	if container.Config.WorkingDir == "" {

+ 5 - 2
daemon/health.go

@@ -231,8 +231,11 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch
 		// Else we're starting or healthy. Stay in that state.
 		// Else we're starting or healthy. Stay in that state.
 	}
 	}
 
 
-	// replicate Health status changes
-	if err := c.CheckpointTo(d.containersReplica); err != nil {
+	// Replicate Health status changes to the API, skipping persistent storage
+	// to avoid unnecessary disk writes. The health state is only best-effort
+	// persisted across restarts of the daemon. It will get written to disk on
+	// the next checkpoint, such as when the container state changes.
+	if err := c.CommitInMemory(d.containersReplica); err != nil {
 		// queries will be inconsistent until the next probe runs or other state mutations
 		// queries will be inconsistent until the next probe runs or other state mutations
 		// checkpoint the container
 		// checkpoint the container
 		log.G(context.TODO()).Errorf("Error replicating health state for container %s: %v", c.ID, err)
 		log.G(context.TODO()).Errorf("Error replicating health state for container %s: %v", c.ID, err)