|
@@ -1,9 +1,23 @@
|
|
|
// +build windows
|
|
|
|
|
|
-package lcow
|
|
|
+// Maintainer: jhowardmsft
|
|
|
+// Locale: en-gb
|
|
|
+// About: Graph-driver for Linux Containers On Windows (LCOW)
|
|
|
+//
|
|
|
+// This graphdriver runs in two modes. Yet to be determined which one will
|
|
|
+// be the shipping mode. The global mode is where a single utility VM
|
|
|
+// is used for all service VM tool operations. This isn't safe security-wise
|
|
|
+// as the sandboxes of multiple containers, containing untrusted data, are
|
|
|
+// attached to it. This may be fine for client devops scenarios. In
|
|
|
+// safe mode, a unique utility VM is instantiated per service VM tool
|
|
|
+// operation. The downside of safe mode is that operations are slower, as
|
|
|
+// a new service utility VM has to be started and torn down when needed.
|
|
|
+//
|
|
|
+// To enable global mode, run with --storage-opt lcow.globalmode=true
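|
|
|
+// For example, when starting the daemon: dockerd --storage-opt lcow.globalmode=true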
|
|
|
+
|
|
|
+// TODO: Grab logs from SVM at terminate or errors
|
|
|
|
|
|
-// Maintainer: @jhowardmsft
|
|
|
-// Graph-driver for Linux Containers On Windows (LCOW)
|
|
|
+package lcow
|
|
|
|
|
|
import (
|
|
|
"encoding/json"
|
|
@@ -12,6 +26,7 @@ import (
|
|
|
"io/ioutil"
|
|
|
"os"
|
|
|
"path/filepath"
|
|
|
+ "strconv"
|
|
|
"strings"
|
|
|
"sync"
|
|
|
"time"
|
|
@@ -21,158 +36,376 @@ import (
|
|
|
"github.com/docker/docker/daemon/graphdriver"
|
|
|
"github.com/docker/docker/pkg/archive"
|
|
|
"github.com/docker/docker/pkg/idtools"
|
|
|
+ "github.com/docker/docker/pkg/ioutils"
|
|
|
"github.com/docker/docker/pkg/system"
|
|
|
"github.com/jhowardmsft/opengcs/gogcs/client"
|
|
|
)
|
|
|
|
|
|
-// init registers the LCOW driver to the register.
|
|
|
+// init registers this driver with the graphdriver registry. The driver is
|
|
|
+// initialised by InitDriver, passed as the second parameter and implemented in this file.
|
|
|
func init() {
|
|
|
- graphdriver.Register("lcow", InitLCOW)
|
|
|
+ graphdriver.Register("lcow", InitDriver)
|
|
|
}
|
|
|
|
|
|
const (
|
|
|
- // sandboxFilename is the name of the file containing a layers sandbox (read-write layer)
|
|
|
+ // sandboxFilename is the name of the file containing a layer's sandbox (read-write layer).
|
|
|
sandboxFilename = "sandbox.vhdx"
|
|
|
|
|
|
- // svmScratchFilename is the name of the scratch-space used by an SVM to avoid running out of memory
|
|
|
- svmScratchFilename = "scratch.vhdx"
|
|
|
+ // scratchFilename is the name of the scratch-space used by an SVM to avoid running out of memory.
|
|
|
+ scratchFilename = "scratch.vhdx"
|
|
|
+
|
|
|
+ // layerFilename is the name of the file containing a layer's read-only contents.
|
|
|
+ // Note this really is VHD format, not VHDX.
|
|
|
+ layerFilename = "layer.vhd"
|
|
|
+
|
|
|
+ // toolsScratchPath is a location in a service utility VM that the tools can use as a
|
|
|
+ // scratch space to avoid running out of memory.
|
|
|
+ // TODO @jhowardmsft. I really dislike this path! But needs a platform change or passing parameters to the tools.
|
|
|
+ toolsScratchPath = "/mnt/gcs/LinuxServiceVM/scratch"
|
|
|
+
|
|
|
+ // svmGlobalID is the ID used in the serviceVMs map for the global service VM when running in "global" mode.
|
|
|
+ svmGlobalID = "_lcow_global_svm_"
|
|
|
+
|
|
|
+ // cacheDirectory is the sub-folder under the driver's data-root used to cache blank sandbox and scratch VHDs.
|
|
|
+ cacheDirectory = "cache"
|
|
|
+
|
|
|
+	// scratchDirectory is the sub-folder under the driver's data-root used for scratch VHDs in service VMs.
|
|
|
+ scratchDirectory = "scratch"
|
|
|
)
|
|
|
|
|
|
-// cacheType is our internal structure representing an item in our local cache
|
|
|
+// cacheItem is our internal structure representing an item in our local cache
|
|
|
// of things that have been mounted.
|
|
|
-type cacheType struct {
|
|
|
- uvmPath string // Path in utility VM
|
|
|
- hostPath string // Path on host
|
|
|
- refCount int // How many times its been mounted
|
|
|
- isSandbox bool // True if a sandbox
|
|
|
+type cacheItem struct {
|
|
|
+ sync.Mutex // Protects operations performed on this item
|
|
|
+ uvmPath string // Path in utility VM
|
|
|
+ hostPath string // Path on host
|
|
|
+	refCount   int        // How many times it's been mounted
|
|
|
+ isSandbox bool // True if a sandbox
|
|
|
+ isMounted bool // True when mounted in a service VM
|
|
|
+}
|
|
|
+
|
|
|
+// serviceVMItem is our internal structure representing an item in our
|
|
|
+// map of service VMs we are maintaining.
|
|
|
+type serviceVMItem struct {
|
|
|
+ sync.Mutex // Serialises operations being performed in this service VM.
|
|
|
+ scratchAttached bool // Has a scratch been attached?
|
|
|
+ config *client.Config // Represents the service VM item.
|
|
|
}
|
|
|
|
|
|
// Driver represents an LCOW graph driver.
|
|
|
type Driver struct {
|
|
|
- // homeDir is the hostpath where we're storing everything
|
|
|
- homeDir string
|
|
|
- // cachedSandboxFile is the location of the local default-sized cached sandbox
|
|
|
- cachedSandboxFile string
|
|
|
- // options are the graphdriver options we are initialised with
|
|
|
- options []string
|
|
|
- // config is the representation of the SVM.
|
|
|
- // @jhowardmsft LIFETIME TODO - For now, a global service utility-VM
|
|
|
- config client.Config
|
|
|
- // svmScratchSpaceFile is a host location for a dedicated scratch space
|
|
|
- // that the SVM utilities can use as a scratch-space to avoid OOMs
|
|
|
- // @jhowardmsft LIFETIME TODO - For now, a global service utility-VM
|
|
|
- svmScratchSpaceFile string
|
|
|
-
|
|
|
- // it is safe for windows to use a cache here because it does not support
|
|
|
+ dataRoot string // Root path on the host where we are storing everything.
|
|
|
+ cachedSandboxFile string // Location of the local default-sized cached sandbox.
|
|
|
+ cachedSandboxMutex sync.Mutex // Protects race conditions from multiple threads creating the cached sandbox.
|
|
|
+ cachedScratchFile string // Location of the local cached empty scratch space.
|
|
|
+ cachedScratchMutex sync.Mutex // Protects race conditions from multiple threads creating the cached scratch.
|
|
|
+ options []string // Graphdriver options we are initialised with.
|
|
|
+ serviceVmsMutex sync.Mutex // Protects add/updates/delete to the serviceVMs map.
|
|
|
+ serviceVms map[string]*serviceVMItem // Map of the configs representing the service VM(s) we are running.
|
|
|
+ globalMode bool // Indicates if running in an unsafe/global service VM mode.
|
|
|
+
|
|
|
+ // NOTE: It is OK to use a cache here because Windows does not support
|
|
|
// restoring containers when the daemon dies.
|
|
|
|
|
|
- // cacheMu is the mutex protection add/update/deletes to our cache
|
|
|
- cacheMu sync.Mutex
|
|
|
- // cache is the cache of all the IDs we've mounted/unmounted.
|
|
|
- cache map[string]cacheType
|
|
|
+ cacheMutex sync.Mutex // Protects add/update/deletes to cache.
|
|
|
+ cache map[string]*cacheItem // Map holding a cache of all the IDs we've mounted/unmounted.
|
|
|
}
|
|
|
|
|
|
-// InitLCOW returns a new LCOW storage driver.
|
|
|
-func InitLCOW(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
|
|
|
+// deletefiles is a helper function for initialisation where we delete any
|
|
|
+// left-over scratch files in case we were previously forcibly terminated.
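|
|
|
+// It matches the filepath.WalkFunc signature so it can be passed to filepath.Walk below.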
|
|
|
+func deletefiles(path string, f os.FileInfo, err error) error {
|
|
|
+ if strings.HasSuffix(f.Name(), ".vhdx") {
|
|
|
+ logrus.Warnf("lcowdriver: init: deleting stale scratch file %s", path)
|
|
|
+ return os.Remove(path)
|
|
|
+ }
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
+// InitDriver returns a new LCOW storage driver.
|
|
|
+func InitDriver(dataRoot string, options []string, _, _ []idtools.IDMap) (graphdriver.Driver, error) {
|
|
|
title := "lcowdriver: init:"
|
|
|
- logrus.Debugf("%s %s", title, home)
|
|
|
+
|
|
|
+ cd := filepath.Join(dataRoot, cacheDirectory)
|
|
|
+ sd := filepath.Join(dataRoot, scratchDirectory)
|
|
|
|
|
|
d := &Driver{
|
|
|
- homeDir: home,
|
|
|
- options: options,
|
|
|
- cachedSandboxFile: filepath.Join(home, "cache", sandboxFilename),
|
|
|
- svmScratchSpaceFile: filepath.Join(home, "svmscratch", svmScratchFilename),
|
|
|
- cache: make(map[string]cacheType),
|
|
|
+ dataRoot: dataRoot,
|
|
|
+ options: options,
|
|
|
+ cachedSandboxFile: filepath.Join(cd, sandboxFilename),
|
|
|
+ cachedScratchFile: filepath.Join(cd, scratchFilename),
|
|
|
+ cache: make(map[string]*cacheItem),
|
|
|
+ serviceVms: make(map[string]*serviceVMItem),
|
|
|
+ globalMode: false,
|
|
|
+ }
|
|
|
+
|
|
|
+	// Look for relevant options
|
|
|
+ for _, v := range options {
|
|
|
+ opt := strings.SplitN(v, "=", 2)
|
|
|
+ if len(opt) == 2 {
|
|
|
+ switch strings.ToLower(opt[0]) {
|
|
|
+ case "lcow.globalmode":
|
|
|
+ var err error
|
|
|
+ d.globalMode, err = strconv.ParseBool(opt[1])
|
|
|
+ if err != nil {
|
|
|
+ return nil, fmt.Errorf("%s failed to parse value for 'lcow.globalmode' - must be 'true' or 'false'", title)
|
|
|
+ }
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
- if err := idtools.MkdirAllAs(home, 0700, 0, 0); err != nil {
|
|
|
- return nil, fmt.Errorf("%s failed to create '%s': %v", title, home, err)
|
|
|
+ // Make sure the dataRoot directory is created
|
|
|
+ if err := idtools.MkdirAllAs(dataRoot, 0700, 0, 0); err != nil {
|
|
|
+ return nil, fmt.Errorf("%s failed to create '%s': %v", title, dataRoot, err)
|
|
|
}
|
|
|
|
|
|
- // Cache directory for blank sandbox so don't have to pull it from the service VM each time
|
|
|
- if err := idtools.MkdirAllAs(filepath.Dir(d.cachedSandboxFile), 0700, 0, 0); err != nil {
|
|
|
- return nil, fmt.Errorf("%s failed to create '%s': %v", title, home, err)
|
|
|
+ // Make sure the cache directory is created under dataRoot
|
|
|
+ if err := idtools.MkdirAllAs(cd, 0700, 0, 0); err != nil {
|
|
|
+ return nil, fmt.Errorf("%s failed to create '%s': %v", title, cd, err)
|
|
|
}
|
|
|
|
|
|
- // Location for the SVM scratch
|
|
|
- if err := idtools.MkdirAllAs(filepath.Dir(d.svmScratchSpaceFile), 0700, 0, 0); err != nil {
|
|
|
- return nil, fmt.Errorf("%s failed to create '%s': %v", title, home, err)
|
|
|
+ // Make sure the scratch directory is created under dataRoot
|
|
|
+ if err := idtools.MkdirAllAs(sd, 0700, 0, 0); err != nil {
|
|
|
+ return nil, fmt.Errorf("%s failed to create '%s': %v", title, sd, err)
|
|
|
}
|
|
|
|
|
|
+ // Delete any items in the scratch directory
|
|
|
+ filepath.Walk(sd, deletefiles)
|
|
|
+
|
|
|
+ logrus.Infof("%s dataRoot: %s globalMode: %t", title, dataRoot, d.globalMode)
|
|
|
+
|
|
|
return d, nil
|
|
|
}
|
|
|
|
|
|
-// startUvm starts the service utility VM if it isn't running.
|
|
|
-// TODO @jhowardmsft. This will change before RS3 ships as we move to a model of one
|
|
|
-// service VM globally to a service VM per container (or offline operation). However,
|
|
|
-// for the initial bring-up of LCOW, this is acceptable.
|
|
|
-func (d *Driver) startUvm(context string) error {
|
|
|
- const toolsScratchPath = "/mnt/gcs/LinuxServiceVM/scratch"
|
|
|
+// startServiceVMIfNotRunning starts a service utility VM if it is not currently running.
|
|
|
+// It can optionally be started with a mapped virtual disk. Returns the service VM
|
|
|
+// item (wrapping an opengcs config) representing the VM.
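|
|
|
+// In global mode a single service VM is used, keyed in the serviceVms map by
|
|
|
+// svmGlobalID; in safe mode each id gets its own entry in the map.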
|
|
|
+func (d *Driver) startServiceVMIfNotRunning(id string, mvdToAdd *hcsshim.MappedVirtualDisk, context string) (*serviceVMItem, error) {
|
|
|
+ // Use the global ID if in global mode
|
|
|
+ if d.globalMode {
|
|
|
+ id = svmGlobalID
|
|
|
+ }
|
|
|
+
|
|
|
+ title := fmt.Sprintf("lcowdriver: startservicevmifnotrunning %s:", id)
|
|
|
|
|
|
- // Nothing to do if it's already running
|
|
|
- if d.config.Uvm != nil {
|
|
|
- return nil
|
|
|
+	// Make sure we are thread-safe when interrogating the map
|
|
|
+ logrus.Debugf("%s taking serviceVmsMutex", title)
|
|
|
+ d.serviceVmsMutex.Lock()
|
|
|
+
|
|
|
+ // Nothing to do if it's already running except add the mapped drive if supplied.
|
|
|
+ if svm, ok := d.serviceVms[id]; ok {
|
|
|
+ logrus.Debugf("%s exists, releasing serviceVmsMutex", title)
|
|
|
+ d.serviceVmsMutex.Unlock()
|
|
|
+
|
|
|
+ if mvdToAdd != nil {
|
|
|
+ logrus.Debugf("hot-adding %s to %s", mvdToAdd.HostPath, mvdToAdd.ContainerPath)
|
|
|
+
|
|
|
+ // Ensure the item is locked while doing this
|
|
|
+ logrus.Debugf("%s locking serviceVmItem %s", title, svm.config.Name)
|
|
|
+ svm.Lock()
|
|
|
+
|
|
|
+ if err := svm.config.HotAddVhd(mvdToAdd.HostPath, mvdToAdd.ContainerPath); err != nil {
|
|
|
+ logrus.Debugf("%s releasing serviceVmItem %s on hot-add failure %s", title, svm.config.Name, err)
|
|
|
+ svm.Unlock()
|
|
|
+ return nil, fmt.Errorf("%s hot add %s to %s failed: %s", title, mvdToAdd.HostPath, mvdToAdd.ContainerPath, err)
|
|
|
+ }
|
|
|
+
|
|
|
+ logrus.Debugf("%s releasing serviceVmItem %s", title, svm.config.Name)
|
|
|
+ svm.Unlock()
|
|
|
+ }
|
|
|
+ return svm, nil
|
|
|
+ }
|
|
|
+
|
|
|
+ // Release the lock early
|
|
|
+ logrus.Debugf("%s releasing serviceVmsMutex", title)
|
|
|
+ d.serviceVmsMutex.Unlock()
|
|
|
+
|
|
|
+	// So we are starting one. First we need an empty structure.
|
|
|
+ svm := &serviceVMItem{
|
|
|
+ config: &client.Config{},
|
|
|
}
|
|
|
|
|
|
- // So we need to start it. Generate a default configuration
|
|
|
- if err := d.config.GenerateDefault(d.options); err != nil {
|
|
|
- return fmt.Errorf("failed to generate default gogcs configuration (%s): %s", context, err)
|
|
|
+ // Generate a default configuration
|
|
|
+ if err := svm.config.GenerateDefault(d.options); err != nil {
|
|
|
+ return nil, fmt.Errorf("%s failed to generate default gogcs configuration for global svm (%s): %s", title, context, err)
|
|
|
}
|
|
|
|
|
|
- scratchAttached := false
|
|
|
- if _, err := os.Stat(d.svmScratchSpaceFile); err == nil {
|
|
|
- // We have a scratch space already, so just attach it as a mapped virtual disk
|
|
|
- logrus.Debugf("lcowdriver: startuvm: (%s) attaching pre-existing scratch", context)
|
|
|
+	// For the name, we deliberately add a suffix in safe mode to ensure that it doesn't
|
|
|
+ // clash with another utility VM which may be running for the container itself.
|
|
|
+ // This also makes it easier to correlate through Get-ComputeProcess.
|
|
|
+ if id == svmGlobalID {
|
|
|
+ svm.config.Name = svmGlobalID
|
|
|
+ } else {
|
|
|
+ svm.config.Name = fmt.Sprintf("%s_svm", id)
|
|
|
+ }
|
|
|
+
|
|
|
+	// Take the cached scratch mutex around the check to ensure the file is complete
|
|
|
+ // and not in the process of being created by another thread.
|
|
|
+ scratchTargetFile := filepath.Join(d.dataRoot, scratchDirectory, fmt.Sprintf("%s.vhdx", id))
|
|
|
+
|
|
|
+ logrus.Debugf("%s locking cachedScratchMutex", title)
|
|
|
+ d.cachedScratchMutex.Lock()
|
|
|
+ if _, err := os.Stat(d.cachedScratchFile); err == nil {
|
|
|
+ // Make a copy of cached scratch to the scratch directory
|
|
|
+ logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) cloning cached scratch for mvd", context)
|
|
|
+ if err := client.CopyFile(d.cachedScratchFile, scratchTargetFile, true); err != nil {
|
|
|
+ logrus.Debugf("%s releasing cachedScratchMutex on err: %s", title, err)
|
|
|
+ d.cachedScratchMutex.Unlock()
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+
|
|
|
+ // Add the cached clone as a mapped virtual disk
|
|
|
+ logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) adding cloned scratch as mvd", context)
|
|
|
mvd := hcsshim.MappedVirtualDisk{
|
|
|
- HostPath: d.svmScratchSpaceFile,
|
|
|
+ HostPath: scratchTargetFile,
|
|
|
ContainerPath: toolsScratchPath,
|
|
|
CreateInUtilityVM: true,
|
|
|
}
|
|
|
- d.config.MappedVirtualDisks = append(d.config.MappedVirtualDisks, mvd)
|
|
|
- scratchAttached = true
|
|
|
- }
|
|
|
-
|
|
|
- d.config.Name = "LinuxServiceVM" // TODO @jhowardmsft - This requires an in-flight platform change. Can't hard code it to this longer term
|
|
|
- if err := d.config.Create(); err != nil {
|
|
|
- return fmt.Errorf("failed to start utility VM (%s): %s", context, err)
|
|
|
+ svm.config.MappedVirtualDisks = append(svm.config.MappedVirtualDisks, mvd)
|
|
|
+ svm.scratchAttached = true
|
|
|
+ }
|
|
|
+ logrus.Debugf("%s releasing cachedScratchMutex", title)
|
|
|
+ d.cachedScratchMutex.Unlock()
|
|
|
+
|
|
|
+ // If requested to start it with a mapped virtual disk, add it now.
|
|
|
+ if mvdToAdd != nil {
|
|
|
+ svm.config.MappedVirtualDisks = append(svm.config.MappedVirtualDisks, *mvdToAdd)
|
|
|
+ }
|
|
|
+
|
|
|
+ // Start it.
|
|
|
+ logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) starting %s", context, svm.config.Name)
|
|
|
+ if err := svm.config.Create(); err != nil {
|
|
|
+ return nil, fmt.Errorf("failed to start service utility VM (%s): %s", context, err)
|
|
|
+ }
|
|
|
+
|
|
|
+ // As it's now running, add it to the map, checking for a race where another
|
|
|
+ // thread has simultaneously tried to start it.
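|
|
|
+	// If another thread won the race, return the service VM it already registered.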
|
|
|
+ logrus.Debugf("%s locking serviceVmsMutex for insertion", title)
|
|
|
+ d.serviceVmsMutex.Lock()
|
|
|
+ if svm, ok := d.serviceVms[id]; ok {
|
|
|
+ logrus.Debugf("%s releasing serviceVmsMutex after insertion but exists", title)
|
|
|
+ d.serviceVmsMutex.Unlock()
|
|
|
+ return svm, nil
|
|
|
+ }
|
|
|
+ d.serviceVms[id] = svm
|
|
|
+ logrus.Debugf("%s releasing serviceVmsMutex after insertion", title)
|
|
|
+ d.serviceVmsMutex.Unlock()
|
|
|
+
|
|
|
+ // Now we have a running service VM, we can create the cached scratch file if it doesn't exist.
|
|
|
+ logrus.Debugf("%s locking cachedScratchMutex", title)
|
|
|
+ d.cachedScratchMutex.Lock()
|
|
|
+ if _, err := os.Stat(d.cachedScratchFile); err != nil {
|
|
|
+ // TODO: Not a typo, but needs fixing when the platform sandbox stuff has been sorted out.
|
|
|
+ logrus.Debugf("%s (%s): creating an SVM scratch - locking serviceVM", title, context)
|
|
|
+ svm.Lock()
|
|
|
+ if err := svm.config.CreateSandbox(d.cachedScratchFile, client.DefaultSandboxSizeMB, d.cachedSandboxFile); err != nil {
|
|
|
+ logrus.Debugf("%s (%s): releasing serviceVM on error path", title, context)
|
|
|
+ svm.Unlock()
|
|
|
+ logrus.Debugf("%s (%s): releasing cachedScratchMutex on error path", title, context)
|
|
|
+ d.cachedScratchMutex.Unlock()
|
|
|
+ // TODO: NEED TO REMOVE FROM MAP HERE AND STOP IT
|
|
|
+ return nil, fmt.Errorf("failed to create SVM scratch VHDX (%s): %s", context, err)
|
|
|
+ }
|
|
|
+ logrus.Debugf("%s (%s): releasing serviceVM on error path", title, context)
|
|
|
+ svm.Unlock()
|
|
|
}
|
|
|
+ logrus.Debugf("%s (%s): releasing cachedScratchMutex", title, context)
|
|
|
+ d.cachedScratchMutex.Unlock()
|
|
|
|
|
|
// Hot-add the scratch-space if not already attached
|
|
|
- if !scratchAttached {
|
|
|
- logrus.Debugf("lcowdriver: startuvm: (%s) creating an SVM scratch", context)
|
|
|
- if err := d.config.CreateSandbox(d.svmScratchSpaceFile, client.DefaultSandboxSizeMB, d.cachedSandboxFile); err != nil {
|
|
|
- return fmt.Errorf("failed to create SVM scratch VHDX (%s): %s", context, err)
|
|
|
+ if !svm.scratchAttached {
|
|
|
+		// Make a copy of the cached scratch to the scratch directory
|
|
|
+ logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) cloning cached scratch for hot-add", context)
|
|
|
+ if err := client.CopyFile(d.cachedScratchFile, scratchTargetFile, true); err != nil {
|
|
|
+ // TODO: NEED TO REMOVE FROM MAP HERE AND STOP IT
|
|
|
+ return nil, err
|
|
|
}
|
|
|
- logrus.Debugf("lcowdriver: startuvm: (%s) hot-adding an SVM scratch", context)
|
|
|
- if err := d.config.HotAddVhd(d.svmScratchSpaceFile, toolsScratchPath); err != nil {
|
|
|
- return fmt.Errorf("failed to hot-add %s failed: %s", d.svmScratchSpaceFile, err)
|
|
|
+
|
|
|
+ logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) hot-adding scratch %s - locking serviceVM", context, scratchTargetFile)
|
|
|
+ svm.Lock()
|
|
|
+ if err := svm.config.HotAddVhd(scratchTargetFile, toolsScratchPath); err != nil {
|
|
|
+ logrus.Debugf("%s (%s): releasing serviceVM on error path", title, context)
|
|
|
+ svm.Unlock()
|
|
|
+			// TODO: NEED TO REMOVE FROM MAP HERE AND STOP IT
|
|
|
+ return nil, fmt.Errorf("failed to hot-add %s failed: %s", scratchTargetFile, err)
|
|
|
}
|
|
|
+ logrus.Debugf("%s (%s): releasing serviceVM", title, context)
|
|
|
+ svm.Unlock()
|
|
|
}
|
|
|
- logrus.Debugf("lcowdriver: startuvm: (%s) successful", context)
|
|
|
- return nil
|
|
|
+
|
|
|
+ logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) success", context)
|
|
|
+ return svm, nil
|
|
|
}
|
|
|
|
|
|
-// terminateUvm terminates the service utility VM if its running.
|
|
|
-func (d *Driver) terminateUvm(context string) error {
|
|
|
- // Nothing to do if it's not running
|
|
|
- if d.config.Uvm == nil {
|
|
|
- return nil
|
|
|
- }
|
|
|
+// getServiceVM returns the appropriate service utility VM instance, optionally
|
|
|
+// deleting it from the map (but not the global one).
|
|
|
+func (d *Driver) getServiceVM(id string, deleteFromMap bool) (*serviceVMItem, error) {
|
|
|
+ logrus.Debugf("lcowdriver: getservicevm:locking serviceVmsMutex")
|
|
|
+ d.serviceVmsMutex.Lock()
|
|
|
+ defer func() {
|
|
|
+ logrus.Debugf("lcowdriver: getservicevm:releasing serviceVmsMutex")
|
|
|
+ d.serviceVmsMutex.Unlock()
|
|
|
+ }()
|
|
|
+ if d.globalMode {
|
|
|
+ id = svmGlobalID
|
|
|
+ }
|
|
|
+ if _, ok := d.serviceVms[id]; !ok {
|
|
|
+ return nil, fmt.Errorf("getservicevm for %s failed as not found", id)
|
|
|
+ }
|
|
|
+ svm := d.serviceVms[id]
|
|
|
+ if deleteFromMap && id != svmGlobalID {
|
|
|
+ logrus.Debugf("lcowdriver: getservicevm: removing %s from map", id)
|
|
|
+ delete(d.serviceVms, id)
|
|
|
+ }
|
|
|
+ return svm, nil
|
|
|
+}
|
|
|
+
|
|
|
+// terminateServiceVM terminates a service utility VM if it's running, but does nothing
|
|
|
+// when in global mode as its lifetime is limited to that of the daemon.
|
|
|
+func (d *Driver) terminateServiceVM(id, context string, force bool) error {
|
|
|
|
|
|
- // FIXME: @jhowardmsft
|
|
|
- // This isn't thread-safe yet, but will change anyway with the lifetime
|
|
|
- // changes and multiple instances. Deferring that work for now.
|
|
|
- uvm := d.config.Uvm
|
|
|
- d.config.Uvm = nil
|
|
|
+	// We don't do anything in global mode unless the force flag has been passed, which
|
|
|
+ // is only the case for cleanup at driver termination.
|
|
|
+ if d.globalMode {
|
|
|
+ if !force {
|
|
|
+ logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - doing nothing as in global mode", id, context)
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+ id = svmGlobalID
|
|
|
+ }
|
|
|
|
|
|
- if err := uvm.Terminate(); err != nil {
|
|
|
- return fmt.Errorf("failed to terminate utility VM (%s): %s", context, err)
|
|
|
+ // Get the service VM and delete it from the map
|
|
|
+ svm, err := d.getServiceVM(id, true)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
}
|
|
|
|
|
|
- if err := uvm.WaitTimeout(time.Duration(d.config.UvmTimeoutSeconds) * time.Second); err != nil {
|
|
|
- return fmt.Errorf("failed waiting for utility VM to terminate (%s): %s", context, err)
|
|
|
+ // We run the deletion of the scratch as a deferred function to at least attempt
|
|
|
+ // clean-up in case of errors.
|
|
|
+ defer func() {
|
|
|
+ if svm.scratchAttached {
|
|
|
+ scratchTargetFile := filepath.Join(d.dataRoot, scratchDirectory, fmt.Sprintf("%s.vhdx", id))
|
|
|
+ logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - deleting scratch %s", id, context, scratchTargetFile)
|
|
|
+ if err := os.Remove(scratchTargetFile); err != nil {
|
|
|
+ logrus.Warnf("failed to remove scratch file %s (%s): %s", scratchTargetFile, context, err)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }()
|
|
|
+
|
|
|
+ // Nothing to do if it's not running
|
|
|
+ if svm.config.Uvm != nil {
|
|
|
+ logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - calling terminate", id, context)
|
|
|
+ if err := svm.config.Uvm.Terminate(); err != nil {
|
|
|
+ return fmt.Errorf("failed to terminate utility VM (%s): %s", context, err)
|
|
|
+ }
|
|
|
+
|
|
|
+ logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - waiting for utility VM to terminate", id, context)
|
|
|
+ if err := svm.config.Uvm.WaitTimeout(time.Duration(svm.config.UvmTimeoutSeconds) * time.Second); err != nil {
|
|
|
+ return fmt.Errorf("failed waiting for utility VM to terminate (%s): %s", context, err)
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
+ logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - success", id, context)
|
|
|
return nil
|
|
|
}
|
|
|
|
|
@@ -186,6 +419,7 @@ func (d *Driver) String() string {
|
|
|
func (d *Driver) Status() [][2]string {
|
|
|
return [][2]string{
|
|
|
{"LCOW", ""},
|
|
|
+ // TODO: Add some more info here - mode, home, ....
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -197,22 +431,69 @@ func (d *Driver) Exists(id string) bool {
|
|
|
}
|
|
|
|
|
|
// CreateReadWrite creates a layer that is writable for use as a container
|
|
|
-// file system. That equates to creating a sandbox VHDx.
|
|
|
+// file system. That equates to creating a sandbox.
|
|
|
func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
|
|
|
- logrus.Debugf("lcowdriver: createreadwrite: id %s", id)
|
|
|
+ title := fmt.Sprintf("lcowdriver: createreadwrite: id %s", id)
|
|
|
+ logrus.Debugf(title)
|
|
|
|
|
|
- if err := d.startUvm("createreadwrite"); err != nil {
|
|
|
+ // First we need to create the folder
|
|
|
+ if err := d.Create(id, parent, opts); err != nil {
|
|
|
return err
|
|
|
}
|
|
|
|
|
|
- if err := d.Create(id, parent, opts); err != nil {
|
|
|
+ // Massive perf optimisation here. If we know that the RW layer is the default size,
|
|
|
+ // and that the cached sandbox already exists, and we are running in safe mode, we
|
|
|
+	// can just do a simple copy into the layer's sandbox file without needing to start a
|
|
|
+ // unique service VM. For a global service VM, it doesn't really matter.
|
|
|
+ //
|
|
|
+ // TODO: @jhowardmsft Where are we going to get the required size from?
|
|
|
+ // We need to look at the CreateOpts for that, I think....
|
|
|
+
|
|
|
+ // Make sure we have the sandbox mutex taken while we are examining it.
|
|
|
+ logrus.Debugf("%s: locking cachedSandboxMutex", title)
|
|
|
+ d.cachedSandboxMutex.Lock()
|
|
|
+ _, err := os.Stat(d.cachedSandboxFile)
|
|
|
+ logrus.Debugf("%s: releasing cachedSandboxMutex", title)
|
|
|
+ d.cachedSandboxMutex.Unlock()
|
|
|
+ if err == nil {
|
|
|
+ logrus.Debugf("%s: using cached sandbox to populate", title)
|
|
|
+ if err := client.CopyFile(d.cachedSandboxFile, filepath.Join(d.dir(id), sandboxFilename), true); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+
|
|
|
+ logrus.Debugf("%s: creating SVM to create sandbox", title)
|
|
|
+ svm, err := d.startServiceVMIfNotRunning(id, nil, "createreadwrite")
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ defer d.terminateServiceVM(id, "createreadwrite", false)
|
|
|
+
|
|
|
+ // So the cached sandbox needs creating. Ensure we are the only thread creating it.
|
|
|
+ logrus.Debugf("%s: locking cachedSandboxMutex for creation", title)
|
|
|
+ d.cachedSandboxMutex.Lock()
|
|
|
+ defer func() {
|
|
|
+ logrus.Debugf("%s: releasing cachedSandboxMutex for creation", title)
|
|
|
+ d.cachedSandboxMutex.Unlock()
|
|
|
+ }()
|
|
|
+
|
|
|
+ // Synchronise the operation in the service VM.
|
|
|
+ logrus.Debugf("%s: locking svm for sandbox creation", title)
|
|
|
+ svm.Lock()
|
|
|
+ defer func() {
|
|
|
+ logrus.Debugf("%s: releasing svm for sandbox creation", title)
|
|
|
+ svm.Unlock()
|
|
|
+ }()
|
|
|
+ if err := svm.config.CreateSandbox(filepath.Join(d.dir(id), sandboxFilename), client.DefaultSandboxSizeMB, d.cachedSandboxFile); err != nil {
|
|
|
return err
|
|
|
}
|
|
|
|
|
|
- return d.config.CreateSandbox(filepath.Join(d.dir(id), sandboxFilename), client.DefaultSandboxSizeMB, d.cachedSandboxFile)
|
|
|
+ return nil
|
|
|
}
|
|
|
|
|
|
-// Create creates a new read-only layer with the given id.
|
|
|
+// Create creates the folder for the layer with the given id, and
|
|
|
+// adds it to the layer chain.
|
|
|
func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
|
|
|
logrus.Debugf("lcowdriver: create: id %s parent: %s", id, parent)
|
|
|
|
|
@@ -224,7 +505,7 @@ func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
|
|
|
var layerChain []string
|
|
|
if parent != "" {
|
|
|
if !d.Exists(parent) {
|
|
|
- return fmt.Errorf("lcowdriver: cannot create read-only layer with missing parent %s", parent)
|
|
|
+ return fmt.Errorf("lcowdriver: cannot create layer folder with missing parent %s", parent)
|
|
|
}
|
|
|
layerChain = []string{d.dir(parent)}
|
|
|
}
|
|
@@ -232,18 +513,18 @@ func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
|
|
|
|
|
|
// Make sure layers are created with the correct ACL so that VMs can access them.
|
|
|
layerPath := d.dir(id)
|
|
|
- logrus.Debugf("lcowdriver: create: id %s: creating layerPath %s", id, layerPath)
|
|
|
+ logrus.Debugf("lcowdriver: create: id %s: creating %s", id, layerPath)
|
|
|
if err := system.MkdirAllWithACL(layerPath, 755, system.SddlNtvmAdministratorsLocalSystem); err != nil {
|
|
|
return err
|
|
|
}
|
|
|
|
|
|
if err := d.setLayerChain(id, layerChain); err != nil {
|
|
|
if err2 := os.RemoveAll(layerPath); err2 != nil {
|
|
|
- logrus.Warnf("Failed to remove layer %s: %s", layerPath, err2)
|
|
|
+ logrus.Warnf("failed to remove layer %s: %s", layerPath, err2)
|
|
|
}
|
|
|
return err
|
|
|
}
|
|
|
- logrus.Debugf("lcowdriver: createreadwrite: id %s: success", id)
|
|
|
+ logrus.Debugf("lcowdriver: create: id %s: success", id)
|
|
|
|
|
|
return nil
|
|
|
}
|
|
@@ -270,111 +551,123 @@ func (d *Driver) Remove(id string) error {
|
|
|
|
|
|
// Get returns the rootfs path for the id. It is reference counted and
|
|
|
// effectively can be thought of as a "mount the layer into the utility
|
|
|
-// vm if it isn't already"
|
|
|
+// vm if it isn't already". The contract from the caller of this is that
|
|
|
+// all Gets and Puts are matched. It -should- be the case that on cleanup,
|
|
|
+// nothing is mounted.
|
|
|
+//
|
|
|
+// For optimisation, we don't actually mount the filesystem (which in our
|
|
|
+// case means [hot-]adding it to a service VM. But we track that and defer
|
|
|
+// the actual adding to the point we need to access it.
|
|
|
func (d *Driver) Get(id, mountLabel string) (string, error) {
|
|
|
- dir, _, _, err := d.getEx(id)
|
|
|
- return dir, err
|
|
|
-}
|
|
|
-
|
|
|
-// getEx is Get, but also returns the cache-entry and the size of the VHD
|
|
|
-func (d *Driver) getEx(id string) (string, cacheType, int64, error) {
|
|
|
- title := "lcowdriver: getEx"
|
|
|
- logrus.Debugf("%s %s", title, id)
|
|
|
-
|
|
|
- if err := d.startUvm(fmt.Sprintf("getex %s", id)); err != nil {
|
|
|
- logrus.Debugf("%s failed to start utility vm: %s", title, err)
|
|
|
- return "", cacheType{}, 0, err
|
|
|
- }
|
|
|
+ title := fmt.Sprintf("lcowdriver: get: %s", id)
|
|
|
+ logrus.Debugf(title)
|
|
|
|
|
|
// Work out what we are working on
|
|
|
- vhdFilename, vhdSize, isSandbox, err := client.LayerVhdDetails(d.dir(id))
|
|
|
+ vhdFilename, vhdSize, isSandbox, err := getLayerDetails(d.dir(id))
|
|
|
if err != nil {
|
|
|
- logrus.Debugf("%s failed to get LayerVhdDetails from %s: %s", title, d.dir(id), err)
|
|
|
- return "", cacheType{}, 0, fmt.Errorf("%s failed to open layer or sandbox VHD to open in %s: %s", title, d.dir(id), err)
|
|
|
+ logrus.Debugf("%s failed to get layer details from %s: %s", title, d.dir(id), err)
|
|
|
+ return "", fmt.Errorf("%s failed to open layer or sandbox VHD to open in %s: %s", title, d.dir(id), err)
|
|
|
}
|
|
|
logrus.Debugf("%s %s, size %d, isSandbox %t", title, vhdFilename, vhdSize, isSandbox)
|
|
|
|
|
|
- hotAddRequired := false
|
|
|
- d.cacheMu.Lock()
|
|
|
- var cacheEntry cacheType
|
|
|
- if _, ok := d.cache[id]; !ok {
|
|
|
+ // Add item to cache, or update existing item, but ensure we have the
|
|
|
+ // lock while updating items.
|
|
|
+ logrus.Debugf("%s: locking cacheMutex", title)
|
|
|
+ d.cacheMutex.Lock()
|
|
|
+ var cacheEntry *cacheItem
|
|
|
+ if entry, ok := d.cache[id]; !ok {
|
|
|
// The item is not currently in the cache.
|
|
|
- //
|
|
|
- // Sandboxes need hot-adding in the case that there is a single global utility VM
|
|
|
- // This will change for multiple instances with the lifetime changes.
|
|
|
- if isSandbox {
|
|
|
- hotAddRequired = true
|
|
|
- }
|
|
|
- d.cache[id] = cacheType{
|
|
|
- uvmPath: fmt.Sprintf("/mnt/%s", id),
|
|
|
+ cacheEntry = &cacheItem{
|
|
|
refCount: 1,
|
|
|
isSandbox: isSandbox,
|
|
|
hostPath: vhdFilename,
|
|
|
+ uvmPath: fmt.Sprintf("/mnt/%s", id),
|
|
|
+ isMounted: false, // we defer this as an optimisation
|
|
|
}
|
|
|
+ d.cache[id] = cacheEntry
|
|
|
+ logrus.Debugf("%s: added cache entry %+v", title, cacheEntry)
|
|
|
} else {
|
|
|
// Increment the reference counter in the cache.
|
|
|
- cacheEntry = d.cache[id]
|
|
|
- cacheEntry.refCount++
|
|
|
- d.cache[id] = cacheEntry
|
|
|
- }
|
|
|
-
|
|
|
- cacheEntry = d.cache[id]
|
|
|
- logrus.Debugf("%s %s: isSandbox %t, refCount %d", title, id, cacheEntry.isSandbox, cacheEntry.refCount)
|
|
|
- d.cacheMu.Unlock()
|
|
|
-
|
|
|
- if hotAddRequired {
|
|
|
- logrus.Debugf("%s %s: Hot-Adding %s", title, id, vhdFilename)
|
|
|
- if err := d.config.HotAddVhd(vhdFilename, cacheEntry.uvmPath); err != nil {
|
|
|
- return "", cacheType{}, 0, fmt.Errorf("%s hot add %s failed: %s", title, vhdFilename, err)
|
|
|
- }
|
|
|
+ logrus.Debugf("%s: locking cache item for increment", title)
|
|
|
+ entry.Lock()
|
|
|
+ entry.refCount++
|
|
|
+ logrus.Debugf("%s: releasing cache item for increment", title)
|
|
|
+ entry.Unlock()
|
|
|
+ logrus.Debugf("%s: incremented refcount on cache entry %+v", title, cacheEntry)
|
|
|
}
|
|
|
+ logrus.Debugf("%s: releasing cacheMutex", title)
|
|
|
+ d.cacheMutex.Unlock()
|
|
|
|
|
|
logrus.Debugf("%s %s success. %s: %+v: size %d", title, id, d.dir(id), cacheEntry, vhdSize)
|
|
|
- return d.dir(id), cacheEntry, vhdSize, nil
|
|
|
+ return d.dir(id), nil
|
|
|
}
|
|
|
|
|
|
// Put does the reverse of get. If there are no more references to
|
|
|
// the layer, it unmounts it from the utility VM.
|
|
|
func (d *Driver) Put(id string) error {
|
|
|
- title := "lcowdriver: put"
|
|
|
- logrus.Debugf("%s %s", title, id)
|
|
|
-
|
|
|
- if err := d.startUvm(fmt.Sprintf("put %s", id)); err != nil {
|
|
|
- return err
|
|
|
- }
|
|
|
+ title := fmt.Sprintf("lcowdriver: put: %s", id)
|
|
|
|
|
|
- d.cacheMu.Lock()
|
|
|
- // Bad-news if unmounting something that isn't in the cache.
|
|
|
+ logrus.Debugf("%s: locking cacheMutex", title)
|
|
|
+ d.cacheMutex.Lock()
|
|
|
entry, ok := d.cache[id]
|
|
|
if !ok {
|
|
|
- d.cacheMu.Unlock()
|
|
|
+ logrus.Debugf("%s: releasing cacheMutex on error path", title)
|
|
|
+ d.cacheMutex.Unlock()
|
|
|
return fmt.Errorf("%s possible ref-count error, or invalid id was passed to the graphdriver. Cannot handle id %s as it's not in the cache", title, id)
|
|
|
}
|
|
|
|
|
|
- // Are we just decrementing the reference count
|
|
|
+ // Are we just decrementing the reference count?
|
|
|
+ logrus.Debugf("%s: locking cache item for possible decrement", title)
|
|
|
+ entry.Lock()
|
|
|
if entry.refCount > 1 {
|
|
|
entry.refCount--
|
|
|
- d.cache[id] = entry
|
|
|
- logrus.Debugf("%s %s: refCount decremented to %d", title, id, entry.refCount)
|
|
|
- d.cacheMu.Unlock()
|
|
|
+ logrus.Debugf("%s: releasing cache item for decrement and early get-out as refCount is now %d", title, entry.refCount)
|
|
|
+ entry.Unlock()
|
|
|
+ logrus.Debugf("%s: refCount decremented to %d. Releasing cacheMutex", title, entry.refCount)
|
|
|
+ d.cacheMutex.Unlock()
|
|
|
return nil
|
|
|
}
|
|
|
+ logrus.Debugf("%s: releasing cache item", title)
|
|
|
+ entry.Unlock()
|
|
|
+ logrus.Debugf("%s: releasing cacheMutex. Ref count has dropped to zero", title)
|
|
|
+ d.cacheMutex.Unlock()
|
|
|
+
|
|
|
+ // To reach this point, the reference count has dropped to zero. If we have
|
|
|
+ // done a mount and we are in global mode, then remove it. We don't
|
|
|
+ // need to remove in safe mode as the service VM is going to be torn down
|
|
|
+ // anyway.
|
|
|
+
|
|
|
+ if d.globalMode {
|
|
|
+ logrus.Debugf("%s: locking cache item at zero ref-count", title)
|
|
|
+ entry.Lock()
|
|
|
+ defer func() {
|
|
|
+ logrus.Debugf("%s: releasing cache item at zero ref-count", title)
|
|
|
+ entry.Unlock()
|
|
|
+ }()
|
|
|
+ if entry.isMounted {
|
|
|
+ svm, err := d.getServiceVM(id, false)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
|
|
|
- // No more references, so tear it down if previously hot-added
|
|
|
- if entry.isSandbox {
|
|
|
- logrus.Debugf("%s %s: Hot-Removing %s", title, id, entry.hostPath)
|
|
|
- if err := d.config.HotRemoveVhd(entry.hostPath); err != nil {
|
|
|
- d.cacheMu.Unlock()
|
|
|
- return fmt.Errorf("%s failed to hot-remove %s from service utility VM: %s", title, entry.hostPath, err)
|
|
|
+ logrus.Debugf("%s: Hot-Removing %s. Locking svm", title, entry.hostPath)
|
|
|
+ svm.Lock()
|
|
|
+ if err := svm.config.HotRemoveVhd(entry.hostPath); err != nil {
|
|
|
+ logrus.Debugf("%s: releasing svm on error path", title)
|
|
|
+ svm.Unlock()
|
|
|
+ return fmt.Errorf("%s failed to hot-remove %s from global service utility VM: %s", title, entry.hostPath, err)
|
|
|
+ }
|
|
|
+ logrus.Debugf("%s: releasing svm", title)
|
|
|
+ svm.Unlock()
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- // @jhowardmsft TEMPORARY FIX WHILE WAITING FOR HOT-REMOVE TO BE FIXED IN PLATFORM
|
|
|
- //d.terminateUvm(fmt.Sprintf("put %s", id))
|
|
|
-
|
|
|
// Remove from the cache map.
|
|
|
+ logrus.Debugf("%s: Locking cacheMutex to delete item from cache", title)
|
|
|
+ d.cacheMutex.Lock()
|
|
|
delete(d.cache, id)
|
|
|
- d.cacheMu.Unlock()
|
|
|
+ logrus.Debugf("%s: releasing cacheMutex after item deleted from cache", title)
|
|
|
+ d.cacheMutex.Unlock()
|
|
|
|
|
|
logrus.Debugf("%s %s: refCount 0. %s (%s) completed successfully", title, id, entry.hostPath, entry.uvmPath)
|
|
|
return nil
|
|
@@ -385,18 +678,17 @@ func (d *Driver) Put(id string) error {
|
|
|
// still left if the daemon was killed while it was removing a layer.
|
|
|
func (d *Driver) Cleanup() error {
|
|
|
title := "lcowdriver: cleanup"
|
|
|
- logrus.Debugf(title)
|
|
|
|
|
|
- d.cacheMu.Lock()
|
|
|
+ d.cacheMutex.Lock()
|
|
|
for k, v := range d.cache {
|
|
|
logrus.Debugf("%s cache entry: %s: %+v", title, k, v)
|
|
|
if v.refCount > 0 {
|
|
|
logrus.Warnf("%s leaked %s: %+v", title, k, v)
|
|
|
}
|
|
|
}
|
|
|
- d.cacheMu.Unlock()
|
|
|
+ d.cacheMutex.Unlock()
|
|
|
|
|
|
- items, err := ioutil.ReadDir(d.homeDir)
|
|
|
+ items, err := ioutil.ReadDir(d.dataRoot)
|
|
|
if err != nil {
|
|
|
if os.IsNotExist(err) {
|
|
|
return nil
|
|
@@ -410,13 +702,21 @@ func (d *Driver) Cleanup() error {
|
|
|
// warnings if there are errors.
|
|
|
for _, item := range items {
|
|
|
if item.IsDir() && strings.HasSuffix(item.Name(), "-removing") {
|
|
|
- if err := os.RemoveAll(filepath.Join(d.homeDir, item.Name())); err != nil {
|
|
|
+ if err := os.RemoveAll(filepath.Join(d.dataRoot, item.Name())); err != nil {
|
|
|
logrus.Warnf("%s failed to cleanup %s: %s", title, item.Name(), err)
|
|
|
} else {
|
|
|
logrus.Infof("%s cleaned up %s", title, item.Name())
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+ // Cleanup any service VMs we have running, along with their scratch spaces.
|
|
|
+	// We don't take the lock for this as it's taken in terminateServiceVM.
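|
|
|
+	// Passing force=true means even the global service VM is torn down here.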
|
|
|
+ for k, v := range d.serviceVms {
|
|
|
+ logrus.Debugf("%s svm entry: %s: %+v", title, k, v)
|
|
|
+ d.terminateServiceVM(k, "cleanup", true)
|
|
|
+ }
|
|
|
+
|
|
|
return nil
|
|
|
}
|
|
|
|
|
@@ -425,35 +725,88 @@ func (d *Driver) Cleanup() error {
|
|
|
// a tarstream representing the layers contents. The id could be
|
|
|
// a read-only "layer.vhd" or a read-write "sandbox.vhdx". The semantics
|
|
|
// of this function dictate that the layer is already mounted.
|
|
|
+// However, as we do lazy mounting as a performance optimisation,
|
|
|
+// this will likely not be the case.
|
|
|
func (d *Driver) Diff(id, parent string) (io.ReadCloser, error) {
|
|
|
- title := "lcowdriver: diff:"
|
|
|
- logrus.Debugf("%s id %s", title, id)
|
|
|
+ title := fmt.Sprintf("lcowdriver: diff: %s", id)
|
|
|
|
|
|
- if err := d.startUvm(fmt.Sprintf("diff %s", id)); err != nil {
|
|
|
- return nil, err
|
|
|
- }
|
|
|
-
|
|
|
- d.cacheMu.Lock()
|
|
|
+ logrus.Debugf("%s: locking cacheMutex", title)
|
|
|
+ d.cacheMutex.Lock()
|
|
|
if _, ok := d.cache[id]; !ok {
|
|
|
- d.cacheMu.Unlock()
|
|
|
+ logrus.Debugf("%s: releasing cacheMutex on error path", title)
|
|
|
+ d.cacheMutex.Unlock()
|
|
|
return nil, fmt.Errorf("%s fail as %s is not in the cache", title, id)
|
|
|
}
|
|
|
cacheEntry := d.cache[id]
|
|
|
- d.cacheMu.Unlock()
|
|
|
+ logrus.Debugf("%s: releasing cacheMutex", title)
|
|
|
+ d.cacheMutex.Unlock()
|
|
|
|
|
|
// Stat to get size
|
|
|
+ logrus.Debugf("%s: locking cacheEntry", title)
|
|
|
+ cacheEntry.Lock()
|
|
|
fileInfo, err := os.Stat(cacheEntry.hostPath)
|
|
|
if err != nil {
|
|
|
+ logrus.Debugf("%s: releasing cacheEntry on error path", title)
|
|
|
+ cacheEntry.Unlock()
|
|
|
return nil, fmt.Errorf("%s failed to stat %s: %s", title, cacheEntry.hostPath, err)
|
|
|
}
|
|
|
+ logrus.Debugf("%s: releasing cacheEntry", title)
|
|
|
+ cacheEntry.Unlock()
|
|
|
|
|
|
- // Then obtain the tar stream for it
|
|
|
+ // Start the SVM with a mapped virtual disk. Note that if the SVM is
|
|
|
+	// already running and we are in global mode, this will be
|
|
|
+ // hot-added.
|
|
|
+ mvd := &hcsshim.MappedVirtualDisk{
|
|
|
+ HostPath: cacheEntry.hostPath,
|
|
|
+ ContainerPath: cacheEntry.uvmPath,
|
|
|
+ CreateInUtilityVM: true,
|
|
|
+ ReadOnly: true,
|
|
|
+ }
|
|
|
+
|
|
|
+ logrus.Debugf("%s: starting service VM", title)
|
|
|
+ svm, err := d.startServiceVMIfNotRunning(id, mvd, fmt.Sprintf("diff %s", id))
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+
|
|
|
+ // Set `isMounted` for the cache entry. Note that we re-scan the cache
|
|
|
+ // at this point as it's possible the cacheEntry changed during the long-
|
|
|
+ // running operation above when we weren't holding the cacheMutex lock.
|
|
|
+ logrus.Debugf("%s: locking cacheMutex for updating isMounted", title)
|
|
|
+ d.cacheMutex.Lock()
|
|
|
+ if _, ok := d.cache[id]; !ok {
|
|
|
+ logrus.Debugf("%s: releasing cacheMutex on error path of isMounted", title)
|
|
|
+ d.cacheMutex.Unlock()
|
|
|
+ d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
|
|
|
+ return nil, fmt.Errorf("%s fail as %s is not in the cache when updating isMounted", title, id)
|
|
|
+ }
|
|
|
+ cacheEntry = d.cache[id]
|
|
|
+ logrus.Debugf("%s: locking cacheEntry for updating isMounted", title)
|
|
|
+ cacheEntry.Lock()
|
|
|
+ cacheEntry.isMounted = true
|
|
|
+ logrus.Debugf("%s: releasing cacheEntry for updating isMounted", title)
|
|
|
+ cacheEntry.Unlock()
|
|
|
+ logrus.Debugf("%s: releasing cacheMutex for updating isMounted", title)
|
|
|
+ d.cacheMutex.Unlock()
|
|
|
+
|
|
|
+ // Obtain the tar stream for it
|
|
|
logrus.Debugf("%s %s, size %d, isSandbox %t", title, cacheEntry.hostPath, fileInfo.Size(), cacheEntry.isSandbox)
|
|
|
- tarReadCloser, err := d.config.VhdToTar(cacheEntry.hostPath, cacheEntry.uvmPath, cacheEntry.isSandbox, fileInfo.Size())
|
|
|
+ tarReadCloser, err := svm.config.VhdToTar(cacheEntry.hostPath, cacheEntry.uvmPath, cacheEntry.isSandbox, fileInfo.Size())
|
|
|
if err != nil {
|
|
|
+ d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
|
|
|
return nil, fmt.Errorf("%s failed to export layer to tar stream for id: %s, parent: %s : %s", title, id, parent, err)
|
|
|
}
|
|
|
+
|
|
|
logrus.Debugf("%s id %s parent %s completed successfully", title, id, parent)
|
|
|
+
|
|
|
+ // In safe/non-global mode, we can't tear down the service VM until things have been read.
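|
|
|
+	// Wrap the stream so that the service VM is terminated when the caller closes it.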
|
|
|
+ if !d.globalMode {
|
|
|
+ return ioutils.NewReadCloserWrapper(tarReadCloser, func() error {
|
|
|
+ tarReadCloser.Close()
|
|
|
+ d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
|
|
|
+ return nil
|
|
|
+ }), nil
|
|
|
+ }
|
|
|
return tarReadCloser, nil
|
|
|
}
|
|
|
|
|
@@ -465,11 +818,26 @@ func (d *Driver) Diff(id, parent string) (io.ReadCloser, error) {
|
|
|
func (d *Driver) ApplyDiff(id, parent string, diff io.Reader) (int64, error) {
|
|
|
logrus.Debugf("lcowdriver: applydiff: id %s", id)
|
|
|
|
|
|
- if err := d.startUvm(fmt.Sprintf("applydiff %s", id)); err != nil {
|
|
|
+ svm, err := d.startServiceVMIfNotRunning(id, nil, fmt.Sprintf("applydiff %s", id))
|
|
|
+ if err != nil {
|
|
|
return 0, err
|
|
|
}
|
|
|
-
|
|
|
- return d.config.TarToVhd(filepath.Join(d.homeDir, id, "layer.vhd"), diff)
|
|
|
+ defer d.terminateServiceVM(id, fmt.Sprintf("applydiff %s", id), false)
|
|
|
+
|
|
|
+	// TODO @jhowardmsft - the retries are temporary to overcome platform reliability issues.
|
|
|
+ // Obviously this will be removed as platform bugs are fixed.
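|
|
|
+	// The loop below retries the conversion up to 10 more times (11 attempts in total).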
|
|
|
+ retries := 0
|
|
|
+ for {
|
|
|
+ retries++
|
|
|
+ size, err := svm.config.TarToVhd(filepath.Join(d.dataRoot, id, layerFilename), diff)
|
|
|
+ if err != nil {
|
|
|
+ if retries <= 10 {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ return 0, err
|
|
|
+ }
|
|
|
+ return size, err
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
// Changes produces a list of changes between the specified layer
|
|
@@ -500,7 +868,7 @@ func (d *Driver) GetMetadata(id string) (map[string]string, error) {
|
|
|
|
|
|
// dir returns the absolute path to the layer.
|
|
|
func (d *Driver) dir(id string) string {
|
|
|
- return filepath.Join(d.homeDir, filepath.Base(id))
|
|
|
+ return filepath.Join(d.dataRoot, filepath.Base(id))
|
|
|
}
|
|
|
|
|
|
// getLayerChain returns the layer chain information.
|
|
@@ -537,3 +905,25 @@ func (d *Driver) setLayerChain(id string, chain []string) error {
|
|
|
}
|
|
|
return nil
|
|
|
}
|
|
|
+
|
|
|
+// getLayerDetails is a utility for getting a file name, size and indication of
|
|
|
+// sandbox for a VHD(x) in a folder. A read-only layer will be layer.vhd. A
|
|
|
+// read-write layer will be sandbox.vhdx.
|
|
|
+func getLayerDetails(folder string) (string, int64, bool, error) {
|
|
|
+ var fileInfo os.FileInfo
|
|
|
+ isSandbox := false
|
|
|
+ filename := filepath.Join(folder, layerFilename)
|
|
|
+ var err error
|
|
|
+
|
|
|
+ if fileInfo, err = os.Stat(filename); err != nil {
|
|
|
+ filename = filepath.Join(folder, sandboxFilename)
|
|
|
+ if fileInfo, err = os.Stat(filename); err != nil {
|
|
|
+ if os.IsNotExist(err) {
|
|
|
+ return "", 0, isSandbox, fmt.Errorf("could not find layer or sandbox in %s", folder)
|
|
|
+ }
|
|
|
+ return "", 0, isSandbox, fmt.Errorf("error locating layer or sandbox in %s: %s", folder, err)
|
|
|
+ }
|
|
|
+ isSandbox = true
|
|
|
+ }
|
|
|
+ return filename, fileInfo.Size(), isSandbox, nil
|
|
|
+}
|