
Merge pull request #34170 from Microsoft/jjh/sandbox

LCOW: Dynamic sandbox management
John Stephens, 8 years ago
commit a3ffc42b13

+ 204 - 124
daemon/graphdriver/lcow/lcow.go

@@ -13,7 +13,43 @@
 // operations. The downside of safe-mode is that operations are slower as
 // a new service utility VM has to be started and torn-down when needed.
 //
-// To enable global mode, run with --storage-opt lcow.globalmode=true
+// Options (needs official documentation, but let's get full functionality first...) @jhowardmsft
+//
+// The following options are read by the graphdriver itself:
+//
+//   * lcow.globalmode - Enables global service VM Mode
+//        -- Possible values:     true/false
+//        -- Default if omitted:  false
+//
+//   * lcow.sandboxsize - Specifies a custom sandbox size in GB for starting a container
+//        -- Possible values:      >= default sandbox size (opengcs defined, currently 20)
+//        -- Default if omitted:   20
+//
+// The following options are read by opengcs:
+//
+//   * lcow.kirdpath - Specifies a custom path to a kernel/initrd pair
+//        -- Possible values:      Any local path that is not a mapped drive
+//        -- Default if omitted:   %ProgramFiles%\Linux Containers
+//
+//   * lcow.kernel - Specifies a custom kernel file located in the `lcow.kirdpath` path
+//        -- Possible values:      Any valid filename
+//        -- Default if omitted:   bootx64.efi
+//
+//   * lcow.initrd - Specifies a custom initrd file located in the `lcow.kirdpath` path
+//        -- Possible values:      Any valid filename
+//        -- Default if omitted:   initrd.img
+//
+//   * lcow.bootparameters - Specifies additional boot parameters for booting in kernel+initrd mode
+//        -- Possible values:      Any valid Linux kernel boot options
+//        -- Default if omitted:   <nil>
+//
+//   * lcow.vhdx - Specifies a custom vhdx file to boot (instead of a kernel+initrd)
+//        -- Possible values:      Any valid filename
+//        -- Default if omitted:   uvm.vhdx under `lcow.kirdpath`
+//
+//   * lcow.timeout - Specifies a timeout for utility VM operations in seconds
+//        -- Possible values:      >=0
+//        -- Default if omitted:   300
 
 // TODO: Grab logs from SVM at terminate or errors
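As a quick illustration of how the options above flow through, the lcow.* key/value strings are parsed by ParseOptions in the vendored opengcs client (see config.go later in this diff). A minimal sketch, assuming the vendored import path introduced by this PR; the option values are hypothetical:

package main

import (
	"fmt"

	"github.com/jhowardmsft/opengcs/gogcs/client"
)

func main() {
	// Hypothetical option values for illustration only.
	opts, err := client.ParseOptions([]string{
		`lcow.kirdpath=C:\Program Files\Linux Containers`,
		"lcow.kernel=bootx64.efi",
		"lcow.initrd=initrd.img",
		"lcow.timeout=600",
	})
	if err != nil {
		fmt.Println("parse failed:", err)
		return
	}
	// Unset options get defaults, e.g. Vhdx becomes uvm.vhdx under KirdPath.
	fmt.Printf("%+v\n", opts)
}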
 
@@ -84,6 +120,34 @@ type cacheItem struct {
 	isMounted  bool   // True when mounted in a service VM
 }
 
+// setIsMounted is a helper function for a cacheItem which marks it as mounted in a service VM, taking the item's lock
+func (ci *cacheItem) setIsMounted() {
+	logrus.Debugf("locking cache item for set isMounted")
+	ci.Lock()
+	defer ci.Unlock()
+	ci.isMounted = true
+	logrus.Debugf("set isMounted on cache item")
+}
+
+// incrementRefCount is a helper function for a cacheItem which increments its reference count under the item's lock
+func (ci *cacheItem) incrementRefCount() {
+	logrus.Debugf("locking cache item for increment")
+	ci.Lock()
+	defer ci.Unlock()
+	ci.refCount++
+	logrus.Debugf("incremented refcount on cache item %+v", ci)
+}
+
+// decrementRefCount is a helper function for a cacheItem which decrements its reference count under the item's lock, returning the new value
+func (ci *cacheItem) decrementRefCount() int {
+	logrus.Debugf("locking cache item for decrement")
+	ci.Lock()
+	defer ci.Unlock()
+	ci.refCount--
+	logrus.Debugf("decremented refcount on cache item %+v", ci)
+	return ci.refCount
+}
+
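The intent is that callers hold the driver-level cacheMutex only long enough to find the item in the map, then use these helpers so the per-item lock is held for the shortest possible window. A self-contained sketch of that pattern, using a trimmed-down stand-in for cacheItem (not the driver's actual type):

package main

import (
	"fmt"
	"sync"
)

// item is a trimmed-down stand-in for the driver's cacheItem.
type item struct {
	sync.Mutex
	refCount int
}

func (ci *item) incrementRefCount() {
	ci.Lock()
	defer ci.Unlock()
	ci.refCount++
}

func (ci *item) decrementRefCount() int {
	ci.Lock()
	defer ci.Unlock()
	ci.refCount--
	return ci.refCount
}

func main() {
	var cacheMutex sync.Mutex
	cache := map[string]*item{"layer": {refCount: 1}}

	// Get: bump the count while holding the map lock.
	cacheMutex.Lock()
	cache["layer"].incrementRefCount()
	cacheMutex.Unlock()

	// Put: drop the count and remove from the map only at zero.
	cacheMutex.Lock()
	if cache["layer"].decrementRefCount() == 0 {
		delete(cache, "layer")
	}
	cacheMutex.Unlock()

	fmt.Println(len(cache)) // 1: the ref-count went 1 -> 2 -> 1
}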
 // serviceVMItem is our internal structure representing an item in our
 // map of service VMs we are maintaining.
 type serviceVMItem struct {
@@ -111,6 +175,14 @@ type Driver struct {
 	cache      map[string]*cacheItem // Map holding a cache of all the IDs we've mounted/unmounted.
 }
 
+// layerDetails is the structure returned by a helper function `getLayerDetails`
+// for getting information about a layer folder
+type layerDetails struct {
+	filename  string // \path\to\sandbox.vhdx or \path\to\layer.vhd
+	size      int64  // size of the above file
+	isSandbox bool   // true if sandbox.vhdx
+}
+
 // deletefiles is a helper function for initialisation where we delete any
 // left-over scratch files in case we were previously forcibly terminated.
 func deletefiles(path string, f os.FileInfo, err error) error {
@@ -204,7 +276,7 @@ func (d *Driver) startServiceVMIfNotRunning(id string, mvdToAdd *hcsshim.MappedV
 			logrus.Debugf("%s locking serviceVmItem %s", title, svm.config.Name)
 			svm.Lock()
 
-			if err := svm.config.HotAddVhd(mvdToAdd.HostPath, mvdToAdd.ContainerPath); err != nil {
+			if err := svm.config.HotAddVhd(mvdToAdd.HostPath, mvdToAdd.ContainerPath, false, true); err != nil {
 				logrus.Debugf("%s releasing serviceVmItem %s on hot-add failure %s", title, svm.config.Name, err)
 				svm.Unlock()
 				return nil, fmt.Errorf("%s hot add %s to %s failed: %s", title, mvdToAdd.HostPath, mvdToAdd.ContainerPath, err)
@@ -274,7 +346,7 @@ func (d *Driver) startServiceVMIfNotRunning(id string, mvdToAdd *hcsshim.MappedV
 
 	// Start it.
 	logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) starting %s", context, svm.config.Name)
-	if err := svm.config.Create(); err != nil {
+	if err := svm.config.StartUtilityVM(); err != nil {
 		return nil, fmt.Errorf("failed to start service utility VM (%s): %s", context, err)
 	}
 
@@ -295,18 +367,22 @@ func (d *Driver) startServiceVMIfNotRunning(id string, mvdToAdd *hcsshim.MappedV
 	logrus.Debugf("%s locking cachedScratchMutex", title)
 	d.cachedScratchMutex.Lock()
 	if _, err := os.Stat(d.cachedScratchFile); err != nil {
-		// TODO: Not a typo, but needs fixing when the platform sandbox stuff has been sorted out.
 		logrus.Debugf("%s (%s): creating an SVM scratch - locking serviceVM", title, context)
 		svm.Lock()
-		if err := svm.config.CreateSandbox(d.cachedScratchFile, client.DefaultSandboxSizeMB, d.cachedSandboxFile); err != nil {
-			logrus.Debugf("%s (%s): releasing serviceVM on error path", title, context)
+		if err := svm.config.CreateExt4Vhdx(scratchTargetFile, client.DefaultVhdxSizeGB, d.cachedScratchFile); err != nil {
+			logrus.Debugf("%s (%s): releasing serviceVM on error path from CreateExt4Vhdx: %s", title, context, err)
 			svm.Unlock()
 			logrus.Debugf("%s (%s): releasing cachedScratchMutex on error path", title, context)
 			d.cachedScratchMutex.Unlock()
-			// TODO: NEED TO REMOVE FROM MAP HERE AND STOP IT
+
+			// Do a force terminate and remove it from the map on failure, ignoring any errors
+			if err2 := d.terminateServiceVM(id, "error path from CreateExt4Vhdx", true); err2 != nil {
+				logrus.Warnf("failed to terminate service VM on error path from CreateExt4Vhdx: %s", err2)
+			}
+
 			return nil, fmt.Errorf("failed to create SVM scratch VHDX (%s): %s", context, err)
 		}
-		logrus.Debugf("%s (%s): releasing serviceVM on error path", title, context)
+		logrus.Debugf("%s (%s): releasing serviceVM after %s created and cached to %s", title, context, scratchTargetFile, d.cachedScratchFile)
 		svm.Unlock()
 	}
 	logrus.Debugf("%s (%s): releasing cachedScratchMutex", title, context)
@@ -314,19 +390,17 @@ func (d *Driver) startServiceVMIfNotRunning(id string, mvdToAdd *hcsshim.MappedV
 
 	// Hot-add the scratch-space if not already attached
 	if !svm.scratchAttached {
-		// Make a copy of it to the layer directory
-		logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) cloning cached scratch for hot-add", context)
-		if err := client.CopyFile(d.cachedScratchFile, scratchTargetFile, true); err != nil {
-			// TODO: NEED TO REMOVE FROM MAP HERE AND STOP IT
-			return nil, err
-		}
-
 		logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) hot-adding scratch %s - locking serviceVM", context, scratchTargetFile)
 		svm.Lock()
-		if err := svm.config.HotAddVhd(scratchTargetFile, toolsScratchPath); err != nil {
-			logrus.Debugf("%s (%s): releasing serviceVM on error path", title, context)
+		if err := svm.config.HotAddVhd(scratchTargetFile, toolsScratchPath, false, true); err != nil {
+			logrus.Debugf("%s (%s): releasing serviceVM on error path of HotAddVhd: %s", title, context, err)
 			svm.Unlock()
-			// TODOL NEED TO REMOVE FROM MAP HERE AND STOP IT
+
+			// Do a force terminate and remove it from the map on failure, ignoring any errors
+			if err2 := d.terminateServiceVM(id, "error path from HotAddVhd", true); err2 != nil {
+				logrus.Warnf("failed to terminate service VM on error path from HotAddVhd: %s", err2)
+			}
+
+			return nil, fmt.Errorf("failed to hot-add %s: %s", scratchTargetFile, err)
 		}
 		logrus.Debugf("%s (%s): releasing serviceVM", title, context)
@@ -441,26 +515,43 @@ func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts
 		return err
 	}
 
+	// Look for an explicit sandbox size option.
+	sandboxSize := uint64(client.DefaultVhdxSizeGB)
+	for k, v := range opts.StorageOpt {
+		switch strings.ToLower(k) {
+		case "lcow.sandboxsize":
+			var err error
+			sandboxSize, err = strconv.ParseUint(v, 10, 32)
+			if err != nil {
+				return fmt.Errorf("%s failed to parse value '%s' for 'lcow.sandboxsize'", title, v)
+			}
+			if sandboxSize < client.DefaultVhdxSizeGB {
+				return fmt.Errorf("%s 'lcow.sandboxsize' option cannot be less than %d", title, client.DefaultVhdxSizeGB)
+			}
+		}
+	}
+
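The same validation is easier to see in isolation. Below is a hypothetical standalone helper (not part of the PR) mirroring the StorageOpt loop above, with 20 standing in for client.DefaultVhdxSizeGB:

package main

import (
	"fmt"
	"strconv"
)

const defaultVhdxSizeGB = 20 // stand-in for client.DefaultVhdxSizeGB

// parseSandboxSize is a hypothetical helper mirroring the loop above.
func parseSandboxSize(storageOpt map[string]string) (uint64, error) {
	size := uint64(defaultVhdxSizeGB)
	if v, ok := storageOpt["lcow.sandboxsize"]; ok {
		parsed, err := strconv.ParseUint(v, 10, 32)
		if err != nil {
			return 0, fmt.Errorf("failed to parse value '%s' for 'lcow.sandboxsize'", v)
		}
		if parsed < defaultVhdxSizeGB {
			return 0, fmt.Errorf("'lcow.sandboxsize' cannot be less than %d", defaultVhdxSizeGB)
		}
		size = parsed
	}
	return size, nil
}

func main() {
	size, err := parseSandboxSize(map[string]string{"lcow.sandboxsize": "40"})
	fmt.Println(size, err) // 40 <nil>
}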
 	// Massive perf optimisation here. If we know that the RW layer is the default size,
 	// and that the cached sandbox already exists, and we are running in safe mode, we
 	// can just do a simple copy into the layers sandbox file without needing to start a
-	// unique service VM. For a global service VM, it doesn't really matter.
+	// unique service VM. For a global service VM, it doesn't really matter. Of course,
+	// this is only the case where the sandbox is the default size.
 	//
-	// TODO: @jhowardmsft Where are we going to get the required size from?
-	// We need to look at the CreateOpts for that, I think....
-
 	// Make sure we have the sandbox mutex taken while we are examining it.
-	logrus.Debugf("%s: locking cachedSandboxMutex", title)
-	d.cachedSandboxMutex.Lock()
-	_, err := os.Stat(d.cachedSandboxFile)
-	logrus.Debugf("%s: releasing cachedSandboxMutex", title)
-	d.cachedSandboxMutex.Unlock()
-	if err == nil {
-		logrus.Debugf("%s: using cached sandbox to populate", title)
-		if err := client.CopyFile(d.cachedSandboxFile, filepath.Join(d.dir(id), sandboxFilename), true); err != nil {
-			return err
+	if sandboxSize == client.DefaultVhdxSizeGB {
+		logrus.Debugf("%s: locking cachedSandboxMutex", title)
+		d.cachedSandboxMutex.Lock()
+		_, err := os.Stat(d.cachedSandboxFile)
+		logrus.Debugf("%s: releasing cachedSandboxMutex", title)
+		d.cachedSandboxMutex.Unlock()
+		if err == nil {
+			logrus.Debugf("%s: using cached sandbox to populate", title)
+			if err := client.CopyFile(d.cachedSandboxFile, filepath.Join(d.dir(id), sandboxFilename), true); err != nil {
+				return err
+			}
+			return nil
 		}
-		return nil
 	}
 
 	logrus.Debugf("%s: creating SVM to create sandbox", title)
@@ -470,13 +561,16 @@ func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts
 	}
 	defer d.terminateServiceVM(id, "createreadwrite", false)
 
-	// So the cached sandbox needs creating. Ensure we are the only thread creating it.
-	logrus.Debugf("%s: locking cachedSandboxMutex for creation", title)
-	d.cachedSandboxMutex.Lock()
-	defer func() {
-		logrus.Debugf("%s: releasing cachedSandboxMutex for creation", title)
-		d.cachedSandboxMutex.Unlock()
-	}()
+	// So the sandbox needs creating. If default size ensure we are the only thread populating the cache.
+	// Non-default size we don't store, just create them one-off so no need to lock the cachedSandboxMutex.
+	if sandboxSize == client.DefaultVhdxSizeGB {
+		logrus.Debugf("%s: locking cachedSandboxMutex for creation", title)
+		d.cachedSandboxMutex.Lock()
+		defer func() {
+			logrus.Debugf("%s: releasing cachedSandboxMutex for creation", title)
+			d.cachedSandboxMutex.Unlock()
+		}()
+	}
 
 	// Synchronise the operation in the service VM.
 	logrus.Debugf("%s: locking svm for sandbox creation", title)
@@ -485,7 +579,15 @@ func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts
 		logrus.Debugf("%s: releasing svm for sandbox creation", title)
 		svm.Unlock()
 	}()
-	if err := svm.config.CreateSandbox(filepath.Join(d.dir(id), sandboxFilename), client.DefaultSandboxSizeMB, d.cachedSandboxFile); err != nil {
+
+	// Make sure we don't write to our local cached copy if this is for a non-default size request.
+	targetCacheFile := d.cachedSandboxFile
+	if sandboxSize != client.DefaultVhdxSizeGB {
+		targetCacheFile = ""
+	}
+
+	// Actually do the creation.
+	if err := svm.config.CreateExt4Vhdx(filepath.Join(d.dir(id), sandboxFilename), uint32(sandboxSize), targetCacheFile); err != nil {
 		return err
 	}
 
@@ -563,42 +665,37 @@ func (d *Driver) Get(id, mountLabel string) (string, error) {
 	logrus.Debugf(title)
 
 	// Work out what we are working on
-	vhdFilename, vhdSize, isSandbox, err := getLayerDetails(d.dir(id))
+	ld, err := getLayerDetails(d.dir(id))
 	if err != nil {
 		logrus.Debugf("%s failed to get layer details from %s: %s", title, d.dir(id), err)
 		return "", fmt.Errorf("%s failed to open layer or sandbox VHD to open in %s: %s", title, d.dir(id), err)
 	}
-	logrus.Debugf("%s %s, size %d, isSandbox %t", title, vhdFilename, vhdSize, isSandbox)
+	logrus.Debugf("%s %s, size %d, isSandbox %t", title, ld.filename, ld.size, ld.isSandbox)
 
 	// Add item to cache, or update existing item, but ensure we have the
 	// lock while updating items.
 	logrus.Debugf("%s: locking cacheMutex", title)
 	d.cacheMutex.Lock()
-	var cacheEntry *cacheItem
-	if entry, ok := d.cache[id]; !ok {
+	var ci *cacheItem
+	if item, ok := d.cache[id]; !ok {
 		// The item is not currently in the cache.
-		cacheEntry = &cacheItem{
+		ci = &cacheItem{
 			refCount:  1,
-			isSandbox: isSandbox,
-			hostPath:  vhdFilename,
+			isSandbox: ld.isSandbox,
+			hostPath:  ld.filename,
 			uvmPath:   fmt.Sprintf("/mnt/%s", id),
 			isMounted: false, // we defer this as an optimisation
 		}
-		d.cache[id] = cacheEntry
-		logrus.Debugf("%s: added cache entry %+v", title, cacheEntry)
+		d.cache[id] = ci
+		logrus.Debugf("%s: added cache item %+v", title, ci)
 	} else {
 		// Increment the reference counter in the cache.
-		logrus.Debugf("%s: locking cache item for increment", title)
-		entry.Lock()
-		entry.refCount++
-		logrus.Debugf("%s: releasing cache item for increment", title)
-		entry.Unlock()
-		logrus.Debugf("%s: incremented refcount on cache entry %+v", title, cacheEntry)
+		item.incrementRefCount()
 	}
 	logrus.Debugf("%s: releasing cacheMutex", title)
 	d.cacheMutex.Unlock()
 
-	logrus.Debugf("%s %s success. %s: %+v: size %d", title, id, d.dir(id), cacheEntry, vhdSize)
+	logrus.Debugf("%s %s success. %s: %+v: size %d", title, id, d.dir(id), ci, ld.size)
 	return d.dir(id), nil
 }
 
@@ -609,67 +706,53 @@ func (d *Driver) Put(id string) error {
 
 	logrus.Debugf("%s: locking cacheMutex", title)
 	d.cacheMutex.Lock()
-	entry, ok := d.cache[id]
+	item, ok := d.cache[id]
 	if !ok {
 		logrus.Debugf("%s: releasing cacheMutex on error path", title)
 		d.cacheMutex.Unlock()
 		return fmt.Errorf("%s possible ref-count error, or invalid id was passed to the graphdriver. Cannot handle id %s as it's not in the cache", title, id)
 	}
 
-	// Are we just decrementing the reference count?
-	logrus.Debugf("%s: locking cache item for possible decrement", title)
-	entry.Lock()
-	if entry.refCount > 1 {
-		entry.refCount--
-		logrus.Debugf("%s: releasing cache item for decrement and early get-out as refCount is now %d", title, entry.refCount)
-		entry.Unlock()
-		logrus.Debugf("%s: refCount decremented to %d. Releasing cacheMutex", title, entry.refCount)
+	// Decrement the ref-count, and nothing more to do if still in use.
+	if item.decrementRefCount() > 0 {
+		logrus.Debugf("%s: releasing cacheMutex. Cache item is still in use", title)
 		d.cacheMutex.Unlock()
 		return nil
 	}
-	logrus.Debugf("%s: releasing cache item", title)
-	entry.Unlock()
-	logrus.Debugf("%s: releasing cacheMutex. Ref count has dropped to zero", title)
-	d.cacheMutex.Unlock()
 
-	// To reach this point, the reference count has dropped to zero. If we have
-	// done a mount and we are in global mode, then remove it. We don't
-	// need to remove in safe mode as the service VM is going to be torn down
-	// anyway.
+	// Remove from the cache map.
+	delete(d.cache, id)
+	logrus.Debugf("%s: releasing cacheMutex. Ref count on cache item has dropped to zero, removed from cache", title)
+	d.cacheMutex.Unlock()
 
+	// If we have done a mount and we are in global mode, then remove it. We don't
+	// need to remove in safe mode as the service VM is going to be torn down anyway.
 	if d.globalMode {
 		logrus.Debugf("%s: locking cache item at zero ref-count", title)
-		entry.Lock()
+		item.Lock()
 		defer func() {
 			logrus.Debugf("%s: releasing cache item at zero ref-count", title)
-			entry.Unlock()
+			item.Unlock()
 		}()
-		if entry.isMounted {
+		if item.isMounted {
 			svm, err := d.getServiceVM(id, false)
 			if err != nil {
 				return err
 			}
 
-			logrus.Debugf("%s: Hot-Removing %s. Locking svm", title, entry.hostPath)
+			logrus.Debugf("%s: Hot-Removing %s. Locking svm", title, item.hostPath)
 			svm.Lock()
-			if err := svm.config.HotRemoveVhd(entry.hostPath); err != nil {
+			if err := svm.config.HotRemoveVhd(item.hostPath); err != nil {
 				logrus.Debugf("%s: releasing svm on error path", title)
 				svm.Unlock()
-				return fmt.Errorf("%s failed to hot-remove %s from global service utility VM: %s", title, entry.hostPath, err)
+				return fmt.Errorf("%s failed to hot-remove %s from global service utility VM: %s", title, item.hostPath, err)
 			}
 			logrus.Debugf("%s: releasing svm", title)
 			svm.Unlock()
 		}
 	}
 
-	// Remove from the cache map.
-	logrus.Debugf("%s: Locking cacheMutex to delete item from cache", title)
-	d.cacheMutex.Lock()
-	delete(d.cache, id)
-	logrus.Debugf("%s: releasing cacheMutex after item deleted from cache", title)
-	d.cacheMutex.Unlock()
-
-	logrus.Debugf("%s %s: refCount 0. %s (%s) completed successfully", title, id, entry.hostPath, entry.uvmPath)
+	logrus.Debugf("%s %s: refCount 0. %s (%s) completed successfully", title, id, item.hostPath, item.uvmPath)
 	return nil
 }
 
@@ -681,7 +764,7 @@ func (d *Driver) Cleanup() error {
 
 	d.cacheMutex.Lock()
 	for k, v := range d.cache {
-		logrus.Debugf("%s cache entry: %s: %+v", title, k, v)
+		logrus.Debugf("%s cache item: %s: %+v", title, k, v)
 		if v.refCount > 0 {
 			logrus.Warnf("%s leaked %s: %+v", title, k, v)
 		}
@@ -713,7 +796,7 @@ func (d *Driver) Cleanup() error {
 	// Cleanup any service VMs we have running, along with their scratch spaces.
 	// We don't take the lock for this as it's taken in terminateServiceVm.
 	for k, v := range d.serviceVms {
-		logrus.Debugf("%s svm entry: %s: %+v", title, k, v)
+		logrus.Debugf("%s svm: %s: %+v", title, k, v)
 		d.terminateServiceVM(k, "cleanup", true)
 	}
 
@@ -737,28 +820,28 @@ func (d *Driver) Diff(id, parent string) (io.ReadCloser, error) {
 		d.cacheMutex.Unlock()
 		return nil, fmt.Errorf("%s fail as %s is not in the cache", title, id)
 	}
-	cacheEntry := d.cache[id]
+	ci := d.cache[id]
 	logrus.Debugf("%s: releasing cacheMutex", title)
 	d.cacheMutex.Unlock()
 
 	// Stat to get size
-	logrus.Debugf("%s: locking cacheEntry", title)
-	cacheEntry.Lock()
-	fileInfo, err := os.Stat(cacheEntry.hostPath)
+	logrus.Debugf("%s: locking cacheItem", title)
+	ci.Lock()
+	fileInfo, err := os.Stat(ci.hostPath)
 	if err != nil {
-		logrus.Debugf("%s: releasing cacheEntry on error path", title)
-		cacheEntry.Unlock()
-		return nil, fmt.Errorf("%s failed to stat %s: %s", title, cacheEntry.hostPath, err)
+		logrus.Debugf("%s: releasing cacheItem on error path", title)
+		ci.Unlock()
+		return nil, fmt.Errorf("%s failed to stat %s: %s", title, ci.hostPath, err)
 	}
-	logrus.Debugf("%s: releasing cacheEntry", title)
-	cacheEntry.Unlock()
+	logrus.Debugf("%s: releasing cacheItem", title)
+	ci.Unlock()
 
 	// Start the SVM with a mapped virtual disk. Note that if the SVM is
 	// already running and we are in global mode, this will be
 	// hot-added.
 	mvd := &hcsshim.MappedVirtualDisk{
-		HostPath:          cacheEntry.hostPath,
-		ContainerPath:     cacheEntry.uvmPath,
+		HostPath:          ci.hostPath,
+		ContainerPath:     ci.uvmPath,
 		CreateInUtilityVM: true,
 		ReadOnly:          true,
 	}
@@ -769,8 +852,8 @@ func (d *Driver) Diff(id, parent string) (io.ReadCloser, error) {
 		return nil, err
 	}
 
-	// Set `isMounted` for the cache entry. Note that we re-scan the cache
-	// at this point as it's possible the cacheEntry changed during the long-
+	// Set `isMounted` for the cache item. Note that we re-scan the cache
+	// at this point as it's possible the cacheItem changed during the long-
 	// running operation above when we weren't holding the cacheMutex lock.
 	logrus.Debugf("%s: locking cacheMutex for updating isMounted", title)
 	d.cacheMutex.Lock()
@@ -780,18 +863,14 @@ func (d *Driver) Diff(id, parent string) (io.ReadCloser, error) {
 		d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
 		return nil, fmt.Errorf("%s fail as %s is not in the cache when updating isMounted", title, id)
 	}
-	cacheEntry = d.cache[id]
-	logrus.Debugf("%s: locking cacheEntry for updating isMounted", title)
-	cacheEntry.Lock()
-	cacheEntry.isMounted = true
-	logrus.Debugf("%s: releasing cacheEntry for updating isMounted", title)
-	cacheEntry.Unlock()
+	ci = d.cache[id]
+	ci.setIsMounted()
 	logrus.Debugf("%s: releasing cacheMutex for updating isMounted", title)
 	d.cacheMutex.Unlock()
 
 	// Obtain the tar stream for it
-	logrus.Debugf("%s %s, size %d, isSandbox %t", title, cacheEntry.hostPath, fileInfo.Size(), cacheEntry.isSandbox)
-	tarReadCloser, err := svm.config.VhdToTar(cacheEntry.hostPath, cacheEntry.uvmPath, cacheEntry.isSandbox, fileInfo.Size())
+	logrus.Debugf("%s %s, size %d, isSandbox %t", title, ci.hostPath, fileInfo.Size(), ci.isSandbox)
+	tarReadCloser, err := svm.config.VhdToTar(ci.hostPath, ci.uvmPath, ci.isSandbox, fileInfo.Size())
 	if err != nil {
 		d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
 		return nil, fmt.Errorf("%s failed to export layer to tar stream for id: %s, parent: %s : %s", title, id, parent, err)
@@ -909,21 +988,22 @@ func (d *Driver) setLayerChain(id string, chain []string) error {
 // getLayerDetails is a utility for getting a file name, size and indication of
 // sandbox for a VHD(x) in a folder. A read-only layer will be layer.vhd. A
 // read-write layer will be sandbox.vhdx.
-func getLayerDetails(folder string) (string, int64, bool, error) {
+func getLayerDetails(folder string) (*layerDetails, error) {
 	var fileInfo os.FileInfo
-	isSandbox := false
-	filename := filepath.Join(folder, layerFilename)
-	var err error
-
-	if fileInfo, err = os.Stat(filename); err != nil {
-		filename = filepath.Join(folder, sandboxFilename)
-		if fileInfo, err = os.Stat(filename); err != nil {
-			if os.IsNotExist(err) {
-				return "", 0, isSandbox, fmt.Errorf("could not find layer or sandbox in %s", folder)
-			}
-			return "", 0, isSandbox, fmt.Errorf("error locating layer or sandbox in %s: %s", folder, err)
+	ld := &layerDetails{
+		isSandbox: false,
+		filename:  filepath.Join(folder, layerFilename),
+	}
+
+	fileInfo, err := os.Stat(ld.filename)
+	if err != nil {
+		ld.filename = filepath.Join(folder, sandboxFilename)
+		if fileInfo, err = os.Stat(ld.filename); err != nil {
+			return nil, fmt.Errorf("failed to locate layer or sandbox in %s", folder)
 		}
-		isSandbox = true
+		ld.isSandbox = true
 	}
-	return filename, fileInfo.Size(), isSandbox, nil
+	ld.size = fileInfo.Size()
+
+	return ld, nil
 }
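To make the refactored contract concrete, here is a hedged, self-contained sketch of calling the helper; since getLayerDetails is unexported it is reproduced in trimmed form, and the layer.vhd/sandbox.vhdx names come from the comment above:

package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// Trimmed reproduction of the unexported helper above, so the example runs standalone.
type layerDetails struct {
	filename  string
	size      int64
	isSandbox bool
}

func getLayerDetails(folder string) (*layerDetails, error) {
	ld := &layerDetails{filename: filepath.Join(folder, "layer.vhd")}
	fi, err := os.Stat(ld.filename)
	if err != nil {
		ld.filename = filepath.Join(folder, "sandbox.vhdx")
		if fi, err = os.Stat(ld.filename); err != nil {
			return nil, fmt.Errorf("failed to locate layer or sandbox in %s", folder)
		}
		ld.isSandbox = true
	}
	ld.size = fi.Size()
	return ld, nil
}

func main() {
	dir, err := os.MkdirTemp("", "lcow-example")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)

	// A read-write layer is identified by the presence of sandbox.vhdx.
	os.WriteFile(filepath.Join(dir, "sandbox.vhdx"), make([]byte, 1024), 0600)

	ld, err := getLayerDetails(dir)
	fmt.Println(ld.isSandbox, ld.size, err) // true 1024 <nil>
}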

+ 2 - 2
vendor.conf

@@ -1,14 +1,14 @@
 # the following lines are in sorted order, FYI
 github.com/Azure/go-ansiterm 19f72df4d05d31cbe1c56bfc8045c96babff6c7e
 github.com/Microsoft/hcsshim v0.6.1
-github.com/Microsoft/go-winio v0.4.2
+github.com/Microsoft/go-winio v0.4.4
 github.com/moby/buildkit da2b9dc7dab99e824b2b1067ad7d0523e32dd2d9 https://github.com/dmcgowan/buildkit.git
 github.com/davecgh/go-spew 346938d642f2ec3594ed81d874461961cd0faa76
 github.com/docker/libtrust 9cbd2a1374f46905c68a4eb3694a130610adc62a
 github.com/go-check/check 4ed411733c5785b40214c70bce814c3a3a689609 https://github.com/cpuguy83/check.git
 github.com/gorilla/context v1.1
 github.com/gorilla/mux v1.1
-github.com/jhowardmsft/opengcs b9d0120d36f26e981a50bf18bac1bb3f0c2b8fef https://github.com/dmcgowan/opengcs.git
+github.com/jhowardmsft/opengcs v0.0.12
 github.com/kr/pty 5cf931ef8f
 github.com/mattn/go-shellwords v1.0.3
 github.com/sirupsen/logrus v1.0.1

+ 14 - 7
vendor/github.com/Microsoft/go-winio/file.go

@@ -23,6 +23,13 @@ type atomicBool int32
 func (b *atomicBool) isSet() bool { return atomic.LoadInt32((*int32)(b)) != 0 }
 func (b *atomicBool) setFalse()   { atomic.StoreInt32((*int32)(b), 0) }
 func (b *atomicBool) setTrue()    { atomic.StoreInt32((*int32)(b), 1) }
+func (b *atomicBool) swap(new bool) bool {
+	var newInt int32
+	if new {
+		newInt = 1
+	}
+	return atomic.SwapInt32((*int32)(b), newInt) == 1
+}
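The new swap method is what makes the close path below race-free: it atomically stores the new value and reports the old one, so exactly one caller observes false and performs the teardown. A self-contained sketch reproducing the type:

package main

import (
	"fmt"
	"sync/atomic"
)

// atomicBool reproduced from above so the example runs standalone.
type atomicBool int32

func (b *atomicBool) swap(new bool) bool {
	var newInt int32
	if new {
		newInt = 1
	}
	return atomic.SwapInt32((*int32)(b), newInt) == 1
}

func main() {
	var closing atomicBool
	// The first closer sees the previous value (false) and runs the teardown.
	fmt.Println(closing.swap(true)) // false
	// Any later closer sees true and skips it, as in closeHandle below.
	fmt.Println(closing.swap(true)) // true
}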
 
 const (
 	cFILE_SKIP_COMPLETION_PORT_ON_SUCCESS = 1
@@ -71,7 +78,7 @@ func initIo() {
 type win32File struct {
 	handle        syscall.Handle
 	wg            sync.WaitGroup
-	closing       bool
+	closing       atomicBool
 	readDeadline  deadlineHandler
 	writeDeadline deadlineHandler
 }
@@ -107,9 +114,9 @@ func MakeOpenFile(h syscall.Handle) (io.ReadWriteCloser, error) {
 
 // closeHandle closes the resources associated with a Win32 handle
 func (f *win32File) closeHandle() {
-	if !f.closing {
+	// Atomically set that we are closing, releasing the resources only once.
+	if !f.closing.swap(true) {
 		// cancel all IO and wait for it to complete
-		f.closing = true
 		cancelIoEx(f.handle, nil)
 		f.wg.Wait()
 		// at this point, no new IO can start
@@ -127,10 +134,10 @@ func (f *win32File) Close() error {
 // prepareIo prepares for a new IO operation.
 // The caller must call f.wg.Done() when the IO is finished, prior to Close() returning.
 func (f *win32File) prepareIo() (*ioOperation, error) {
-	f.wg.Add(1)
-	if f.closing {
+	if f.closing.isSet() {
 		return nil, ErrFileClosed
 	}
+	f.wg.Add(1)
 	c := &ioOperation{}
 	c.ch = make(chan ioResult)
 	return c, nil
@@ -159,7 +166,7 @@ func (f *win32File) asyncIo(c *ioOperation, d *deadlineHandler, bytes uint32, er
 		return int(bytes), err
 	}
 
-	if f.closing {
+	if f.closing.isSet() {
 		cancelIoEx(f.handle, &c.o)
 	}
 
@@ -175,7 +182,7 @@ func (f *win32File) asyncIo(c *ioOperation, d *deadlineHandler, bytes uint32, er
 	case r = <-c.ch:
 		err = r.err
 		if err == syscall.ERROR_OPERATION_ABORTED {
-			if f.closing {
+			if f.closing.isSet() {
 				err = ErrFileClosed
 			}
 		}

+ 82 - 0
vendor/github.com/Microsoft/go-winio/vhd/vhd.go

@@ -0,0 +1,82 @@
+// +build windows
+
+package vhd
+
+import "syscall"
+
+//go:generate go run mksyscall_windows.go -output zvhd.go vhd.go
+
+//sys createVirtualDisk(virtualStorageType *virtualStorageType, path string, virtualDiskAccessMask uint32, securityDescriptor *uintptr, flags uint32, providerSpecificFlags uint32, parameters *createVirtualDiskParameters, o *syscall.Overlapped, handle *syscall.Handle) (err error) [failretval != 0] = VirtDisk.CreateVirtualDisk
+
+type virtualStorageType struct {
+	DeviceID uint32
+	VendorID [16]byte
+}
+
+const virtualDiskAccessNONE uint32 = 0
+const virtualDiskAccessATTACHRO uint32 = 65536
+const virtualDiskAccessATTACHRW uint32 = 131072
+const virtualDiskAccessDETACH uint32 = 262144
+const virtualDiskAccessGETINFO uint32 = 524288
+const virtualDiskAccessCREATE uint32 = 1048576
+const virtualDiskAccessMETAOPS uint32 = 2097152
+const virtualDiskAccessREAD uint32 = 851968
+const virtualDiskAccessALL uint32 = 4128768
+const virtualDiskAccessWRITABLE uint32 = 3276800
+
+const createVirtualDiskFlagNone uint32 = 0
+const createVirtualDiskFlagFullPhysicalAllocation uint32 = 1
+const createVirtualDiskFlagPreventWritesToSourceDisk uint32 = 2
+const createVirtualDiskFlagDoNotCopyMetadataFromParent uint32 = 4
+
+type version2 struct {
+	UniqueID                 [16]byte // GUID
+	MaximumSize              uint64
+	BlockSizeInBytes         uint32
+	SectorSizeInBytes        uint32
+	ParentPath               *uint16 // string
+	SourcePath               *uint16 // string
+	OpenFlags                uint32
+	ParentVirtualStorageType virtualStorageType
+	SourceVirtualStorageType virtualStorageType
+	ResiliencyGUID           [16]byte // GUID
+}
+
+type createVirtualDiskParameters struct {
+	Version  uint32 // Must always be set to 2
+	Version2 version2
+}
+
+// CreateVhdx will create a simple vhdx file at the given path using default values.
+func CreateVhdx(path string, maxSizeInGb, blockSizeInMb uint32) error {
+	var defaultType virtualStorageType
+
+	parameters := createVirtualDiskParameters{
+		Version: 2,
+		Version2: version2{
+			MaximumSize:      uint64(maxSizeInGb) * 1024 * 1024 * 1024,
+			BlockSizeInBytes: blockSizeInMb * 1024 * 1024,
+		},
+	}
+
+	var handle syscall.Handle
+
+	if err := createVirtualDisk(
+		&defaultType,
+		path,
+		virtualDiskAccessNONE,
+		nil,
+		createVirtualDiskFlagNone,
+		0,
+		&parameters,
+		nil,
+		&handle); err != nil {
+		return err
+	}
+
+	if err := syscall.CloseHandle(handle); err != nil {
+		return err
+	}
+
+	return nil
+}
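A minimal, hedged usage sketch (Windows-only; the path is hypothetical). This is the call the opengcs client later in this diff uses to create the scratch disk before formatting it:

// +build windows

package main

import (
	"fmt"

	"github.com/Microsoft/go-winio/vhd"
)

func main() {
	// 20GB maximum size, 1MB block size - the defaults the LCOW client uses.
	if err := vhd.CreateVhdx(`C:\lcow\scratch.vhdx`, 20, 1); err != nil {
		fmt.Println("failed to create VHDX:", err)
		return
	}
	fmt.Println("created")
}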

+ 64 - 0
vendor/github.com/Microsoft/go-winio/vhd/zvhd.go

@@ -0,0 +1,64 @@
+// MACHINE GENERATED BY 'go generate' COMMAND; DO NOT EDIT
+
+package vhd
+
+import (
+	"syscall"
+	"unsafe"
+
+	"golang.org/x/sys/windows"
+)
+
+var _ unsafe.Pointer
+
+// Do the interface allocations only once for common
+// Errno values.
+const (
+	errnoERROR_IO_PENDING = 997
+)
+
+var (
+	errERROR_IO_PENDING error = syscall.Errno(errnoERROR_IO_PENDING)
+)
+
+// errnoErr returns common boxed Errno values, to prevent
+// allocations at runtime.
+func errnoErr(e syscall.Errno) error {
+	switch e {
+	case 0:
+		return nil
+	case errnoERROR_IO_PENDING:
+		return errERROR_IO_PENDING
+	}
+	// TODO: add more here, after collecting data on the common
+	// error values see on Windows. (perhaps when running
+	// all.bat?)
+	return e
+}
+
+var (
+	modVirtDisk = windows.NewLazySystemDLL("VirtDisk.dll")
+
+	procCreateVirtualDisk = modVirtDisk.NewProc("CreateVirtualDisk")
+)
+
+func createVirtualDisk(virtualStorageType *virtualStorageType, path string, virtualDiskAccessMask uint32, securityDescriptor *uintptr, flags uint32, providerSpecificFlags uint32, parameters *createVirtualDiskParameters, o *syscall.Overlapped, handle *syscall.Handle) (err error) {
+	var _p0 *uint16
+	_p0, err = syscall.UTF16PtrFromString(path)
+	if err != nil {
+		return
+	}
+	return _createVirtualDisk(virtualStorageType, _p0, virtualDiskAccessMask, securityDescriptor, flags, providerSpecificFlags, parameters, o, handle)
+}
+
+func _createVirtualDisk(virtualStorageType *virtualStorageType, path *uint16, virtualDiskAccessMask uint32, securityDescriptor *uintptr, flags uint32, providerSpecificFlags uint32, parameters *createVirtualDiskParameters, o *syscall.Overlapped, handle *syscall.Handle) (err error) {
+	r1, _, e1 := syscall.Syscall9(procCreateVirtualDisk.Addr(), 9, uintptr(unsafe.Pointer(virtualStorageType)), uintptr(unsafe.Pointer(path)), uintptr(virtualDiskAccessMask), uintptr(unsafe.Pointer(securityDescriptor)), uintptr(flags), uintptr(providerSpecificFlags), uintptr(unsafe.Pointer(parameters)), uintptr(unsafe.Pointer(o)), uintptr(unsafe.Pointer(handle)))
+	if r1 != 0 {
+		if e1 != 0 {
+			err = errnoErr(e1)
+		} else {
+			err = syscall.EINVAL
+		}
+	}
+	return
+}

+ 111 - 87
vendor/github.com/jhowardmsft/opengcs/gogcs/client/config.go

@@ -39,21 +39,20 @@ const (
 	// defaultUvmTimeoutSeconds is the default time to wait for utility VM operations
 	defaultUvmTimeoutSeconds = 5 * 60
 
-	// DefaultSandboxSizeMB is the size of the default sandbox size in MB
-	DefaultSandboxSizeMB = 20 * 1024 * 1024
+	// DefaultVhdxSizeGB is the size of the default sandbox & scratch in GB
+	DefaultVhdxSizeGB = 20
+
+	// defaultVhdxBlockSizeMB is the block-size for the sandbox/scratch VHDXs this package can create.
+	defaultVhdxBlockSizeMB = 1
 )
 
-// Config is the structure used to configuring a utility VM to be used
-// as a service VM. There are two ways of starting. Either supply a VHD,
-// or a Kernel+Initrd. For the latter, both must be supplied, and both
-// must be in the same directory.
+// Config is the structure used to configure a utility VM. There are two ways
+// of starting. Either supply a VHD, or a Kernel+Initrd. For the latter, both
+// must be supplied, and both must be in the same directory.
 //
 // VHD is the priority.
 type Config struct {
-	KirdPath           string                      // Path to where kernel/initrd are found (defaults to c:\program files\Linux Containers)
-	KernelFile         string                      // Kernel for Utility VM (embedded in a UEFI bootloader) - does NOT include full path, just filename
-	InitrdFile         string                      // Initrd image for Utility VM - does NOT include full path, just filename
-	Vhdx               string                      // VHD for booting the utility VM - is a full path
+	Options                                        // Configuration options
 	Name               string                      // Name of the utility VM
 	RequestedMode      Mode                        // What mode is preferred when validating
 	ActualMode         Mode                        // What mode was obtained during validation
@@ -62,105 +61,129 @@ type Config struct {
 	MappedVirtualDisks []hcsshim.MappedVirtualDisk // Data-disks to be attached
 }
 
-// GenerateDefault generates a default config from a set of options
-// If baseDir is not supplied, defaults to $env:ProgramFiles\Linux Containers
-func (config *Config) GenerateDefault(options []string) error {
-	if config.UvmTimeoutSeconds < 0 {
-		return fmt.Errorf("opengcs: cannot generate a config when supplied a negative utility VM timeout")
-	}
-
-	envTimeoutSeconds := 0
-	optTimeoutSeconds := 0
-
-	if config.UvmTimeoutSeconds != 0 {
-		envTimeout := os.Getenv("OPENGCS_UVM_TIMEOUT_SECONDS")
-		if len(envTimeout) > 0 {
-			var err error
-			if envTimeoutSeconds, err = strconv.Atoi(envTimeout); err != nil {
-				return fmt.Errorf("opengcs: OPENGCS_UVM_TIMEOUT_SECONDS could not be interpreted as an integer")
-			}
-			if envTimeoutSeconds < 0 {
-				return fmt.Errorf("opengcs: OPENGCS_UVM_TIMEOUT_SECONDS cannot be negative")
-			}
-		}
-	}
+// Options is the structure used by a client to define configurable options for a utility VM.
+type Options struct {
+	KirdPath       string // Path to where kernel/initrd are found (defaults to %PROGRAMFILES%\Linux Containers)
+	KernelFile     string // Kernel for Utility VM (embedded in a UEFI bootloader) - does NOT include full path, just filename
+	InitrdFile     string // Initrd image for Utility VM - does NOT include full path, just filename
+	Vhdx           string // VHD for booting the utility VM - is a full path
+	TimeoutSeconds int    // Requested time for the utility VM to respond in seconds (may be overridden by environment)
+	BootParameters string // Additional boot parameters for initrd booting (not VHDx)
+}
 
+// ParseOptions parses a set of K-V pairs into options used by opengcs. Note
+// for consistency with the LCOW graphdriver in docker, we keep the same
+// convention of an `lcow.` prefix.
+func ParseOptions(options []string) (Options, error) {
+	rOpts := Options{TimeoutSeconds: 0}
 	for _, v := range options {
 		opt := strings.SplitN(v, "=", 2)
 		if len(opt) == 2 {
 			switch strings.ToLower(opt[0]) {
-			case "opengcskirdpath":
-				config.KirdPath = opt[1]
-			case "opengcskernel":
-				config.KernelFile = opt[1]
-			case "opengcsinitrd":
-				config.InitrdFile = opt[1]
-			case "opengcsvhdx":
-				config.Vhdx = opt[1]
-			case "opengcstimeoutsecs":
+			case "lcow.kirdpath":
+				rOpts.KirdPath = opt[1]
+			case "lcow.kernel":
+				rOpts.KernelFile = opt[1]
+			case "lcow.initrd":
+				rOpts.InitrdFile = opt[1]
+			case "lcow.vhdx":
+				rOpts.Vhdx = opt[1]
+			case "lcow.bootparameters":
+				rOpts.BootParameters = opt[1]
+			case "lcow.timeout":
 				var err error
-				if optTimeoutSeconds, err = strconv.Atoi(opt[1]); err != nil {
-					return fmt.Errorf("opengcs: opengcstimeoutsecs option could not be interpreted as an integer")
+				if rOpts.TimeoutSeconds, err = strconv.Atoi(opt[1]); err != nil {
+					return rOpts, fmt.Errorf("opengcstimeoutsecs option could not be interpreted as an integer")
 				}
-				if optTimeoutSeconds < 0 {
-					return fmt.Errorf("opengcs: opengcstimeoutsecs option cannot be negative")
+				if rOpts.TimeoutSeconds < 0 {
+					return rOpts, fmt.Errorf("opengcstimeoutsecs option cannot be negative")
 				}
 			}
 		}
 	}
 
-	if config.KirdPath == "" {
-		config.KirdPath = filepath.Join(os.Getenv("ProgramFiles"), "Linux Containers")
+	// Set default values if not supplied
+	if rOpts.KirdPath == "" {
+		rOpts.KirdPath = filepath.Join(os.Getenv("ProgramFiles"), "Linux Containers")
 	}
-
-	if config.Vhdx == "" {
-		config.Vhdx = filepath.Join(config.KirdPath, `uvm.vhdx`)
+	if rOpts.Vhdx == "" {
+		rOpts.Vhdx = filepath.Join(rOpts.KirdPath, `uvm.vhdx`)
 	}
-	if config.KernelFile == "" {
-		config.KernelFile = `bootx64.efi`
+	if rOpts.KernelFile == "" {
+		rOpts.KernelFile = `bootx64.efi`
 	}
-	if config.InitrdFile == "" {
-		config.InitrdFile = `initrd.img`
+	if rOpts.InitrdFile == "" {
+		rOpts.InitrdFile = `initrd.img`
 	}
 
-	// Which timeout are we going to take? If not through option or environment,
-	// then use the default constant, otherwise the maximum of the option or
-	// environment supplied setting. A requested on in the config supplied
-	// overrides all of this.
-	if config.UvmTimeoutSeconds == 0 {
-		config.UvmTimeoutSeconds = defaultUvmTimeoutSeconds
-		if optTimeoutSeconds != 0 || envTimeoutSeconds != 0 {
-			config.UvmTimeoutSeconds = optTimeoutSeconds
-			if envTimeoutSeconds > optTimeoutSeconds {
-				config.UvmTimeoutSeconds = envTimeoutSeconds
-			}
+	return rOpts, nil
+}
+
+// GenerateDefault generates a default config from a set of options
+// If baseDir is not supplied, defaults to $env:ProgramFiles\Linux Containers
+func (config *Config) GenerateDefault(options []string) error {
+	// Parse the options that the user supplied.
+	var err error
+	config.Options, err = ParseOptions(options)
+	if err != nil {
+		return err
+	}
+
+	// Get the timeout from the environment
+	envTimeoutSeconds := 0
+	envTimeout := os.Getenv("OPENGCS_UVM_TIMEOUT_SECONDS")
+	if len(envTimeout) > 0 {
+		var err error
+		if envTimeoutSeconds, err = strconv.Atoi(envTimeout); err != nil {
+			return fmt.Errorf("OPENGCS_UVM_TIMEOUT_SECONDS could not be interpreted as an integer")
 		}
+		if envTimeoutSeconds < 0 {
+			return fmt.Errorf("OPENGCS_UVM_TIMEOUT_SECONDS cannot be negative")
+		}
+	}
+
+	// Priority to the requested timeout from the options.
+	if config.TimeoutSeconds != 0 {
+		config.UvmTimeoutSeconds = config.TimeoutSeconds
+		return nil
 	}
 
-	config.MappedVirtualDisks = nil
+	// Next priority, the environment
+	if envTimeoutSeconds != 0 {
+		config.UvmTimeoutSeconds = envTimeoutSeconds
+		return nil
+	}
+
+	// Last priority is the default timeout
+	config.UvmTimeoutSeconds = defaultUvmTimeoutSeconds
 
 	return nil
 }
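So the resolution order after this refactor is: the lcow.timeout option, then the OPENGCS_UVM_TIMEOUT_SECONDS environment variable, then the 300-second default. A standalone sketch of that precedence (a hypothetical helper, not in the PR):

package main

import (
	"fmt"
	"os"
	"strconv"
)

const defaultUvmTimeoutSeconds = 5 * 60

// resolveTimeout mirrors GenerateDefault's precedence: option > environment > default.
// Simplified: the real code returns an error for negative or malformed values.
func resolveTimeout(optSeconds int) int {
	if optSeconds != 0 {
		return optSeconds
	}
	if env := os.Getenv("OPENGCS_UVM_TIMEOUT_SECONDS"); env != "" {
		if envSeconds, err := strconv.Atoi(env); err == nil && envSeconds > 0 {
			return envSeconds
		}
	}
	return defaultUvmTimeoutSeconds
}

func main() {
	os.Setenv("OPENGCS_UVM_TIMEOUT_SECONDS", "120")
	fmt.Println(resolveTimeout(600)) // 600: the lcow.timeout option wins
	fmt.Println(resolveTimeout(0))   // 120: falls back to the environment
}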
 
-// validate validates a Config structure for starting a utility VM.
-func (config *Config) validate() error {
+// Validate validates a Config structure for starting a utility VM.
+func (config *Config) Validate() error {
 	config.ActualMode = ModeActualError
 
 	if config.RequestedMode == ModeRequestVhdx && config.Vhdx == "" {
-		return fmt.Errorf("opengcs: config is invalid - request for VHDX mode did not supply a VHDX")
+		return fmt.Errorf("VHDx mode must supply a VHDx")
 	}
 	if config.RequestedMode == ModeRequestKernelInitrd && (config.KernelFile == "" || config.InitrdFile == "") {
-		return fmt.Errorf("opengcs: config is invalid - request for Kernel+Initrd mode must supply both kernel and initrd")
+		return fmt.Errorf("kernel+initrd mode must supply both kernel and initrd")
 	}
 
 	// Validate that if VHDX requested or auto, it exists.
 	if config.RequestedMode == ModeRequestAuto || config.RequestedMode == ModeRequestVhdx {
 		if _, err := os.Stat(config.Vhdx); os.IsNotExist(err) {
 			if config.RequestedMode == ModeRequestVhdx {
-				return fmt.Errorf("opengcs: mode requested was VHDX but '%s' could not be found", config.Vhdx)
+				return fmt.Errorf("VHDx '%s' not found", config.Vhdx)
 			}
 		} else {
 			config.ActualMode = ModeActualVhdx
+
+			// Can't specify boot parameters with VHDx
+			if config.BootParameters != "" {
+				return fmt.Errorf("Boot parameters cannot be specified in VHDx mode")
+			}
 			return nil
 		}
 	}
@@ -168,16 +191,16 @@ func (config *Config) validate() error {
 	// So must be kernel+initrd, or auto where we fallback as the VHDX doesn't exist
 	if config.InitrdFile == "" || config.KernelFile == "" {
 		if config.RequestedMode == ModeRequestKernelInitrd {
-			return fmt.Errorf("opengcs: both initrd and kernel options for utility VM boot must be supplied")
+			return fmt.Errorf("initrd and kernel options must be supplied")
 		}
 		return fmt.Errorf("opengcs: configuration is invalid")
 	}
 
 	if _, err := os.Stat(filepath.Join(config.KirdPath, config.KernelFile)); os.IsNotExist(err) {
-		return fmt.Errorf("opengcs: kernel '%s' was not found", filepath.Join(config.KirdPath, config.KernelFile))
+		return fmt.Errorf("kernel '%s' not found", filepath.Join(config.KirdPath, config.KernelFile))
 	}
 	if _, err := os.Stat(filepath.Join(config.KirdPath, config.InitrdFile)); os.IsNotExist(err) {
-		return fmt.Errorf("opengcs: initrd '%s' was not found", filepath.Join(config.KirdPath, config.InitrdFile))
+		return fmt.Errorf("initrd '%s' not found", filepath.Join(config.KirdPath, config.InitrdFile))
 	}
 
 	config.ActualMode = ModeActualKernelInitrd
@@ -185,21 +208,21 @@ func (config *Config) validate() error {
 	// Ensure all the MappedVirtualDisks exist on the host
 	for _, mvd := range config.MappedVirtualDisks {
 		if _, err := os.Stat(mvd.HostPath); err != nil {
-			return fmt.Errorf("opengcs: MappedVirtualDisk '%s' was not found", mvd.HostPath)
+			return fmt.Errorf("mapped virtual disk '%s' not found", mvd.HostPath)
 		}
 		if mvd.ContainerPath == "" {
-			return fmt.Errorf("opengcs: MappedVirtualDisk '%s' has no container path", mvd.HostPath)
+			return fmt.Errorf("mapped virtual disk '%s' requested without a container path", mvd.HostPath)
 		}
 	}
 
 	return nil
 }
 
-// Create creates a utility VM from a configuration.
-func (config *Config) Create() error {
-	logrus.Debugf("opengcs Create: %+v", config)
+// StartUtilityVM creates and starts a utility VM from a configuration.
+func (config *Config) StartUtilityVM() error {
+	logrus.Debugf("opengcs: StartUtilityVM: %+v", config)
 
-	if err := config.validate(); err != nil {
+	if err := config.Validate(); err != nil {
 		return err
 	}
 
@@ -218,28 +241,29 @@ func (config *Config) Create() error {
 		}
 	} else {
 		configuration.HvRuntime = &hcsshim.HvRuntime{
-			ImagePath:       config.KirdPath,
-			LinuxInitrdFile: config.InitrdFile,
-			LinuxKernelFile: config.KernelFile,
+			ImagePath:           config.KirdPath,
+			LinuxInitrdFile:     config.InitrdFile,
+			LinuxKernelFile:     config.KernelFile,
+			LinuxBootParameters: config.BootParameters,
 		}
 	}
 
 	configurationS, _ := json.Marshal(configuration)
-	logrus.Debugf("opengcs Create: calling HCS with '%s'", string(configurationS))
+	logrus.Debugf("opengcs: StartUtilityVM: calling HCS with '%s'", string(configurationS))
 	uvm, err := hcsshim.CreateContainer(config.Name, configuration)
 	if err != nil {
 		return err
 	}
-	logrus.Debugf("opengcs Create: uvm created, starting...")
+	logrus.Debugf("opengcs: StartUtilityVM: uvm created, starting...")
 	err = uvm.Start()
 	if err != nil {
-		logrus.Debugf("opengcs Create: uvm failed to start: %s", err)
+		logrus.Debugf("opengcs: StartUtilityVM: uvm failed to start: %s", err)
 		// Make sure we don't leave it laying around as it's been created in HCS
 		uvm.Terminate()
 		return err
 	}
 
 	config.Uvm = uvm
-	logrus.Debugf("opengcs Create: uvm %s is running", config.Name)
+	logrus.Debugf("opengcs StartUtilityVM: uvm %s is running", config.Name)
 	return nil
 }

+ 165 - 0
vendor/github.com/jhowardmsft/opengcs/gogcs/client/createext4vhdx.go

@@ -0,0 +1,165 @@
+// +build windows
+
+package client
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+	"strings"
+	"time"
+
+	winio "github.com/Microsoft/go-winio/vhd"
+	//	"github.com/Microsoft/hcsshim"
+	"github.com/sirupsen/logrus"
+)
+
+// dismount is a simple utility function wrapping a conditional HotRemove. It would
+// have been easier if you could cancel a deferred function, but this works just
+// as well.
+func (config *Config) dismount(file string) error {
+	logrus.Debugf("opengcs: CreateExt4Vhdx: hot-remove of %s", file)
+	err := config.HotRemoveVhd(file)
+	if err != nil {
+		logrus.Warnf("failed to hot-remove: %s", err)
+	}
+	return err
+}
+
+// CreateExt4Vhdx creates an ext4-formatted VHDX at destFile, fulfilling the request from cacheFile
+// where possible and seeding the cache otherwise. It is the responsibility of the caller to
+// synchronise simultaneous attempts to create the cache file.
+func (config *Config) CreateExt4Vhdx(destFile string, sizeGB uint32, cacheFile string) error {
+	// Smallest we can accept is the default sandbox size as we can't size down, only expand.
+	if sizeGB < DefaultVhdxSizeGB {
+		sizeGB = DefaultVhdxSizeGB
+	}
+
+	logrus.Debugf("opengcs: CreateExt4Vhdx: %s size:%dGB cache:%s", destFile, sizeGB, cacheFile)
+
+	// Retrieve from cache if the default size and already on disk
+	if cacheFile != "" && sizeGB == DefaultVhdxSizeGB {
+		if _, err := os.Stat(cacheFile); err == nil {
+			if err := CopyFile(cacheFile, destFile, false); err != nil {
+				return fmt.Errorf("failed to copy cached file '%s' to '%s': %s", cacheFile, destFile, err)
+			}
+			logrus.Debugf("opengcs: CreateExt4Vhdx: %s fulfilled from cache", destFile)
+			return nil
+		}
+	}
+
+	// Must have a utility VM to operate on
+	if config.Uvm == nil {
+		return fmt.Errorf("no utility VM")
+	}
+
+	// Create the VHDX
+	if err := winio.CreateVhdx(destFile, sizeGB, defaultVhdxBlockSizeMB); err != nil {
+		return fmt.Errorf("failed to create VHDx %s: %s", destFile, err)
+	}
+
+	// Attach it to the utility VM, but don't mount it (as there's no filesystem on it)
+	if err := config.HotAddVhd(destFile, "", false, false); err != nil {
+		return fmt.Errorf("opengcs: CreateExt4Vhdx: failed to hot-add %s to utility VM: %s", cacheFile, err)
+	}
+
+	// Get the list of mapped virtual disks to find the controller and LUN IDs
+	logrus.Debugf("opengcs: CreateExt4Vhdx: %s querying mapped virtual disks", destFile)
+	mvdControllers, err := config.Uvm.MappedVirtualDisks()
+	if err != nil {
+		return fmt.Errorf("failed to get mapped virtual disks: %s", err)
+	}
+
+	// Find our mapped disk from the list of all currently added.
+	controller := -1
+	lun := -1
+	for controllerNumber, controllerElement := range mvdControllers {
+		for diskNumber, diskElement := range controllerElement.MappedVirtualDisks {
+			if diskElement.HostPath == destFile {
+				controller = controllerNumber
+				lun = diskNumber
+				break
+			}
+		}
+	}
+	if controller == -1 || lun == -1 {
+		config.dismount(destFile)
+		return fmt.Errorf("failed to find %s in mapped virtual disks after hot-adding", destFile)
+	}
+	logrus.Debugf("opengcs: CreateExt4Vhdx: %s at C=%d L=%d", destFile, controller, lun)
+
+	// Validate /sys/bus/scsi/devices/C:0:0:L exists as a directory
+	testdCommand := fmt.Sprintf(`test -d /sys/bus/scsi/devices/%d:0:0:%d`, controller, lun)
+	testdProc, err := config.RunProcess(testdCommand, nil, nil, nil)
+	if err != nil {
+		config.dismount(destFile)
+		return fmt.Errorf("failed to `%s` following hot-add %s to utility VM: %s", testdCommand, destFile, err)
+	}
+	defer testdProc.Close()
+	testdProc.WaitTimeout(time.Duration(int(time.Second) * config.UvmTimeoutSeconds))
+	testdExitCode, err := testdProc.ExitCode()
+	if err != nil {
+		config.dismount(destFile)
+		return fmt.Errorf("failed to get exit code from `%s` following hot-add %s to utility VM: %s", testdCommand, destFile, err)
+	}
+	if testdExitCode != 0 {
+		config.dismount(destFile)
+		return fmt.Errorf("`%s` return non-zero exit code (%d) following hot-add %s to utility VM", testdCommand, testdExitCode, destFile)
+	}
+
+	// Get the device from under the block subdirectory by doing a simple ls. This will come back as (e.g.) `sda`
+	lsCommand := fmt.Sprintf(`ls /sys/bus/scsi/devices/%d:0:0:%d/block`, controller, lun)
+	var lsOutput bytes.Buffer
+	lsProc, err := config.RunProcess(lsCommand, nil, &lsOutput, nil)
+	if err != nil {
+		config.dismount(destFile)
+		return fmt.Errorf("failed to `%s` following hot-add %s to utility VM: %s", lsCommand, destFile, err)
+	}
+	defer lsProc.Close()
+	lsProc.WaitTimeout(time.Duration(int(time.Second) * config.UvmTimeoutSeconds))
+	lsExitCode, err := lsProc.ExitCode()
+	if err != nil {
+		config.dismount(destFile)
+		return fmt.Errorf("failed to get exit code from `%s` following hot-add %s to utility VM: %s", lsCommand, destFile, err)
+	}
+	if lsExitCode != 0 {
+		config.dismount(destFile)
+		return fmt.Errorf("`%s` return non-zero exit code (%d) following hot-add %s to utility VM", lsCommand, lsExitCode, destFile)
+	}
+	device := fmt.Sprintf(`/dev/%s`, strings.TrimSpace(lsOutput.String()))
+	logrus.Debugf("opengcs: CreateExt4Vhdx: %s: device at %s", destFile, device)
+
+	// Format it ext4
+	mkfsCommand := fmt.Sprintf(`mkfs.ext4 -q -E lazy_itable_init=1 -O ^has_journal,sparse_super2,uninit_bg,^resize_inode %s`, device)
+	var mkfsStderr bytes.Buffer
+	mkfsProc, err := config.RunProcess(mkfsCommand, nil, nil, &mkfsStderr)
+	if err != nil {
+		config.dismount(destFile)
+		return fmt.Errorf("failed to RunProcess %q following hot-add %s to utility VM: %s", destFile, mkfsCommand, err)
+	}
+	defer mkfsProc.Close()
+	mkfsProc.WaitTimeout(time.Duration(int(time.Second) * config.UvmTimeoutSeconds))
+	mkfsExitCode, err := mkfsProc.ExitCode()
+	if err != nil {
+		config.dismount(destFile)
+		return fmt.Errorf("failed to get exit code from `%s` following hot-add %s to utility VM: %s", mkfsCommand, destFile, err)
+	}
+	if mkfsExitCode != 0 {
+		config.dismount(destFile)
+		return fmt.Errorf("`%s` return non-zero exit code (%d) following hot-add %s to utility VM: %s", mkfsCommand, mkfsExitCode, destFile, strings.TrimSpace(mkfsStderr.String()))
+	}
+
+	// Dismount before we copy it
+	if err := config.dismount(destFile); err != nil {
+		return fmt.Errorf("failed to hot-remove: %s", err)
+	}
+
+	// Populate the cache.
+	if cacheFile != "" && (sizeGB == DefaultVhdxSizeGB) {
+		if err := CopyFile(destFile, cacheFile, true); err != nil {
+			return fmt.Errorf("failed to seed cache '%s' from '%s': %s", destFile, cacheFile, err)
+		}
+	}
+
+	logrus.Debugf("opengcs: CreateExt4Vhdx: %s created (non-cache)", destFile)
+	return nil
+}
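For reference, the device-discovery sequence above maps the hot-added disk's controller/LUN pair to a Linux block device inside the utility VM. A small sketch of the three commands it issues for, say, controller 0 and LUN 1 (all values hypothetical):

package main

import "fmt"

func main() {
	controller, lun, device := 0, 1, "/dev/sdb"
	// 1. Confirm the SCSI device appeared in sysfs.
	fmt.Printf("test -d /sys/bus/scsi/devices/%d:0:0:%d\n", controller, lun)
	// 2. Discover the block device name (e.g. sdb) under its block subdirectory.
	fmt.Printf("ls /sys/bus/scsi/devices/%d:0:0:%d/block\n", controller, lun)
	// 3. Format the device ext4 with the same tuning flags as above.
	fmt.Printf("mkfs.ext4 -q -E lazy_itable_init=1 -O ^has_journal,sparse_super2,uninit_bg,^resize_inode %s\n", device)
}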

+ 0 - 67
vendor/github.com/jhowardmsft/opengcs/gogcs/client/createsandbox.go

@@ -1,67 +0,0 @@
-// +build windows
-
-package client
-
-import (
-	"fmt"
-	"os"
-
-	"github.com/sirupsen/logrus"
-)
-
-// CreateSandbox does what it says on the tin. This is done by copying a prebuilt-sandbox from the ServiceVM.
-// It is the responsibility of the caller to synchronise simultaneous attempts to create the cache file.
-// TODO: @jhowardmsft maxSizeInMB isn't hooked up in GCS. Needs a platform change which is in flight.
-func (config *Config) CreateSandbox(destFile string, maxSizeInMB uint32, cacheFile string) error {
-	// Smallest we can accept is the default sandbox size as we can't size down, only expand.
-	if maxSizeInMB < DefaultSandboxSizeMB {
-		maxSizeInMB = DefaultSandboxSizeMB
-	}
-
-	logrus.Debugf("opengcs: CreateSandbox: %s size:%dMB cache:%s", destFile, maxSizeInMB, cacheFile)
-
-	// Retrieve from cache if the default size and already on disk
-	if cacheFile != "" && maxSizeInMB == DefaultSandboxSizeMB {
-		if _, err := os.Stat(cacheFile); err == nil {
-			if err := CopyFile(cacheFile, destFile, false); err != nil {
-				return fmt.Errorf("opengcs: CreateSandbox: Failed to copy cached sandbox '%s' to '%s': %s", cacheFile, destFile, err)
-			}
-			logrus.Debugf("opengcs: CreateSandbox: %s fulfilled from cache", destFile)
-			return nil
-		}
-	}
-
-	if config.Uvm == nil {
-		return fmt.Errorf("opengcs: CreateSandbox: No utility VM has been created")
-	}
-
-	// TODO @jhowardmsft - needs a platform change so that can specify size. eg fmt.Sprintf("createSandbox -size %d", maxSizeInMB))
-	process, err := config.createUtilsProcess("createSandbox")
-	if err != nil {
-		return fmt.Errorf("opengcs: CreateSandbox: %s: failed to create utils process: %s", destFile, err)
-	}
-
-	defer func() {
-		process.Process.Close()
-	}()
-
-	logrus.Debugf("opengcs: CreateSandbox: %s: writing from stdout", destFile)
-	// Get back the sandbox VHDx stream from the service VM and write it to file
-	resultSize, err := writeFileFromReader(destFile, process.Stdout, config.UvmTimeoutSeconds, fmt.Sprintf("createSandbox %s", destFile))
-	if err != nil {
-		return fmt.Errorf("opengcs: CreateSandbox: %s: failed writing %d bytes to target file: %s", destFile, resultSize, err)
-	}
-
-	// Populate the cache
-	if cacheFile != "" && maxSizeInMB == DefaultSandboxSizeMB {
-		// It may already exist due to being created on another thread, in which case no copy back needed.
-		if _, err := os.Stat(cacheFile); os.IsNotExist(err) {
-			if err := CopyFile(destFile, cacheFile, false); err != nil {
-				return fmt.Errorf("opengcs: CreateSandbox: Failed to seed sandbox cache '%s' from '%s': %s", destFile, cacheFile, err)
-			}
-		}
-	}
-
-	logrus.Debugf("opengcs: CreateSandbox: %s created (non-cache)", destFile)
-	return nil
-}

+ 5 - 4
vendor/github.com/jhowardmsft/opengcs/gogcs/client/hotaddvhd.go

@@ -13,7 +13,7 @@ import (
 // service-VM per host scenario. In order to do a graphdriver `Diff`, we hot-add the
 // sandbox to /mnt/<id> so that we can run `exportSandbox` inside the utility VM to
 // get a tar-stream of the sandboxes contents back to the daemon.
-func (config *Config) HotAddVhd(hostPath string, containerPath string) error {
+func (config *Config) HotAddVhd(hostPath string, containerPath string, readOnly bool, mount bool) error {
 	logrus.Debugf("opengcs: HotAddVhd: %s: %s", hostPath, containerPath)
 
 	if config.Uvm == nil {
@@ -26,13 +26,14 @@ func (config *Config) HotAddVhd(hostPath string, containerPath string) error {
 			HostPath:          hostPath,
 			ContainerPath:     containerPath,
 			CreateInUtilityVM: true,
-			//ReadOnly:          true,
+			ReadOnly:          readOnly,
+			AttachOnly:        !mount,
 		},
 		Request: "Add",
 	}
-	logrus.Debugf("opengcs: HotAddVhd: %s to %s", hostPath, containerPath)
+
 	if err := config.Uvm.Modify(modification); err != nil {
-		return fmt.Errorf("opengcs: HotAddVhd: failed: %s", err)
+		return fmt.Errorf("failed to modify utility VM configuration for hot-add: %s", err)
 	}
 	logrus.Debugf("opengcs: HotAddVhd: %s added successfully", hostPath)
 	return nil

+ 1 - 1
vendor/github.com/jhowardmsft/opengcs/gogcs/client/hotremovevhd.go

@@ -27,7 +27,7 @@ func (config *Config) HotRemoveVhd(hostPath string) error {
 		Request: "Remove",
 	}
 	if err := config.Uvm.Modify(modification); err != nil {
-		return fmt.Errorf("opengcs: HotRemoveVhd: %s failed: %s", hostPath, err)
+		return fmt.Errorf("failed modifying utility VM for hot-remove %s: %s", hostPath, err)
 	}
 	logrus.Debugf("opengcs: HotRemoveVhd: %s removed successfully", hostPath)
 	return nil

+ 26 - 14
vendor/github.com/jhowardmsft/opengcs/gogcs/client/process.go

@@ -15,6 +15,7 @@ type process struct {
 	Process hcsshim.Process
 	Stdin   io.WriteCloser
 	Stdout  io.ReadCloser
+	Stderr  io.ReadCloser
 }
 
 // createUtilsProcess is a convenient wrapper for hcsshim.createUtilsProcess to use when
@@ -45,13 +46,13 @@ func (config *Config) createUtilsProcess(commandLine string) (process, error) {
 	}
 	proc.Process, err = config.Uvm.CreateProcess(processConfig)
 	if err != nil {
-		return process{}, fmt.Errorf("opengcs: createUtilsProcess: CreateProcess %+v failed %s", config, err)
+		return process{}, fmt.Errorf("failed to create process (%+v) in utility VM: %s", config, err)
 	}
 
-	if proc.Stdin, proc.Stdout, _, err = proc.Process.Stdio(); err != nil {
+	if proc.Stdin, proc.Stdout, proc.Stderr, err = proc.Process.Stdio(); err != nil {
 		proc.Process.Kill() // Should this have a timeout?
 		proc.Process.Close()
-		return process{}, fmt.Errorf("opengcs: createUtilsProcess: failed to get Stdio pipes %s", err)
+		return process{}, fmt.Errorf("failed to get stdio pipes for process %+v: %s", config, err)
 	}
 
 	logrus.Debugf("opengcs: createUtilsProcess success: pid %d", proc.Process.Pid())
@@ -60,41 +61,52 @@ func (config *Config) createUtilsProcess(commandLine string) (process, error) {
 
 // RunProcess runs the given command line program in the utilityVM. It takes in
 // an input to the reader to feed into stdin and returns stdout to output.
-func (config *Config) RunProcess(commandLine string, input io.Reader, output io.Writer) error {
+// IMPORTANT: It is the responsibility of the caller to call Close() on the returned process.
+func (config *Config) RunProcess(commandLine string, stdin io.Reader, stdout io.Writer, stderr io.Writer) (hcsshim.Process, error) {
 	logrus.Debugf("opengcs: RunProcess: %s", commandLine)
 	process, err := config.createUtilsProcess(commandLine)
 	if err != nil {
-		return err
+		return nil, err
 	}
-	defer process.Process.Close()
 
 	// Send the data into the process's stdin
-	if input != nil {
+	if stdin != nil {
 		if _, err = copyWithTimeout(process.Stdin,
-			input,
+			stdin,
 			0,
 			config.UvmTimeoutSeconds,
 			fmt.Sprintf("send to stdin of %s", commandLine)); err != nil {
-			return err
+			return nil, err
 		}
 
 		// Don't need stdin now we've sent everything. This signals GCS that we are finished sending data.
 		if err := process.Process.CloseStdin(); err != nil {
-			return err
+			return nil, err
 		}
 	}
 
-	if output != nil {
+	if stdout != nil {
 		// Copy the data over to the writer.
-		if _, err := copyWithTimeout(output,
+		if _, err := copyWithTimeout(stdout,
 			process.Stdout,
 			0,
 			config.UvmTimeoutSeconds,
 			fmt.Sprintf("RunProcess: copy back from %s", commandLine)); err != nil {
-			return err
+			return nil, err
+		}
+	}
+
+	if stderr != nil {
+		// Copy the data over to the writer.
+		if _, err := copyWithTimeout(stderr,
+			process.Stderr,
+			0,
+			config.UvmTimeoutSeconds,
+			fmt.Sprintf("RunProcess: copy back from %s", commandLine)); err != nil {
+			return nil, err
 		}
 	}
 
 	logrus.Debugf("opengcs: runProcess success: %s", commandLine)
-	return nil
+	return process.Process, nil
 }
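With this change the process handle outlives the call, so the caller now owns it and must Close() it. A hedged sketch of the new contract, modelled on the testd/ls/mkfs call sites in createext4vhdx.go above (the command and package name are arbitrary, and cfg.Uvm is assumed to be running):

package lcowexample // hypothetical package, using the vendored deps from this PR

import (
	"fmt"
	"os"
	"time"

	"github.com/jhowardmsft/opengcs/gogcs/client"
)

// runInUVM assumes cfg.Uvm has already been started (e.g. via StartUtilityVM).
func runInUVM(cfg *client.Config) error {
	proc, err := cfg.RunProcess("ls /", nil, os.Stdout, os.Stderr)
	if err != nil {
		return err
	}
	defer proc.Close() // closing is now the caller's responsibility

	proc.WaitTimeout(time.Duration(int(time.Second) * cfg.UvmTimeoutSeconds))
	exitCode, err := proc.ExitCode()
	if err != nil {
		return err
	}
	fmt.Println("exit code:", exitCode)
	return nil
}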

+ 4 - 4
vendor/github.com/jhowardmsft/opengcs/gogcs/client/tartovhd.go

@@ -19,24 +19,24 @@ func (config *Config) TarToVhd(targetVHDFile string, reader io.Reader) (int64, e
 
 	process, err := config.createUtilsProcess("tar2vhd")
 	if err != nil {
-		return 0, fmt.Errorf("opengcs: TarToVhd: %s: failed to create utils process tar2vhd: %s", targetVHDFile, err)
+		return 0, fmt.Errorf("failed to start tar2vhd for %s: %s", targetVHDFile, err)
 	}
 	defer process.Process.Close()
 
 	// Send the tarstream into the `tar2vhd`s stdin
 	if _, err = copyWithTimeout(process.Stdin, reader, 0, config.UvmTimeoutSeconds, fmt.Sprintf("stdin of tar2vhd for generating %s", targetVHDFile)); err != nil {
-		return 0, fmt.Errorf("opengcs: TarToVhd: %s: failed to send to tar2vhd in uvm: %s", targetVHDFile, err)
+		return 0, fmt.Errorf("failed sending to tar2vhd for %s: %s", targetVHDFile, err)
 	}
 
 	// Don't need stdin now we've sent everything. This signals GCS that we are finished sending data.
 	if err := process.Process.CloseStdin(); err != nil {
-		return 0, fmt.Errorf("opengcs: TarToVhd: %s: failed closing stdin handle: %s", targetVHDFile, err)
+		return 0, fmt.Errorf("failed closing stdin handle for %s: %s", targetVHDFile, err)
 	}
 
 	// Write stdout contents of `tar2vhd` to the VHD file
 	payloadSize, err := writeFileFromReader(targetVHDFile, process.Stdout, config.UvmTimeoutSeconds, fmt.Sprintf("stdout of tar2vhd to %s", targetVHDFile))
 	if err != nil {
-		return 0, fmt.Errorf("opengcs: TarToVhd: %s: failed writing VHD file: %s", targetVHDFile, err)
+		return 0, fmt.Errorf("failed to write %s during tar2vhd: %s", targetVHDFile, err)
 	}
 
 	logrus.Debugf("opengcs: TarToVhd: %s created, %d bytes", targetVHDFile, payloadSize)