lcow.go 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966
  1. // +build windows
  2. // Maintainer: jhowardmsft
  3. // Locale: en-gb
  4. // About: Graph-driver for Linux Containers On Windows (LCOW)
  5. //
  6. // This graphdriver runs in two modes. Yet to be determined which one will
  7. // be the shipping mode. The global mode is where a single utility VM
  8. // is used for all service VM tool operations. This isn't safe security-wise
  9. // as it's attaching a sandbox of multiple containers to it, containing
  10. // untrusted data. This may be fine for client devops scenarios. In
  11. // safe mode, a unique utility VM is instantiated for all service VM tool
  12. // operations. The downside of safe-mode is that operations are slower as
  13. // a new service utility VM has to be started and torn-down when needed.
  14. //
  15. // Options:
  16. //
  17. // The following options are read by the graphdriver itself:
  18. //
  19. // * lcow.globalmode - Enables global service VM Mode
  20. // -- Possible values: true/false
  21. // -- Default if omitted: false
  22. //
  23. // * lcow.sandboxsize - Specifies a custom sandbox size in GB for starting a container
  24. // -- Possible values: >= default sandbox size (opengcs defined, currently 20)
  25. // -- Default if omitted: 20
  26. //
  27. // The following options are read by opengcs:
  28. //
  29. // * lcow.kirdpath - Specifies a custom path to a kernel/initrd pair
  30. // -- Possible values: Any local path that is not a mapped drive
  31. // -- Default if omitted: %ProgramFiles%\Linux Containers
  32. //
  33. // * lcow.kernel - Specifies a custom kernel file located in the `lcow.kirdpath` path
  34. // -- Possible values: Any valid filename
  35. // -- Default if omitted: bootx64.efi
  36. //
  37. // * lcow.initrd - Specifies a custom initrd file located in the `lcow.kirdpath` path
  38. // -- Possible values: Any valid filename
  39. // -- Default if omitted: initrd.img
  40. //
  41. // * lcow.bootparameters - Specifies additional boot parameters for booting in kernel+initrd mode
  42. // -- Possible values: Any valid linux kernel boot options
  43. // -- Default if omitted: <nil>
  44. //
  45. // * lcow.vhdx - Specifies a custom vhdx file to boot (instead of a kernel+initrd)
  46. // -- Possible values: Any valid filename
  47. // -- Default if omitted: uvm.vhdx under `lcow.kirdpath`
  48. //
  49. // * lcow.timeout - Specifies a timeout for utility VM operations in seconds
  50. // -- Possible values: >=0
  51. // -- Default if omitted: 300
  52. // TODO: Grab logs from SVM at terminate or errors
  53. package lcow
  54. import (
  55. "encoding/json"
  56. "fmt"
  57. "io"
  58. "io/ioutil"
  59. "os"
  60. "path/filepath"
  61. "strconv"
  62. "strings"
  63. "sync"
  64. "syscall"
  65. "time"
  66. "github.com/Microsoft/hcsshim"
  67. "github.com/Microsoft/opengcs/client"
  68. "github.com/docker/docker/daemon/graphdriver"
  69. "github.com/docker/docker/pkg/archive"
  70. "github.com/docker/docker/pkg/containerfs"
  71. "github.com/docker/docker/pkg/idtools"
  72. "github.com/docker/docker/pkg/ioutils"
  73. "github.com/docker/docker/pkg/system"
  74. "github.com/sirupsen/logrus"
  75. )
// init registers the LCOW graph driver with the graphdriver registry under
// the name "lcow". The driver itself is constructed lazily by the
// InitDriver function passed as the second parameter, implemented in this file.
func init() {
	graphdriver.Register("lcow", InitDriver)
}
const (
	// sandboxFilename is the name of the file containing a layer's sandbox (read-write layer).
	sandboxFilename = "sandbox.vhdx"

	// scratchFilename is the name of the scratch-space used by an SVM to avoid running out of memory.
	scratchFilename = "scratch.vhdx"

	// layerFilename is the name of the file containing a layer's read-only contents.
	// Note this really is VHD format, not VHDX.
	layerFilename = "layer.vhd"

	// toolsScratchPath is a location in a service utility VM that the tools can use as a
	// scratch space to avoid running out of memory.
	toolsScratchPath = "/tmp/scratch"

	// svmGlobalID is the ID used in the serviceVMs map for the global service VM when running in "global" mode.
	svmGlobalID = "_lcow_global_svm_"

	// cacheDirectory is the sub-folder under the driver's data-root used to cache blank sandbox and scratch VHDs.
	cacheDirectory = "cache"

	// scratchDirectory is the sub-folder under the driver's data-root used for scratch VHDs in service VMs
	scratchDirectory = "scratch"

	// errOperationPending is the HRESULT returned by the HCS when the VM termination operation is still pending.
	errOperationPending syscall.Errno = 0xc0370103
)
// Driver represents an LCOW graph driver.
type Driver struct {
	dataRoot           string     // Root path on the host where we are storing everything.
	cachedSandboxFile  string     // Location of the local default-sized cached sandbox.
	cachedSandboxMutex sync.Mutex // Protects race conditions from multiple threads creating the cached sandbox.
	cachedScratchFile  string     // Location of the local cached empty scratch space.
	cachedScratchMutex sync.Mutex // Protects race conditions from multiple threads creating the cached scratch.
	options            []string   // Graphdriver options we are initialised with.
	globalMode         bool       // Indicates if running in an unsafe/global service VM mode.

	// NOTE: It is OK to use a cache here because Windows does not support
	// restoring containers when the daemon dies.
	serviceVms *serviceVMMap // Map of the configs representing the service VM(s) we are running.
}
// layerDetails is the structure returned by a helper function `getLayerDetails`
// for getting information about a layer folder
type layerDetails struct {
	filename  string // \path\to\sandbox.vhdx or \path\to\layer.vhd
	size      int64  // size of the above file
	isSandbox bool   // true if sandbox.vhdx
}
  121. // deletefiles is a helper function for initialisation where we delete any
  122. // left-over scratch files in case we were previously forcibly terminated.
  123. func deletefiles(path string, f os.FileInfo, err error) error {
  124. if strings.HasSuffix(f.Name(), ".vhdx") {
  125. logrus.Warnf("lcowdriver: init: deleting stale scratch file %s", path)
  126. return os.Remove(path)
  127. }
  128. return nil
  129. }
  130. // InitDriver returns a new LCOW storage driver.
  131. func InitDriver(dataRoot string, options []string, _, _ []idtools.IDMap) (graphdriver.Driver, error) {
  132. title := "lcowdriver: init:"
  133. cd := filepath.Join(dataRoot, cacheDirectory)
  134. sd := filepath.Join(dataRoot, scratchDirectory)
  135. d := &Driver{
  136. dataRoot: dataRoot,
  137. options: options,
  138. cachedSandboxFile: filepath.Join(cd, sandboxFilename),
  139. cachedScratchFile: filepath.Join(cd, scratchFilename),
  140. serviceVms: &serviceVMMap{
  141. svms: make(map[string]*serviceVMMapItem),
  142. },
  143. globalMode: false,
  144. }
  145. // Looks for relevant options
  146. for _, v := range options {
  147. opt := strings.SplitN(v, "=", 2)
  148. if len(opt) == 2 {
  149. switch strings.ToLower(opt[0]) {
  150. case "lcow.globalmode":
  151. var err error
  152. d.globalMode, err = strconv.ParseBool(opt[1])
  153. if err != nil {
  154. return nil, fmt.Errorf("%s failed to parse value for 'lcow.globalmode' - must be 'true' or 'false'", title)
  155. }
  156. break
  157. }
  158. }
  159. }
  160. // Make sure the dataRoot directory is created
  161. if err := idtools.MkdirAllAndChown(dataRoot, 0700, idtools.IDPair{UID: 0, GID: 0}); err != nil {
  162. return nil, fmt.Errorf("%s failed to create '%s': %v", title, dataRoot, err)
  163. }
  164. // Make sure the cache directory is created under dataRoot
  165. if err := idtools.MkdirAllAndChown(cd, 0700, idtools.IDPair{UID: 0, GID: 0}); err != nil {
  166. return nil, fmt.Errorf("%s failed to create '%s': %v", title, cd, err)
  167. }
  168. // Make sure the scratch directory is created under dataRoot
  169. if err := idtools.MkdirAllAndChown(sd, 0700, idtools.IDPair{UID: 0, GID: 0}); err != nil {
  170. return nil, fmt.Errorf("%s failed to create '%s': %v", title, sd, err)
  171. }
  172. // Delete any items in the scratch directory
  173. filepath.Walk(sd, deletefiles)
  174. logrus.Infof("%s dataRoot: %s globalMode: %t", title, dataRoot, d.globalMode)
  175. return d, nil
  176. }
  177. func (d *Driver) getVMID(id string) string {
  178. if d.globalMode {
  179. return svmGlobalID
  180. }
  181. return id
  182. }
  183. // startServiceVMIfNotRunning starts a service utility VM if it is not currently running.
  184. // It can optionally be started with a mapped virtual disk. Returns a opengcs config structure
  185. // representing the VM.
  186. func (d *Driver) startServiceVMIfNotRunning(id string, mvdToAdd []hcsshim.MappedVirtualDisk, context string) (_ *serviceVM, err error) {
  187. // Use the global ID if in global mode
  188. id = d.getVMID(id)
  189. title := fmt.Sprintf("lcowdriver: startservicevmifnotrunning %s:", id)
  190. // Attempt to add ID to the service vm map
  191. logrus.Debugf("%s: Adding entry to service vm map", title)
  192. svm, exists, err := d.serviceVms.add(id)
  193. if err != nil && err == errVMisTerminating {
  194. // VM is in the process of terminating. Wait until it's done and and then try again
  195. logrus.Debugf("%s: VM with current ID still in the process of terminating: %s", title, id)
  196. if err := svm.getStopError(); err != nil {
  197. logrus.Debugf("%s: VM %s did not stop successfully: %s", title, id, err)
  198. return nil, err
  199. }
  200. return d.startServiceVMIfNotRunning(id, mvdToAdd, context)
  201. } else if err != nil {
  202. logrus.Debugf("%s: failed to add service vm to map: %s", err)
  203. return nil, fmt.Errorf("%s: failed to add to service vm map: %s", title, err)
  204. }
  205. if exists {
  206. // Service VM is already up and running. In this case, just hot add the vhds.
  207. logrus.Debugf("%s: service vm already exists. Just hot adding: %+v", title, mvdToAdd)
  208. if err := svm.hotAddVHDs(mvdToAdd...); err != nil {
  209. logrus.Debugf("%s: failed to hot add vhds on service vm creation: %s", title, err)
  210. return nil, fmt.Errorf("%s: failed to hot add vhds on service vm: %s", title, err)
  211. }
  212. return svm, nil
  213. }
  214. // We are the first service for this id, so we need to start it
  215. logrus.Debugf("%s: service vm doesn't exist. Now starting it up: %s", title, id)
  216. defer func() {
  217. // Signal that start has finished, passing in the error if any.
  218. svm.signalStartFinished(err)
  219. if err != nil {
  220. // We added a ref to the VM, since we failed, we should delete the ref.
  221. d.terminateServiceVM(id, "error path on startServiceVMIfNotRunning", false)
  222. }
  223. }()
  224. // Generate a default configuration
  225. if err := svm.config.GenerateDefault(d.options); err != nil {
  226. return nil, fmt.Errorf("%s failed to generate default gogcs configuration for global svm (%s): %s", title, context, err)
  227. }
  228. // For the name, we deliberately suffix if safe-mode to ensure that it doesn't
  229. // clash with another utility VM which may be running for the container itself.
  230. // This also makes it easier to correlate through Get-ComputeProcess.
  231. if id == svmGlobalID {
  232. svm.config.Name = svmGlobalID
  233. } else {
  234. svm.config.Name = fmt.Sprintf("%s_svm", id)
  235. }
  236. // Ensure we take the cached scratch mutex around the check to ensure the file is complete
  237. // and not in the process of being created by another thread.
  238. scratchTargetFile := filepath.Join(d.dataRoot, scratchDirectory, fmt.Sprintf("%s.vhdx", id))
  239. logrus.Debugf("%s locking cachedScratchMutex", title)
  240. d.cachedScratchMutex.Lock()
  241. if _, err := os.Stat(d.cachedScratchFile); err == nil {
  242. // Make a copy of cached scratch to the scratch directory
  243. logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) cloning cached scratch for mvd", context)
  244. if err := client.CopyFile(d.cachedScratchFile, scratchTargetFile, true); err != nil {
  245. logrus.Debugf("%s releasing cachedScratchMutex on err: %s", title, err)
  246. d.cachedScratchMutex.Unlock()
  247. return nil, err
  248. }
  249. // Add the cached clone as a mapped virtual disk
  250. logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) adding cloned scratch as mvd", context)
  251. mvd := hcsshim.MappedVirtualDisk{
  252. HostPath: scratchTargetFile,
  253. ContainerPath: toolsScratchPath,
  254. CreateInUtilityVM: true,
  255. }
  256. svm.config.MappedVirtualDisks = append(svm.config.MappedVirtualDisks, mvd)
  257. svm.scratchAttached = true
  258. }
  259. logrus.Debugf("%s releasing cachedScratchMutex", title)
  260. d.cachedScratchMutex.Unlock()
  261. // If requested to start it with a mapped virtual disk, add it now.
  262. svm.config.MappedVirtualDisks = append(svm.config.MappedVirtualDisks, mvdToAdd...)
  263. for _, mvd := range svm.config.MappedVirtualDisks {
  264. svm.attachedVHDs[mvd.HostPath] = 1
  265. }
  266. // Start it.
  267. logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) starting %s", context, svm.config.Name)
  268. if err := svm.config.StartUtilityVM(); err != nil {
  269. return nil, fmt.Errorf("failed to start service utility VM (%s): %s", context, err)
  270. }
  271. // defer function to terminate the VM if the next steps fail
  272. defer func() {
  273. if err != nil {
  274. waitTerminate(svm, fmt.Sprintf("startServiceVmIfNotRunning: %s (%s)", id, context))
  275. }
  276. }()
  277. // Now we have a running service VM, we can create the cached scratch file if it doesn't exist.
  278. logrus.Debugf("%s locking cachedScratchMutex", title)
  279. d.cachedScratchMutex.Lock()
  280. if _, err := os.Stat(d.cachedScratchFile); err != nil {
  281. logrus.Debugf("%s (%s): creating an SVM scratch", title, context)
  282. // Don't use svm.CreateExt4Vhdx since that only works when the service vm is setup,
  283. // but we're still in that process right now.
  284. if err := svm.config.CreateExt4Vhdx(scratchTargetFile, client.DefaultVhdxSizeGB, d.cachedScratchFile); err != nil {
  285. logrus.Debugf("%s (%s): releasing cachedScratchMutex on error path", title, context)
  286. d.cachedScratchMutex.Unlock()
  287. logrus.Debugf("%s: failed to create vm scratch %s: %s", title, scratchTargetFile, err)
  288. return nil, fmt.Errorf("failed to create SVM scratch VHDX (%s): %s", context, err)
  289. }
  290. }
  291. logrus.Debugf("%s (%s): releasing cachedScratchMutex", title, context)
  292. d.cachedScratchMutex.Unlock()
  293. // Hot-add the scratch-space if not already attached
  294. if !svm.scratchAttached {
  295. logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) hot-adding scratch %s", context, scratchTargetFile)
  296. if err := svm.hotAddVHDsAtStart(hcsshim.MappedVirtualDisk{
  297. HostPath: scratchTargetFile,
  298. ContainerPath: toolsScratchPath,
  299. CreateInUtilityVM: true,
  300. }); err != nil {
  301. logrus.Debugf("%s: failed to hot-add scratch %s: %s", title, scratchTargetFile, err)
  302. return nil, fmt.Errorf("failed to hot-add %s failed: %s", scratchTargetFile, err)
  303. }
  304. svm.scratchAttached = true
  305. }
  306. logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) success", context)
  307. return svm, nil
  308. }
  309. // terminateServiceVM terminates a service utility VM if its running if it's,
  310. // not being used by any goroutine, but does nothing when in global mode as it's
  311. // lifetime is limited to that of the daemon. If the force flag is set, then
  312. // the VM will be killed regardless of the ref count or if it's global.
  313. func (d *Driver) terminateServiceVM(id, context string, force bool) (err error) {
  314. // We don't do anything in safe mode unless the force flag has been passed, which
  315. // is only the case for cleanup at driver termination.
  316. if d.globalMode && !force {
  317. logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - doing nothing as in global mode", id, context)
  318. return nil
  319. }
  320. id = d.getVMID(id)
  321. var svm *serviceVM
  322. var lastRef bool
  323. if !force {
  324. // In the not force case, we ref count
  325. svm, lastRef, err = d.serviceVms.decrementRefCount(id)
  326. } else {
  327. // In the force case, we ignore the ref count and just set it to 0
  328. svm, err = d.serviceVms.setRefCountZero(id)
  329. lastRef = true
  330. }
  331. if err == errVMUnknown {
  332. return nil
  333. } else if err == errVMisTerminating {
  334. return svm.getStopError()
  335. } else if !lastRef {
  336. return nil
  337. }
  338. // We run the deletion of the scratch as a deferred function to at least attempt
  339. // clean-up in case of errors.
  340. defer func() {
  341. if svm.scratchAttached {
  342. scratchTargetFile := filepath.Join(d.dataRoot, scratchDirectory, fmt.Sprintf("%s.vhdx", id))
  343. logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - deleting scratch %s", id, context, scratchTargetFile)
  344. if errRemove := os.Remove(scratchTargetFile); errRemove != nil {
  345. logrus.Warnf("failed to remove scratch file %s (%s): %s", scratchTargetFile, context, errRemove)
  346. err = errRemove
  347. }
  348. }
  349. // This function shouldn't actually return error unless there is a bug
  350. if errDelete := d.serviceVms.deleteID(id); errDelete != nil {
  351. logrus.Warnf("failed to service vm from svm map %s (%s): %s", id, context, errDelete)
  352. }
  353. // Signal that this VM has stopped
  354. svm.signalStopFinished(err)
  355. }()
  356. // Now it's possible that the serivce VM failed to start and now we are trying to termiante it.
  357. // In this case, we will relay the error to the goroutines waiting for this vm to stop.
  358. if err := svm.getStartError(); err != nil {
  359. logrus.Debugf("lcowdriver: terminateservicevm: %s had failed to start up: %s", id, err)
  360. return err
  361. }
  362. if err := waitTerminate(svm, fmt.Sprintf("terminateservicevm: %s (%s)", id, context)); err != nil {
  363. return err
  364. }
  365. logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - success", id, context)
  366. return nil
  367. }
  368. func waitTerminate(svm *serviceVM, context string) error {
  369. if svm.config == nil {
  370. return fmt.Errorf("lcowdriver: waitTermiante: Nil utility VM. %s", context)
  371. }
  372. logrus.Debugf("lcowdriver: waitTerminate: Calling terminate: %s", context)
  373. if err := svm.config.Uvm.Terminate(); err != nil {
  374. // We might get operation still pending from the HCS. In that case, we shouldn't return
  375. // an error since we call wait right after.
  376. underlyingError := err
  377. if conterr, ok := err.(*hcsshim.ContainerError); ok {
  378. underlyingError = conterr.Err
  379. }
  380. if syscallErr, ok := underlyingError.(syscall.Errno); ok {
  381. underlyingError = syscallErr
  382. }
  383. if underlyingError != errOperationPending {
  384. return fmt.Errorf("failed to terminate utility VM (%s): %s", context, err)
  385. }
  386. logrus.Debugf("lcowdriver: waitTerminate: uvm.Terminate() returned operation pending (%s)", context)
  387. }
  388. logrus.Debugf("lcowdriver: waitTerminate: (%s) - waiting for utility VM to terminate", context)
  389. if err := svm.config.Uvm.WaitTimeout(time.Duration(svm.config.UvmTimeoutSeconds) * time.Second); err != nil {
  390. return fmt.Errorf("failed waiting for utility VM to terminate (%s): %s", context, err)
  391. }
  392. return nil
  393. }
  394. // String returns the string representation of a driver. This should match
  395. // the name the graph driver has been registered with.
  396. func (d *Driver) String() string {
  397. return "lcow"
  398. }
  399. // Status returns the status of the driver.
  400. func (d *Driver) Status() [][2]string {
  401. return [][2]string{
  402. {"LCOW", ""},
  403. // TODO: Add some more info here - mode, home, ....
  404. }
  405. }
  406. // Exists returns true if the given id is registered with this driver.
  407. func (d *Driver) Exists(id string) bool {
  408. _, err := os.Lstat(d.dir(id))
  409. logrus.Debugf("lcowdriver: exists: id %s %t", id, err == nil)
  410. return err == nil
  411. }
  412. // CreateReadWrite creates a layer that is writable for use as a container
  413. // file system. That equates to creating a sandbox.
  414. func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
  415. title := fmt.Sprintf("lcowdriver: createreadwrite: id %s", id)
  416. logrus.Debugf(title)
  417. // First we need to create the folder
  418. if err := d.Create(id, parent, opts); err != nil {
  419. return err
  420. }
  421. // Look for an explicit sandbox size option.
  422. sandboxSize := uint64(client.DefaultVhdxSizeGB)
  423. for k, v := range opts.StorageOpt {
  424. switch strings.ToLower(k) {
  425. case "lcow.sandboxsize":
  426. var err error
  427. sandboxSize, err = strconv.ParseUint(v, 10, 32)
  428. if err != nil {
  429. return fmt.Errorf("%s failed to parse value '%s' for 'lcow.sandboxsize'", title, v)
  430. }
  431. if sandboxSize < client.DefaultVhdxSizeGB {
  432. return fmt.Errorf("%s 'lcow.sandboxsize' option cannot be less than %d", title, client.DefaultVhdxSizeGB)
  433. }
  434. break
  435. }
  436. }
  437. // Massive perf optimisation here. If we know that the RW layer is the default size,
  438. // and that the cached sandbox already exists, and we are running in safe mode, we
  439. // can just do a simple copy into the layers sandbox file without needing to start a
  440. // unique service VM. For a global service VM, it doesn't really matter. Of course,
  441. // this is only the case where the sandbox is the default size.
  442. //
  443. // Make sure we have the sandbox mutex taken while we are examining it.
  444. if sandboxSize == client.DefaultVhdxSizeGB {
  445. logrus.Debugf("%s: locking cachedSandboxMutex", title)
  446. d.cachedSandboxMutex.Lock()
  447. _, err := os.Stat(d.cachedSandboxFile)
  448. logrus.Debugf("%s: releasing cachedSandboxMutex", title)
  449. d.cachedSandboxMutex.Unlock()
  450. if err == nil {
  451. logrus.Debugf("%s: using cached sandbox to populate", title)
  452. if err := client.CopyFile(d.cachedSandboxFile, filepath.Join(d.dir(id), sandboxFilename), true); err != nil {
  453. return err
  454. }
  455. return nil
  456. }
  457. }
  458. logrus.Debugf("%s: creating SVM to create sandbox", title)
  459. svm, err := d.startServiceVMIfNotRunning(id, nil, "createreadwrite")
  460. if err != nil {
  461. return err
  462. }
  463. defer d.terminateServiceVM(id, "createreadwrite", false)
  464. // So the sandbox needs creating. If default size ensure we are the only thread populating the cache.
  465. // Non-default size we don't store, just create them one-off so no need to lock the cachedSandboxMutex.
  466. if sandboxSize == client.DefaultVhdxSizeGB {
  467. logrus.Debugf("%s: locking cachedSandboxMutex for creation", title)
  468. d.cachedSandboxMutex.Lock()
  469. defer func() {
  470. logrus.Debugf("%s: releasing cachedSandboxMutex for creation", title)
  471. d.cachedSandboxMutex.Unlock()
  472. }()
  473. }
  474. // Make sure we don't write to our local cached copy if this is for a non-default size request.
  475. targetCacheFile := d.cachedSandboxFile
  476. if sandboxSize != client.DefaultVhdxSizeGB {
  477. targetCacheFile = ""
  478. }
  479. // Create the ext4 vhdx
  480. logrus.Debugf("%s: creating sandbox ext4 vhdx", title)
  481. if err := svm.createExt4VHDX(filepath.Join(d.dir(id), sandboxFilename), uint32(sandboxSize), targetCacheFile); err != nil {
  482. logrus.Debugf("%s: failed to create sandbox vhdx for %s: %s", title, id, err)
  483. return err
  484. }
  485. return nil
  486. }
  487. // Create creates the folder for the layer with the given id, and
  488. // adds it to the layer chain.
  489. func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
  490. logrus.Debugf("lcowdriver: create: id %s parent: %s", id, parent)
  491. parentChain, err := d.getLayerChain(parent)
  492. if err != nil {
  493. return err
  494. }
  495. var layerChain []string
  496. if parent != "" {
  497. if !d.Exists(parent) {
  498. return fmt.Errorf("lcowdriver: cannot create layer folder with missing parent %s", parent)
  499. }
  500. layerChain = []string{d.dir(parent)}
  501. }
  502. layerChain = append(layerChain, parentChain...)
  503. // Make sure layers are created with the correct ACL so that VMs can access them.
  504. layerPath := d.dir(id)
  505. logrus.Debugf("lcowdriver: create: id %s: creating %s", id, layerPath)
  506. if err := system.MkdirAllWithACL(layerPath, 755, system.SddlNtvmAdministratorsLocalSystem); err != nil {
  507. return err
  508. }
  509. if err := d.setLayerChain(id, layerChain); err != nil {
  510. if err2 := os.RemoveAll(layerPath); err2 != nil {
  511. logrus.Warnf("failed to remove layer %s: %s", layerPath, err2)
  512. }
  513. return err
  514. }
  515. logrus.Debugf("lcowdriver: create: id %s: success", id)
  516. return nil
  517. }
  518. // Remove unmounts and removes the dir information.
  519. func (d *Driver) Remove(id string) error {
  520. logrus.Debugf("lcowdriver: remove: id %s", id)
  521. tmpID := fmt.Sprintf("%s-removing", id)
  522. tmpLayerPath := d.dir(tmpID)
  523. layerPath := d.dir(id)
  524. logrus.Debugf("lcowdriver: remove: id %s: layerPath %s", id, layerPath)
  525. // Unmount all the layers
  526. err := d.Put(id)
  527. if err != nil {
  528. logrus.Debugf("lcowdriver: remove id %s: failed to unmount: %s", id, err)
  529. return err
  530. }
  531. // for non-global case just kill the vm
  532. if !d.globalMode {
  533. if err := d.terminateServiceVM(id, fmt.Sprintf("Remove %s", id), true); err != nil {
  534. return err
  535. }
  536. }
  537. if err := os.Rename(layerPath, tmpLayerPath); err != nil && !os.IsNotExist(err) {
  538. return err
  539. }
  540. if err := os.RemoveAll(tmpLayerPath); err != nil {
  541. return err
  542. }
  543. logrus.Debugf("lcowdriver: remove: id %s: layerPath %s succeeded", id, layerPath)
  544. return nil
  545. }
  546. // Get returns the rootfs path for the id. It is reference counted and
  547. // effectively can be thought of as a "mount the layer into the utility
  548. // vm if it isn't already". The contract from the caller of this is that
  549. // all Gets and Puts are matched. It -should- be the case that on cleanup,
  550. // nothing is mounted.
  551. //
  552. // For optimisation, we don't actually mount the filesystem (which in our
  553. // case means [hot-]adding it to a service VM. But we track that and defer
  554. // the actual adding to the point we need to access it.
  555. func (d *Driver) Get(id, mountLabel string) (containerfs.ContainerFS, error) {
  556. title := fmt.Sprintf("lcowdriver: get: %s", id)
  557. logrus.Debugf(title)
  558. // Generate the mounts needed for the defered operation.
  559. disks, err := d.getAllMounts(id)
  560. if err != nil {
  561. logrus.Debugf("%s failed to get all layer details for %s: %s", title, d.dir(id), err)
  562. return nil, fmt.Errorf("%s failed to get layer details for %s: %s", title, d.dir(id), err)
  563. }
  564. logrus.Debugf("%s: got layer mounts: %+v", title, disks)
  565. return &lcowfs{
  566. root: unionMountName(disks),
  567. d: d,
  568. mappedDisks: disks,
  569. vmID: d.getVMID(id),
  570. }, nil
  571. }
  572. // Put does the reverse of get. If there are no more references to
  573. // the layer, it unmounts it from the utility VM.
  574. func (d *Driver) Put(id string) error {
  575. title := fmt.Sprintf("lcowdriver: put: %s", id)
  576. // Get the service VM that we need to remove from
  577. svm, err := d.serviceVms.get(d.getVMID(id))
  578. if err == errVMUnknown {
  579. return nil
  580. } else if err == errVMisTerminating {
  581. return svm.getStopError()
  582. }
  583. // Generate the mounts that Get() might have mounted
  584. disks, err := d.getAllMounts(id)
  585. if err != nil {
  586. logrus.Debugf("%s failed to get all layer details for %s: %s", title, d.dir(id), err)
  587. return fmt.Errorf("%s failed to get layer details for %s: %s", title, d.dir(id), err)
  588. }
  589. // Now, we want to perform the unmounts, hot-remove and stop the service vm.
  590. // We want to go though all the steps even if we have an error to clean up properly
  591. err = svm.deleteUnionMount(unionMountName(disks), disks...)
  592. if err != nil {
  593. logrus.Debugf("%s failed to delete union mount %s: %s", title, id, err)
  594. }
  595. err1 := svm.hotRemoveVHDs(disks...)
  596. if err1 != nil {
  597. logrus.Debugf("%s failed to hot remove vhds %s: %s", title, id, err)
  598. if err == nil {
  599. err = err1
  600. }
  601. }
  602. err1 = d.terminateServiceVM(id, fmt.Sprintf("Put %s", id), false)
  603. if err1 != nil {
  604. logrus.Debugf("%s failed to terminate service vm %s: %s", title, id, err1)
  605. if err == nil {
  606. err = err1
  607. }
  608. }
  609. logrus.Debugf("Put succeeded on id %s", id)
  610. return err
  611. }
  612. // Cleanup ensures the information the driver stores is properly removed.
  613. // We use this opportunity to cleanup any -removing folders which may be
  614. // still left if the daemon was killed while it was removing a layer.
  615. func (d *Driver) Cleanup() error {
  616. title := "lcowdriver: cleanup"
  617. items, err := ioutil.ReadDir(d.dataRoot)
  618. if err != nil {
  619. if os.IsNotExist(err) {
  620. return nil
  621. }
  622. return err
  623. }
  624. // Note we don't return an error below - it's possible the files
  625. // are locked. However, next time around after the daemon exits,
  626. // we likely will be able to to cleanup successfully. Instead we log
  627. // warnings if there are errors.
  628. for _, item := range items {
  629. if item.IsDir() && strings.HasSuffix(item.Name(), "-removing") {
  630. if err := os.RemoveAll(filepath.Join(d.dataRoot, item.Name())); err != nil {
  631. logrus.Warnf("%s failed to cleanup %s: %s", title, item.Name(), err)
  632. } else {
  633. logrus.Infof("%s cleaned up %s", title, item.Name())
  634. }
  635. }
  636. }
  637. // Cleanup any service VMs we have running, along with their scratch spaces.
  638. // We don't take the lock for this as it's taken in terminateServiceVm.
  639. for k, v := range d.serviceVms.svms {
  640. logrus.Debugf("%s svm entry: %s: %+v", title, k, v)
  641. d.terminateServiceVM(k, "cleanup", true)
  642. }
  643. return nil
  644. }
// Diff takes a layer (and it's parent layer which may be null, but
// is ignored by this implementation below) and returns a reader for
// a tarstream representing the layers contents. The id could be
// a read-only "layer.vhd" or a read-write "sandbox.vhdx". The semantics
// of this function dictate that the layer is already mounted.
// However, as we do lazy mounting as a performance optimisation,
// this will likely not be the case.
//
// The returned ReadCloser defers all teardown (hot-remove of the VHD and
// release of the service VM reference) to its Close function, so the
// service VM stays alive until the caller has fully consumed the stream.
func (d *Driver) Diff(id, parent string) (io.ReadCloser, error) {
	title := fmt.Sprintf("lcowdriver: diff: %s", id)

	// Get VHDX info
	ld, err := getLayerDetails(d.dir(id))
	if err != nil {
		logrus.Debugf("%s: failed to get vhdx information of %s: %s", title, d.dir(id), err)
		return nil, err
	}

	// Start the SVM with a mapped virtual disk. Note that if the SVM is
	// already running and we are in global mode, this will be
	// hot-added.
	mvd := hcsshim.MappedVirtualDisk{
		HostPath:          ld.filename,
		ContainerPath:     hostToGuest(ld.filename),
		CreateInUtilityVM: true,
		// Export is read-only regardless of whether the source is a sandbox.
		ReadOnly: true,
	}

	logrus.Debugf("%s: starting service VM", title)
	svm, err := d.startServiceVMIfNotRunning(id, []hcsshim.MappedVirtualDisk{mvd}, fmt.Sprintf("diff %s", id))
	if err != nil {
		return nil, err
	}

	logrus.Debugf("lcowdriver: diff: waiting for svm to finish booting")
	err = svm.getStartError()
	if err != nil {
		// Boot failed: drop the reference taken by startServiceVMIfNotRunning.
		d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
		return nil, fmt.Errorf("lcowdriver: diff: svm failed to boot: %s", err)
	}

	// Obtain the tar stream for it
	logrus.Debugf("%s: %s %s, size %d, ReadOnly %t", title, ld.filename, mvd.ContainerPath, ld.size, ld.isSandbox)
	tarReadCloser, err := svm.config.VhdToTar(mvd.HostPath, mvd.ContainerPath, ld.isSandbox, ld.size)
	if err != nil {
		// Undo the hot-add and release the VM reference before reporting failure.
		svm.hotRemoveVHDs(mvd)
		d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
		return nil, fmt.Errorf("%s failed to export layer to tar stream for id: %s, parent: %s : %s", title, id, parent, err)
	}

	logrus.Debugf("%s id %s parent %s completed successfully", title, id, parent)

	// In safe/non-global mode, we can't tear down the service VM until things have been read.
	return ioutils.NewReadCloserWrapper(tarReadCloser, func() error {
		tarReadCloser.Close()
		svm.hotRemoveVHDs(mvd)
		d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
		return nil
	}), nil
}
// ApplyDiff extracts the changeset from the given diff into the
// layer with the specified id and parent, returning the size of the
// new layer in bytes. The layer should not be mounted when calling
// this function. Another way of describing this is that ApplyDiff writes
// to a new layer (a VHD in LCOW) the contents of a tarstream it's given.
func (d *Driver) ApplyDiff(id, parent string, diff io.Reader) (int64, error) {
	logrus.Debugf("lcowdriver: applydiff: id %s", id)

	svm, err := d.startServiceVMIfNotRunning(id, nil, fmt.Sprintf("applydiff %s", id))
	if err != nil {
		return 0, err
	}
	// Release the service VM reference once the import has completed (or failed).
	defer d.terminateServiceVM(id, fmt.Sprintf("applydiff %s", id), false)

	logrus.Debugf("lcowdriver: applydiff: waiting for svm to finish booting")
	err = svm.getStartError()
	if err != nil {
		return 0, fmt.Errorf("lcowdriver: applydiff: svm failed to boot: %s", err)
	}

	// TODO @jhowardmsft - the retries are temporary to overcome platform reliability issues.
	// Obviously this will be removed as platform bugs are fixed.
	// NOTE(review): each retry passes the same `diff` reader, which a failed
	// attempt may already have partially consumed - confirm TarToVhd tolerates
	// a partially-read stream, otherwise retries can produce a corrupt layer.
	retries := 0
	for {
		retries++
		size, err := svm.config.TarToVhd(filepath.Join(d.dataRoot, id, layerFilename), diff)
		if err != nil {
			// Retry up to 10 more times before surfacing the error.
			if retries <= 10 {
				continue
			}
			return 0, err
		}
		return size, err
	}
}
  729. // Changes produces a list of changes between the specified layer
  730. // and its parent layer. If parent is "", then all changes will be ADD changes.
  731. // The layer should not be mounted when calling this function.
  732. func (d *Driver) Changes(id, parent string) ([]archive.Change, error) {
  733. logrus.Debugf("lcowdriver: changes: id %s parent %s", id, parent)
  734. // TODO @gupta-ak. Needs implementation with assistance from service VM
  735. return nil, nil
  736. }
  737. // DiffSize calculates the changes between the specified layer
  738. // and its parent and returns the size in bytes of the changes
  739. // relative to its base filesystem directory.
  740. func (d *Driver) DiffSize(id, parent string) (size int64, err error) {
  741. logrus.Debugf("lcowdriver: diffsize: id %s", id)
  742. // TODO @gupta-ak. Needs implementation with assistance from service VM
  743. return 0, nil
  744. }
  745. // GetMetadata returns custom driver information.
  746. func (d *Driver) GetMetadata(id string) (map[string]string, error) {
  747. logrus.Debugf("lcowdriver: getmetadata: id %s", id)
  748. m := make(map[string]string)
  749. m["dir"] = d.dir(id)
  750. return m, nil
  751. }
  752. // GetLayerPath gets the layer path on host (path to VHD/VHDX)
  753. func (d *Driver) GetLayerPath(id string) (string, error) {
  754. return d.dir(id), nil
  755. }
  756. // dir returns the absolute path to the layer.
  757. func (d *Driver) dir(id string) string {
  758. return filepath.Join(d.dataRoot, filepath.Base(id))
  759. }
  760. // getLayerChain returns the layer chain information.
  761. func (d *Driver) getLayerChain(id string) ([]string, error) {
  762. jPath := filepath.Join(d.dir(id), "layerchain.json")
  763. logrus.Debugf("lcowdriver: getlayerchain: id %s json %s", id, jPath)
  764. content, err := ioutil.ReadFile(jPath)
  765. if os.IsNotExist(err) {
  766. return nil, nil
  767. } else if err != nil {
  768. return nil, fmt.Errorf("lcowdriver: getlayerchain: %s unable to read layerchain file %s: %s", id, jPath, err)
  769. }
  770. var layerChain []string
  771. err = json.Unmarshal(content, &layerChain)
  772. if err != nil {
  773. return nil, fmt.Errorf("lcowdriver: getlayerchain: %s failed to unmarshall layerchain file %s: %s", id, jPath, err)
  774. }
  775. return layerChain, nil
  776. }
  777. // setLayerChain stores the layer chain information on disk.
  778. func (d *Driver) setLayerChain(id string, chain []string) error {
  779. content, err := json.Marshal(&chain)
  780. if err != nil {
  781. return fmt.Errorf("lcowdriver: setlayerchain: %s failed to marshall layerchain json: %s", id, err)
  782. }
  783. jPath := filepath.Join(d.dir(id), "layerchain.json")
  784. logrus.Debugf("lcowdriver: setlayerchain: id %s json %s", id, jPath)
  785. err = ioutil.WriteFile(jPath, content, 0600)
  786. if err != nil {
  787. return fmt.Errorf("lcowdriver: setlayerchain: %s failed to write layerchain file: %s", id, err)
  788. }
  789. return nil
  790. }
  791. // getLayerDetails is a utility for getting a file name, size and indication of
  792. // sandbox for a VHD(x) in a folder. A read-only layer will be layer.vhd. A
  793. // read-write layer will be sandbox.vhdx.
  794. func getLayerDetails(folder string) (*layerDetails, error) {
  795. var fileInfo os.FileInfo
  796. ld := &layerDetails{
  797. isSandbox: false,
  798. filename: filepath.Join(folder, layerFilename),
  799. }
  800. fileInfo, err := os.Stat(ld.filename)
  801. if err != nil {
  802. ld.filename = filepath.Join(folder, sandboxFilename)
  803. if fileInfo, err = os.Stat(ld.filename); err != nil {
  804. return nil, fmt.Errorf("failed to locate layer or sandbox in %s", folder)
  805. }
  806. ld.isSandbox = true
  807. }
  808. ld.size = fileInfo.Size()
  809. return ld, nil
  810. }
  811. func (d *Driver) getAllMounts(id string) ([]hcsshim.MappedVirtualDisk, error) {
  812. layerChain, err := d.getLayerChain(id)
  813. if err != nil {
  814. return nil, err
  815. }
  816. layerChain = append([]string{d.dir(id)}, layerChain...)
  817. logrus.Debugf("getting all layers: %v", layerChain)
  818. disks := make([]hcsshim.MappedVirtualDisk, len(layerChain), len(layerChain))
  819. for i := range layerChain {
  820. ld, err := getLayerDetails(layerChain[i])
  821. if err != nil {
  822. logrus.Debugf("Failed to get LayerVhdDetails from %s: %s", layerChain[i], err)
  823. return nil, err
  824. }
  825. disks[i].HostPath = ld.filename
  826. disks[i].ContainerPath = hostToGuest(ld.filename)
  827. disks[i].CreateInUtilityVM = true
  828. disks[i].ReadOnly = !ld.isSandbox
  829. }
  830. return disks, nil
  831. }
  832. func hostToGuest(hostpath string) string {
  833. return fmt.Sprintf("/tmp/%s", filepath.Base(filepath.Dir(hostpath)))
  834. }
  835. func unionMountName(disks []hcsshim.MappedVirtualDisk) string {
  836. return fmt.Sprintf("%s-mount", disks[0].ContainerPath)
  837. }