system.go 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606
  1. package hcs
  2. import (
  3. "context"
  4. "encoding/json"
  5. "errors"
  6. "strings"
  7. "sync"
  8. "syscall"
  9. "github.com/Microsoft/hcsshim/internal/cow"
  10. "github.com/Microsoft/hcsshim/internal/log"
  11. "github.com/Microsoft/hcsshim/internal/oc"
  12. "github.com/Microsoft/hcsshim/internal/schema1"
  13. hcsschema "github.com/Microsoft/hcsshim/internal/schema2"
  14. "github.com/Microsoft/hcsshim/internal/timeout"
  15. "github.com/Microsoft/hcsshim/internal/vmcompute"
  16. "go.opencensus.io/trace"
  17. )
  18. type System struct {
  19. handleLock sync.RWMutex
  20. handle vmcompute.HcsSystem
  21. id string
  22. callbackNumber uintptr
  23. closedWaitOnce sync.Once
  24. waitBlock chan struct{}
  25. waitError error
  26. exitError error
  27. os, typ string
  28. }
  29. func newSystem(id string) *System {
  30. return &System{
  31. id: id,
  32. waitBlock: make(chan struct{}),
  33. }
  34. }
  35. // CreateComputeSystem creates a new compute system with the given configuration but does not start it.
  36. func CreateComputeSystem(ctx context.Context, id string, hcsDocumentInterface interface{}) (_ *System, err error) {
  37. operation := "hcsshim::CreateComputeSystem"
  38. // hcsCreateComputeSystemContext is an async operation. Start the outer span
  39. // here to measure the full create time.
  40. ctx, span := trace.StartSpan(ctx, operation)
  41. defer span.End()
  42. defer func() { oc.SetSpanStatus(span, err) }()
  43. span.AddAttributes(trace.StringAttribute("cid", id))
  44. computeSystem := newSystem(id)
  45. hcsDocumentB, err := json.Marshal(hcsDocumentInterface)
  46. if err != nil {
  47. return nil, err
  48. }
  49. hcsDocument := string(hcsDocumentB)
  50. var (
  51. identity syscall.Handle
  52. resultJSON string
  53. createError error
  54. )
  55. computeSystem.handle, resultJSON, createError = vmcompute.HcsCreateComputeSystem(ctx, id, hcsDocument, identity)
  56. if createError == nil || IsPending(createError) {
  57. defer func() {
  58. if err != nil {
  59. computeSystem.Close()
  60. }
  61. }()
  62. if err = computeSystem.registerCallback(ctx); err != nil {
  63. // Terminate the compute system if it still exists. We're okay to
  64. // ignore a failure here.
  65. computeSystem.Terminate(ctx)
  66. return nil, makeSystemError(computeSystem, operation, "", err, nil)
  67. }
  68. }
  69. events, err := processAsyncHcsResult(ctx, createError, resultJSON, computeSystem.callbackNumber, hcsNotificationSystemCreateCompleted, &timeout.SystemCreate)
  70. if err != nil {
  71. if err == ErrTimeout {
  72. // Terminate the compute system if it still exists. We're okay to
  73. // ignore a failure here.
  74. computeSystem.Terminate(ctx)
  75. }
  76. return nil, makeSystemError(computeSystem, operation, hcsDocument, err, events)
  77. }
  78. go computeSystem.waitBackground()
  79. if err = computeSystem.getCachedProperties(ctx); err != nil {
  80. return nil, err
  81. }
  82. return computeSystem, nil
  83. }
  84. // OpenComputeSystem opens an existing compute system by ID.
  85. func OpenComputeSystem(ctx context.Context, id string) (*System, error) {
  86. operation := "hcsshim::OpenComputeSystem"
  87. computeSystem := newSystem(id)
  88. handle, resultJSON, err := vmcompute.HcsOpenComputeSystem(ctx, id)
  89. events := processHcsResult(ctx, resultJSON)
  90. if err != nil {
  91. return nil, makeSystemError(computeSystem, operation, "", err, events)
  92. }
  93. computeSystem.handle = handle
  94. defer func() {
  95. if err != nil {
  96. computeSystem.Close()
  97. }
  98. }()
  99. if err = computeSystem.registerCallback(ctx); err != nil {
  100. return nil, makeSystemError(computeSystem, operation, "", err, nil)
  101. }
  102. go computeSystem.waitBackground()
  103. if err = computeSystem.getCachedProperties(ctx); err != nil {
  104. return nil, err
  105. }
  106. return computeSystem, nil
  107. }
  108. func (computeSystem *System) getCachedProperties(ctx context.Context) error {
  109. props, err := computeSystem.Properties(ctx)
  110. if err != nil {
  111. return err
  112. }
  113. computeSystem.typ = strings.ToLower(props.SystemType)
  114. computeSystem.os = strings.ToLower(props.RuntimeOSType)
  115. if computeSystem.os == "" && computeSystem.typ == "container" {
  116. // Pre-RS5 HCS did not return the OS, but it only supported containers
  117. // that ran Windows.
  118. computeSystem.os = "windows"
  119. }
  120. return nil
  121. }
  122. // OS returns the operating system of the compute system, "linux" or "windows".
  123. func (computeSystem *System) OS() string {
  124. return computeSystem.os
  125. }
  126. // IsOCI returns whether processes in the compute system should be created via
  127. // OCI.
  128. func (computeSystem *System) IsOCI() bool {
  129. return computeSystem.os == "linux" && computeSystem.typ == "container"
  130. }
  131. // GetComputeSystems gets a list of the compute systems on the system that match the query
  132. func GetComputeSystems(ctx context.Context, q schema1.ComputeSystemQuery) ([]schema1.ContainerProperties, error) {
  133. operation := "hcsshim::GetComputeSystems"
  134. queryb, err := json.Marshal(q)
  135. if err != nil {
  136. return nil, err
  137. }
  138. computeSystemsJSON, resultJSON, err := vmcompute.HcsEnumerateComputeSystems(ctx, string(queryb))
  139. events := processHcsResult(ctx, resultJSON)
  140. if err != nil {
  141. return nil, &HcsError{Op: operation, Err: err, Events: events}
  142. }
  143. if computeSystemsJSON == "" {
  144. return nil, ErrUnexpectedValue
  145. }
  146. computeSystems := []schema1.ContainerProperties{}
  147. if err = json.Unmarshal([]byte(computeSystemsJSON), &computeSystems); err != nil {
  148. return nil, err
  149. }
  150. return computeSystems, nil
  151. }
  152. // Start synchronously starts the computeSystem.
  153. func (computeSystem *System) Start(ctx context.Context) (err error) {
  154. operation := "hcsshim::System::Start"
  155. // hcsStartComputeSystemContext is an async operation. Start the outer span
  156. // here to measure the full start time.
  157. ctx, span := trace.StartSpan(ctx, operation)
  158. defer span.End()
  159. defer func() { oc.SetSpanStatus(span, err) }()
  160. span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
  161. computeSystem.handleLock.RLock()
  162. defer computeSystem.handleLock.RUnlock()
  163. if computeSystem.handle == 0 {
  164. return makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil)
  165. }
  166. resultJSON, err := vmcompute.HcsStartComputeSystem(ctx, computeSystem.handle, "")
  167. events, err := processAsyncHcsResult(ctx, err, resultJSON, computeSystem.callbackNumber, hcsNotificationSystemStartCompleted, &timeout.SystemStart)
  168. if err != nil {
  169. return makeSystemError(computeSystem, operation, "", err, events)
  170. }
  171. return nil
  172. }
  173. // ID returns the compute system's identifier.
  174. func (computeSystem *System) ID() string {
  175. return computeSystem.id
  176. }
  177. // Shutdown requests a compute system shutdown.
  178. func (computeSystem *System) Shutdown(ctx context.Context) error {
  179. computeSystem.handleLock.RLock()
  180. defer computeSystem.handleLock.RUnlock()
  181. operation := "hcsshim::System::Shutdown"
  182. if computeSystem.handle == 0 {
  183. return nil
  184. }
  185. resultJSON, err := vmcompute.HcsShutdownComputeSystem(ctx, computeSystem.handle, "")
  186. events := processHcsResult(ctx, resultJSON)
  187. switch err {
  188. case nil, ErrVmcomputeAlreadyStopped, ErrComputeSystemDoesNotExist, ErrVmcomputeOperationPending:
  189. default:
  190. return makeSystemError(computeSystem, operation, "", err, events)
  191. }
  192. return nil
  193. }
  194. // Terminate requests a compute system terminate.
  195. func (computeSystem *System) Terminate(ctx context.Context) error {
  196. computeSystem.handleLock.RLock()
  197. defer computeSystem.handleLock.RUnlock()
  198. operation := "hcsshim::System::Terminate"
  199. if computeSystem.handle == 0 {
  200. return nil
  201. }
  202. resultJSON, err := vmcompute.HcsTerminateComputeSystem(ctx, computeSystem.handle, "")
  203. events := processHcsResult(ctx, resultJSON)
  204. switch err {
  205. case nil, ErrVmcomputeAlreadyStopped, ErrComputeSystemDoesNotExist, ErrVmcomputeOperationPending:
  206. default:
  207. return makeSystemError(computeSystem, operation, "", err, events)
  208. }
  209. return nil
  210. }
  211. // waitBackground waits for the compute system exit notification. Once received
  212. // sets `computeSystem.waitError` (if any) and unblocks all `Wait` calls.
  213. //
  214. // This MUST be called exactly once per `computeSystem.handle` but `Wait` is
  215. // safe to call multiple times.
  216. func (computeSystem *System) waitBackground() {
  217. operation := "hcsshim::System::waitBackground"
  218. ctx, span := trace.StartSpan(context.Background(), operation)
  219. defer span.End()
  220. span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
  221. err := waitForNotification(ctx, computeSystem.callbackNumber, hcsNotificationSystemExited, nil)
  222. switch err {
  223. case nil:
  224. log.G(ctx).Debug("system exited")
  225. case ErrVmcomputeUnexpectedExit:
  226. log.G(ctx).Debug("unexpected system exit")
  227. computeSystem.exitError = makeSystemError(computeSystem, operation, "", err, nil)
  228. err = nil
  229. default:
  230. err = makeSystemError(computeSystem, operation, "", err, nil)
  231. }
  232. computeSystem.closedWaitOnce.Do(func() {
  233. computeSystem.waitError = err
  234. close(computeSystem.waitBlock)
  235. })
  236. oc.SetSpanStatus(span, err)
  237. }
  238. // Wait synchronously waits for the compute system to shutdown or terminate. If
  239. // the compute system has already exited returns the previous error (if any).
  240. func (computeSystem *System) Wait() error {
  241. <-computeSystem.waitBlock
  242. return computeSystem.waitError
  243. }
  244. // ExitError returns an error describing the reason the compute system terminated.
  245. func (computeSystem *System) ExitError() error {
  246. select {
  247. case <-computeSystem.waitBlock:
  248. if computeSystem.waitError != nil {
  249. return computeSystem.waitError
  250. }
  251. return computeSystem.exitError
  252. default:
  253. return errors.New("container not exited")
  254. }
  255. }
  256. // Properties returns the requested container properties targeting a V1 schema container.
  257. func (computeSystem *System) Properties(ctx context.Context, types ...schema1.PropertyType) (*schema1.ContainerProperties, error) {
  258. computeSystem.handleLock.RLock()
  259. defer computeSystem.handleLock.RUnlock()
  260. operation := "hcsshim::System::Properties"
  261. queryBytes, err := json.Marshal(schema1.PropertyQuery{PropertyTypes: types})
  262. if err != nil {
  263. return nil, makeSystemError(computeSystem, operation, "", err, nil)
  264. }
  265. propertiesJSON, resultJSON, err := vmcompute.HcsGetComputeSystemProperties(ctx, computeSystem.handle, string(queryBytes))
  266. events := processHcsResult(ctx, resultJSON)
  267. if err != nil {
  268. return nil, makeSystemError(computeSystem, operation, "", err, events)
  269. }
  270. if propertiesJSON == "" {
  271. return nil, ErrUnexpectedValue
  272. }
  273. properties := &schema1.ContainerProperties{}
  274. if err := json.Unmarshal([]byte(propertiesJSON), properties); err != nil {
  275. return nil, makeSystemError(computeSystem, operation, "", err, nil)
  276. }
  277. return properties, nil
  278. }
  279. // PropertiesV2 returns the requested container properties targeting a V2 schema container.
  280. func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (*hcsschema.Properties, error) {
  281. computeSystem.handleLock.RLock()
  282. defer computeSystem.handleLock.RUnlock()
  283. operation := "hcsshim::System::PropertiesV2"
  284. queryBytes, err := json.Marshal(hcsschema.PropertyQuery{PropertyTypes: types})
  285. if err != nil {
  286. return nil, makeSystemError(computeSystem, operation, "", err, nil)
  287. }
  288. propertiesJSON, resultJSON, err := vmcompute.HcsGetComputeSystemProperties(ctx, computeSystem.handle, string(queryBytes))
  289. events := processHcsResult(ctx, resultJSON)
  290. if err != nil {
  291. return nil, makeSystemError(computeSystem, operation, "", err, events)
  292. }
  293. if propertiesJSON == "" {
  294. return nil, ErrUnexpectedValue
  295. }
  296. properties := &hcsschema.Properties{}
  297. if err := json.Unmarshal([]byte(propertiesJSON), properties); err != nil {
  298. return nil, makeSystemError(computeSystem, operation, "", err, nil)
  299. }
  300. return properties, nil
  301. }
  302. // Pause pauses the execution of the computeSystem. This feature is not enabled in TP5.
  303. func (computeSystem *System) Pause(ctx context.Context) (err error) {
  304. operation := "hcsshim::System::Pause"
  305. // hcsPauseComputeSystemContext is an async peration. Start the outer span
  306. // here to measure the full pause time.
  307. ctx, span := trace.StartSpan(ctx, operation)
  308. defer span.End()
  309. defer func() { oc.SetSpanStatus(span, err) }()
  310. span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
  311. computeSystem.handleLock.RLock()
  312. defer computeSystem.handleLock.RUnlock()
  313. if computeSystem.handle == 0 {
  314. return makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil)
  315. }
  316. resultJSON, err := vmcompute.HcsPauseComputeSystem(ctx, computeSystem.handle, "")
  317. events, err := processAsyncHcsResult(ctx, err, resultJSON, computeSystem.callbackNumber, hcsNotificationSystemPauseCompleted, &timeout.SystemPause)
  318. if err != nil {
  319. return makeSystemError(computeSystem, operation, "", err, events)
  320. }
  321. return nil
  322. }
  323. // Resume resumes the execution of the computeSystem. This feature is not enabled in TP5.
  324. func (computeSystem *System) Resume(ctx context.Context) (err error) {
  325. operation := "hcsshim::System::Resume"
  326. // hcsResumeComputeSystemContext is an async operation. Start the outer span
  327. // here to measure the full restore time.
  328. ctx, span := trace.StartSpan(ctx, operation)
  329. defer span.End()
  330. defer func() { oc.SetSpanStatus(span, err) }()
  331. span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
  332. computeSystem.handleLock.RLock()
  333. defer computeSystem.handleLock.RUnlock()
  334. if computeSystem.handle == 0 {
  335. return makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil)
  336. }
  337. resultJSON, err := vmcompute.HcsResumeComputeSystem(ctx, computeSystem.handle, "")
  338. events, err := processAsyncHcsResult(ctx, err, resultJSON, computeSystem.callbackNumber, hcsNotificationSystemResumeCompleted, &timeout.SystemResume)
  339. if err != nil {
  340. return makeSystemError(computeSystem, operation, "", err, events)
  341. }
  342. return nil
  343. }
  344. func (computeSystem *System) createProcess(ctx context.Context, operation string, c interface{}) (*Process, *vmcompute.HcsProcessInformation, error) {
  345. computeSystem.handleLock.RLock()
  346. defer computeSystem.handleLock.RUnlock()
  347. if computeSystem.handle == 0 {
  348. return nil, nil, makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil)
  349. }
  350. configurationb, err := json.Marshal(c)
  351. if err != nil {
  352. return nil, nil, makeSystemError(computeSystem, operation, "", err, nil)
  353. }
  354. configuration := string(configurationb)
  355. processInfo, processHandle, resultJSON, err := vmcompute.HcsCreateProcess(ctx, computeSystem.handle, configuration)
  356. events := processHcsResult(ctx, resultJSON)
  357. if err != nil {
  358. return nil, nil, makeSystemError(computeSystem, operation, configuration, err, events)
  359. }
  360. log.G(ctx).WithField("pid", processInfo.ProcessId).Debug("created process pid")
  361. return newProcess(processHandle, int(processInfo.ProcessId), computeSystem), &processInfo, nil
  362. }
  363. // CreateProcess launches a new process within the computeSystem.
  364. func (computeSystem *System) CreateProcess(ctx context.Context, c interface{}) (cow.Process, error) {
  365. operation := "hcsshim::System::CreateProcess"
  366. process, processInfo, err := computeSystem.createProcess(ctx, operation, c)
  367. if err != nil {
  368. return nil, err
  369. }
  370. defer func() {
  371. if err != nil {
  372. process.Close()
  373. }
  374. }()
  375. pipes, err := makeOpenFiles([]syscall.Handle{processInfo.StdInput, processInfo.StdOutput, processInfo.StdError})
  376. if err != nil {
  377. return nil, makeSystemError(computeSystem, operation, "", err, nil)
  378. }
  379. process.stdin = pipes[0]
  380. process.stdout = pipes[1]
  381. process.stderr = pipes[2]
  382. process.hasCachedStdio = true
  383. if err = process.registerCallback(ctx); err != nil {
  384. return nil, makeSystemError(computeSystem, operation, "", err, nil)
  385. }
  386. go process.waitBackground()
  387. return process, nil
  388. }
  389. // OpenProcess gets an interface to an existing process within the computeSystem.
  390. func (computeSystem *System) OpenProcess(ctx context.Context, pid int) (*Process, error) {
  391. computeSystem.handleLock.RLock()
  392. defer computeSystem.handleLock.RUnlock()
  393. operation := "hcsshim::System::OpenProcess"
  394. if computeSystem.handle == 0 {
  395. return nil, makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil)
  396. }
  397. processHandle, resultJSON, err := vmcompute.HcsOpenProcess(ctx, computeSystem.handle, uint32(pid))
  398. events := processHcsResult(ctx, resultJSON)
  399. if err != nil {
  400. return nil, makeSystemError(computeSystem, operation, "", err, events)
  401. }
  402. process := newProcess(processHandle, pid, computeSystem)
  403. if err = process.registerCallback(ctx); err != nil {
  404. return nil, makeSystemError(computeSystem, operation, "", err, nil)
  405. }
  406. go process.waitBackground()
  407. return process, nil
  408. }
  409. // Close cleans up any state associated with the compute system but does not terminate or wait for it.
  410. func (computeSystem *System) Close() (err error) {
  411. operation := "hcsshim::System::Close"
  412. ctx, span := trace.StartSpan(context.Background(), operation)
  413. defer span.End()
  414. defer func() { oc.SetSpanStatus(span, err) }()
  415. span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
  416. computeSystem.handleLock.Lock()
  417. defer computeSystem.handleLock.Unlock()
  418. // Don't double free this
  419. if computeSystem.handle == 0 {
  420. return nil
  421. }
  422. if err = computeSystem.unregisterCallback(ctx); err != nil {
  423. return makeSystemError(computeSystem, operation, "", err, nil)
  424. }
  425. err = vmcompute.HcsCloseComputeSystem(ctx, computeSystem.handle)
  426. if err != nil {
  427. return makeSystemError(computeSystem, operation, "", err, nil)
  428. }
  429. computeSystem.handle = 0
  430. computeSystem.closedWaitOnce.Do(func() {
  431. computeSystem.waitError = ErrAlreadyClosed
  432. close(computeSystem.waitBlock)
  433. })
  434. return nil
  435. }
  436. func (computeSystem *System) registerCallback(ctx context.Context) error {
  437. callbackContext := &notifcationWatcherContext{
  438. channels: newSystemChannels(),
  439. systemID: computeSystem.id,
  440. }
  441. callbackMapLock.Lock()
  442. callbackNumber := nextCallback
  443. nextCallback++
  444. callbackMap[callbackNumber] = callbackContext
  445. callbackMapLock.Unlock()
  446. callbackHandle, err := vmcompute.HcsRegisterComputeSystemCallback(ctx, computeSystem.handle, notificationWatcherCallback, callbackNumber)
  447. if err != nil {
  448. return err
  449. }
  450. callbackContext.handle = callbackHandle
  451. computeSystem.callbackNumber = callbackNumber
  452. return nil
  453. }
  454. func (computeSystem *System) unregisterCallback(ctx context.Context) error {
  455. callbackNumber := computeSystem.callbackNumber
  456. callbackMapLock.RLock()
  457. callbackContext := callbackMap[callbackNumber]
  458. callbackMapLock.RUnlock()
  459. if callbackContext == nil {
  460. return nil
  461. }
  462. handle := callbackContext.handle
  463. if handle == 0 {
  464. return nil
  465. }
  466. // hcsUnregisterComputeSystemCallback has its own syncronization
  467. // to wait for all callbacks to complete. We must NOT hold the callbackMapLock.
  468. err := vmcompute.HcsUnregisterComputeSystemCallback(ctx, handle)
  469. if err != nil {
  470. return err
  471. }
  472. closeChannels(callbackContext.channels)
  473. callbackMapLock.Lock()
  474. delete(callbackMap, callbackNumber)
  475. callbackMapLock.Unlock()
  476. handle = 0
  477. return nil
  478. }
  479. // Modify the System by sending a request to HCS
  480. func (computeSystem *System) Modify(ctx context.Context, config interface{}) error {
  481. computeSystem.handleLock.RLock()
  482. defer computeSystem.handleLock.RUnlock()
  483. operation := "hcsshim::System::Modify"
  484. if computeSystem.handle == 0 {
  485. return makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil)
  486. }
  487. requestBytes, err := json.Marshal(config)
  488. if err != nil {
  489. return err
  490. }
  491. requestJSON := string(requestBytes)
  492. resultJSON, err := vmcompute.HcsModifyComputeSystem(ctx, computeSystem.handle, requestJSON)
  493. events := processHcsResult(ctx, resultJSON)
  494. if err != nil {
  495. return makeSystemError(computeSystem, operation, requestJSON, err, events)
  496. }
  497. return nil
  498. }