system.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
  1. package hcs
  2. import (
  3. "encoding/json"
  4. "os"
  5. "strconv"
  6. "sync"
  7. "syscall"
  8. "time"
  9. "github.com/Microsoft/hcsshim/internal/interop"
  10. "github.com/Microsoft/hcsshim/internal/schema1"
  11. "github.com/Microsoft/hcsshim/internal/timeout"
  12. "github.com/sirupsen/logrus"
  13. )
  14. // currentContainerStarts is used to limit the number of concurrent container
  15. // starts.
  16. var currentContainerStarts containerStarts
  17. type containerStarts struct {
  18. maxParallel int
  19. inProgress int
  20. sync.Mutex
  21. }
  22. func init() {
  23. mpsS := os.Getenv("HCSSHIM_MAX_PARALLEL_START")
  24. if len(mpsS) > 0 {
  25. mpsI, err := strconv.Atoi(mpsS)
  26. if err != nil || mpsI < 0 {
  27. return
  28. }
  29. currentContainerStarts.maxParallel = mpsI
  30. }
  31. }
  32. type System struct {
  33. handleLock sync.RWMutex
  34. handle hcsSystem
  35. id string
  36. callbackNumber uintptr
  37. }
  38. // CreateComputeSystem creates a new compute system with the given configuration but does not start it.
  39. func CreateComputeSystem(id string, hcsDocumentInterface interface{}) (*System, error) {
  40. operation := "CreateComputeSystem"
  41. title := "hcsshim::" + operation
  42. computeSystem := &System{
  43. id: id,
  44. }
  45. hcsDocumentB, err := json.Marshal(hcsDocumentInterface)
  46. if err != nil {
  47. return nil, err
  48. }
  49. hcsDocument := string(hcsDocumentB)
  50. logrus.Debugf(title+" ID=%s config=%s", id, hcsDocument)
  51. var (
  52. resultp *uint16
  53. identity syscall.Handle
  54. )
  55. createError := hcsCreateComputeSystem(id, hcsDocument, identity, &computeSystem.handle, &resultp)
  56. if createError == nil || IsPending(createError) {
  57. if err := computeSystem.registerCallback(); err != nil {
  58. // Terminate the compute system if it still exists. We're okay to
  59. // ignore a failure here.
  60. computeSystem.Terminate()
  61. return nil, makeSystemError(computeSystem, operation, "", err, nil)
  62. }
  63. }
  64. events, err := processAsyncHcsResult(createError, resultp, computeSystem.callbackNumber, hcsNotificationSystemCreateCompleted, &timeout.Duration)
  65. if err != nil {
  66. if err == ErrTimeout {
  67. // Terminate the compute system if it still exists. We're okay to
  68. // ignore a failure here.
  69. computeSystem.Terminate()
  70. }
  71. return nil, makeSystemError(computeSystem, operation, hcsDocument, err, events)
  72. }
  73. logrus.Debugf(title+" succeeded id=%s handle=%d", id, computeSystem.handle)
  74. return computeSystem, nil
  75. }
  76. // OpenComputeSystem opens an existing compute system by ID.
  77. func OpenComputeSystem(id string) (*System, error) {
  78. operation := "OpenComputeSystem"
  79. title := "hcsshim::" + operation
  80. logrus.Debugf(title+" ID=%s", id)
  81. computeSystem := &System{
  82. id: id,
  83. }
  84. var (
  85. handle hcsSystem
  86. resultp *uint16
  87. )
  88. err := hcsOpenComputeSystem(id, &handle, &resultp)
  89. events := processHcsResult(resultp)
  90. if err != nil {
  91. return nil, makeSystemError(computeSystem, operation, "", err, events)
  92. }
  93. computeSystem.handle = handle
  94. if err := computeSystem.registerCallback(); err != nil {
  95. return nil, makeSystemError(computeSystem, operation, "", err, nil)
  96. }
  97. logrus.Debugf(title+" succeeded id=%s handle=%d", id, handle)
  98. return computeSystem, nil
  99. }
  100. // GetComputeSystems gets a list of the compute systems on the system that match the query
  101. func GetComputeSystems(q schema1.ComputeSystemQuery) ([]schema1.ContainerProperties, error) {
  102. operation := "GetComputeSystems"
  103. title := "hcsshim::" + operation
  104. queryb, err := json.Marshal(q)
  105. if err != nil {
  106. return nil, err
  107. }
  108. query := string(queryb)
  109. logrus.Debugf(title+" query=%s", query)
  110. var (
  111. resultp *uint16
  112. computeSystemsp *uint16
  113. )
  114. err = hcsEnumerateComputeSystems(query, &computeSystemsp, &resultp)
  115. events := processHcsResult(resultp)
  116. if err != nil {
  117. return nil, &HcsError{Op: operation, Err: err, Events: events}
  118. }
  119. if computeSystemsp == nil {
  120. return nil, ErrUnexpectedValue
  121. }
  122. computeSystemsRaw := interop.ConvertAndFreeCoTaskMemBytes(computeSystemsp)
  123. computeSystems := []schema1.ContainerProperties{}
  124. if err := json.Unmarshal(computeSystemsRaw, &computeSystems); err != nil {
  125. return nil, err
  126. }
  127. logrus.Debugf(title + " succeeded")
  128. return computeSystems, nil
  129. }
  130. // Start synchronously starts the computeSystem.
  131. func (computeSystem *System) Start() error {
  132. computeSystem.handleLock.RLock()
  133. defer computeSystem.handleLock.RUnlock()
  134. title := "hcsshim::ComputeSystem::Start ID=" + computeSystem.ID()
  135. logrus.Debugf(title)
  136. if computeSystem.handle == 0 {
  137. return makeSystemError(computeSystem, "Start", "", ErrAlreadyClosed, nil)
  138. }
  139. // This is a very simple backoff-retry loop to limit the number
  140. // of parallel container starts if environment variable
  141. // HCSSHIM_MAX_PARALLEL_START is set to a positive integer.
  142. // It should generally only be used as a workaround to various
  143. // platform issues that exist between RS1 and RS4 as of Aug 2018
  144. if currentContainerStarts.maxParallel > 0 {
  145. for {
  146. currentContainerStarts.Lock()
  147. if currentContainerStarts.inProgress < currentContainerStarts.maxParallel {
  148. currentContainerStarts.inProgress++
  149. currentContainerStarts.Unlock()
  150. break
  151. }
  152. if currentContainerStarts.inProgress == currentContainerStarts.maxParallel {
  153. currentContainerStarts.Unlock()
  154. time.Sleep(100 * time.Millisecond)
  155. }
  156. }
  157. // Make sure we decrement the count when we are done.
  158. defer func() {
  159. currentContainerStarts.Lock()
  160. currentContainerStarts.inProgress--
  161. currentContainerStarts.Unlock()
  162. }()
  163. }
  164. var resultp *uint16
  165. err := hcsStartComputeSystem(computeSystem.handle, "", &resultp)
  166. events, err := processAsyncHcsResult(err, resultp, computeSystem.callbackNumber, hcsNotificationSystemStartCompleted, &timeout.Duration)
  167. if err != nil {
  168. return makeSystemError(computeSystem, "Start", "", err, events)
  169. }
  170. logrus.Debugf(title + " succeeded")
  171. return nil
  172. }
  173. // ID returns the compute system's identifier.
  174. func (computeSystem *System) ID() string {
  175. return computeSystem.id
  176. }
  177. // Shutdown requests a compute system shutdown, if IsPending() on the error returned is true,
  178. // it may not actually be shut down until Wait() succeeds.
  179. func (computeSystem *System) Shutdown() error {
  180. computeSystem.handleLock.RLock()
  181. defer computeSystem.handleLock.RUnlock()
  182. title := "hcsshim::ComputeSystem::Shutdown"
  183. logrus.Debugf(title)
  184. if computeSystem.handle == 0 {
  185. return makeSystemError(computeSystem, "Shutdown", "", ErrAlreadyClosed, nil)
  186. }
  187. var resultp *uint16
  188. err := hcsShutdownComputeSystem(computeSystem.handle, "", &resultp)
  189. events := processHcsResult(resultp)
  190. if err != nil {
  191. return makeSystemError(computeSystem, "Shutdown", "", err, events)
  192. }
  193. logrus.Debugf(title + " succeeded")
  194. return nil
  195. }
  196. // Terminate requests a compute system terminate, if IsPending() on the error returned is true,
  197. // it may not actually be shut down until Wait() succeeds.
  198. func (computeSystem *System) Terminate() error {
  199. computeSystem.handleLock.RLock()
  200. defer computeSystem.handleLock.RUnlock()
  201. title := "hcsshim::ComputeSystem::Terminate ID=" + computeSystem.ID()
  202. logrus.Debugf(title)
  203. if computeSystem.handle == 0 {
  204. return makeSystemError(computeSystem, "Terminate", "", ErrAlreadyClosed, nil)
  205. }
  206. var resultp *uint16
  207. err := hcsTerminateComputeSystem(computeSystem.handle, "", &resultp)
  208. events := processHcsResult(resultp)
  209. if err != nil {
  210. return makeSystemError(computeSystem, "Terminate", "", err, events)
  211. }
  212. logrus.Debugf(title + " succeeded")
  213. return nil
  214. }
  215. // Wait synchronously waits for the compute system to shutdown or terminate.
  216. func (computeSystem *System) Wait() error {
  217. title := "hcsshim::ComputeSystem::Wait ID=" + computeSystem.ID()
  218. logrus.Debugf(title)
  219. err := waitForNotification(computeSystem.callbackNumber, hcsNotificationSystemExited, nil)
  220. if err != nil {
  221. return makeSystemError(computeSystem, "Wait", "", err, nil)
  222. }
  223. logrus.Debugf(title + " succeeded")
  224. return nil
  225. }
  226. // WaitTimeout synchronously waits for the compute system to terminate or the duration to elapse.
  227. // If the timeout expires, IsTimeout(err) == true
  228. func (computeSystem *System) WaitTimeout(timeout time.Duration) error {
  229. title := "hcsshim::ComputeSystem::WaitTimeout ID=" + computeSystem.ID()
  230. logrus.Debugf(title)
  231. err := waitForNotification(computeSystem.callbackNumber, hcsNotificationSystemExited, &timeout)
  232. if err != nil {
  233. return makeSystemError(computeSystem, "WaitTimeout", "", err, nil)
  234. }
  235. logrus.Debugf(title + " succeeded")
  236. return nil
  237. }
  238. func (computeSystem *System) Properties(types ...schema1.PropertyType) (*schema1.ContainerProperties, error) {
  239. computeSystem.handleLock.RLock()
  240. defer computeSystem.handleLock.RUnlock()
  241. queryj, err := json.Marshal(schema1.PropertyQuery{types})
  242. if err != nil {
  243. return nil, makeSystemError(computeSystem, "Properties", "", err, nil)
  244. }
  245. var resultp, propertiesp *uint16
  246. err = hcsGetComputeSystemProperties(computeSystem.handle, string(queryj), &propertiesp, &resultp)
  247. events := processHcsResult(resultp)
  248. if err != nil {
  249. return nil, makeSystemError(computeSystem, "Properties", "", err, events)
  250. }
  251. if propertiesp == nil {
  252. return nil, ErrUnexpectedValue
  253. }
  254. propertiesRaw := interop.ConvertAndFreeCoTaskMemBytes(propertiesp)
  255. properties := &schema1.ContainerProperties{}
  256. if err := json.Unmarshal(propertiesRaw, properties); err != nil {
  257. return nil, makeSystemError(computeSystem, "Properties", "", err, nil)
  258. }
  259. return properties, nil
  260. }
  261. // Pause pauses the execution of the computeSystem. This feature is not enabled in TP5.
  262. func (computeSystem *System) Pause() error {
  263. computeSystem.handleLock.RLock()
  264. defer computeSystem.handleLock.RUnlock()
  265. title := "hcsshim::ComputeSystem::Pause ID=" + computeSystem.ID()
  266. logrus.Debugf(title)
  267. if computeSystem.handle == 0 {
  268. return makeSystemError(computeSystem, "Pause", "", ErrAlreadyClosed, nil)
  269. }
  270. var resultp *uint16
  271. err := hcsPauseComputeSystem(computeSystem.handle, "", &resultp)
  272. events, err := processAsyncHcsResult(err, resultp, computeSystem.callbackNumber, hcsNotificationSystemPauseCompleted, &timeout.Duration)
  273. if err != nil {
  274. return makeSystemError(computeSystem, "Pause", "", err, events)
  275. }
  276. logrus.Debugf(title + " succeeded")
  277. return nil
  278. }
  279. // Resume resumes the execution of the computeSystem. This feature is not enabled in TP5.
  280. func (computeSystem *System) Resume() error {
  281. computeSystem.handleLock.RLock()
  282. defer computeSystem.handleLock.RUnlock()
  283. title := "hcsshim::ComputeSystem::Resume ID=" + computeSystem.ID()
  284. logrus.Debugf(title)
  285. if computeSystem.handle == 0 {
  286. return makeSystemError(computeSystem, "Resume", "", ErrAlreadyClosed, nil)
  287. }
  288. var resultp *uint16
  289. err := hcsResumeComputeSystem(computeSystem.handle, "", &resultp)
  290. events, err := processAsyncHcsResult(err, resultp, computeSystem.callbackNumber, hcsNotificationSystemResumeCompleted, &timeout.Duration)
  291. if err != nil {
  292. return makeSystemError(computeSystem, "Resume", "", err, events)
  293. }
  294. logrus.Debugf(title + " succeeded")
  295. return nil
  296. }
  297. // CreateProcess launches a new process within the computeSystem.
  298. func (computeSystem *System) CreateProcess(c interface{}) (*Process, error) {
  299. computeSystem.handleLock.RLock()
  300. defer computeSystem.handleLock.RUnlock()
  301. title := "hcsshim::ComputeSystem::CreateProcess ID=" + computeSystem.ID()
  302. var (
  303. processInfo hcsProcessInformation
  304. processHandle hcsProcess
  305. resultp *uint16
  306. )
  307. if computeSystem.handle == 0 {
  308. return nil, makeSystemError(computeSystem, "CreateProcess", "", ErrAlreadyClosed, nil)
  309. }
  310. configurationb, err := json.Marshal(c)
  311. if err != nil {
  312. return nil, makeSystemError(computeSystem, "CreateProcess", "", err, nil)
  313. }
  314. configuration := string(configurationb)
  315. logrus.Debugf(title+" config=%s", configuration)
  316. err = hcsCreateProcess(computeSystem.handle, configuration, &processInfo, &processHandle, &resultp)
  317. events := processHcsResult(resultp)
  318. if err != nil {
  319. return nil, makeSystemError(computeSystem, "CreateProcess", configuration, err, events)
  320. }
  321. process := &Process{
  322. handle: processHandle,
  323. processID: int(processInfo.ProcessId),
  324. system: computeSystem,
  325. cachedPipes: &cachedPipes{
  326. stdIn: processInfo.StdInput,
  327. stdOut: processInfo.StdOutput,
  328. stdErr: processInfo.StdError,
  329. },
  330. }
  331. if err := process.registerCallback(); err != nil {
  332. return nil, makeSystemError(computeSystem, "CreateProcess", "", err, nil)
  333. }
  334. logrus.Debugf(title+" succeeded processid=%d", process.processID)
  335. return process, nil
  336. }
  337. // OpenProcess gets an interface to an existing process within the computeSystem.
  338. func (computeSystem *System) OpenProcess(pid int) (*Process, error) {
  339. computeSystem.handleLock.RLock()
  340. defer computeSystem.handleLock.RUnlock()
  341. title := "hcsshim::ComputeSystem::OpenProcess ID=" + computeSystem.ID()
  342. logrus.Debugf(title+" processid=%d", pid)
  343. var (
  344. processHandle hcsProcess
  345. resultp *uint16
  346. )
  347. if computeSystem.handle == 0 {
  348. return nil, makeSystemError(computeSystem, "OpenProcess", "", ErrAlreadyClosed, nil)
  349. }
  350. err := hcsOpenProcess(computeSystem.handle, uint32(pid), &processHandle, &resultp)
  351. events := processHcsResult(resultp)
  352. if err != nil {
  353. return nil, makeSystemError(computeSystem, "OpenProcess", "", err, events)
  354. }
  355. process := &Process{
  356. handle: processHandle,
  357. processID: pid,
  358. system: computeSystem,
  359. }
  360. if err := process.registerCallback(); err != nil {
  361. return nil, makeSystemError(computeSystem, "OpenProcess", "", err, nil)
  362. }
  363. logrus.Debugf(title+" succeeded processid=%s", process.processID)
  364. return process, nil
  365. }
  366. // Close cleans up any state associated with the compute system but does not terminate or wait for it.
  367. func (computeSystem *System) Close() error {
  368. computeSystem.handleLock.Lock()
  369. defer computeSystem.handleLock.Unlock()
  370. title := "hcsshim::ComputeSystem::Close ID=" + computeSystem.ID()
  371. logrus.Debugf(title)
  372. // Don't double free this
  373. if computeSystem.handle == 0 {
  374. return nil
  375. }
  376. if err := computeSystem.unregisterCallback(); err != nil {
  377. return makeSystemError(computeSystem, "Close", "", err, nil)
  378. }
  379. if err := hcsCloseComputeSystem(computeSystem.handle); err != nil {
  380. return makeSystemError(computeSystem, "Close", "", err, nil)
  381. }
  382. computeSystem.handle = 0
  383. logrus.Debugf(title + " succeeded")
  384. return nil
  385. }
  386. func (computeSystem *System) registerCallback() error {
  387. context := &notifcationWatcherContext{
  388. channels: newChannels(),
  389. }
  390. callbackMapLock.Lock()
  391. callbackNumber := nextCallback
  392. nextCallback++
  393. callbackMap[callbackNumber] = context
  394. callbackMapLock.Unlock()
  395. var callbackHandle hcsCallback
  396. err := hcsRegisterComputeSystemCallback(computeSystem.handle, notificationWatcherCallback, callbackNumber, &callbackHandle)
  397. if err != nil {
  398. return err
  399. }
  400. context.handle = callbackHandle
  401. computeSystem.callbackNumber = callbackNumber
  402. return nil
  403. }
  404. func (computeSystem *System) unregisterCallback() error {
  405. callbackNumber := computeSystem.callbackNumber
  406. callbackMapLock.RLock()
  407. context := callbackMap[callbackNumber]
  408. callbackMapLock.RUnlock()
  409. if context == nil {
  410. return nil
  411. }
  412. handle := context.handle
  413. if handle == 0 {
  414. return nil
  415. }
  416. // hcsUnregisterComputeSystemCallback has its own syncronization
  417. // to wait for all callbacks to complete. We must NOT hold the callbackMapLock.
  418. err := hcsUnregisterComputeSystemCallback(handle)
  419. if err != nil {
  420. return err
  421. }
  422. closeChannels(context.channels)
  423. callbackMapLock.Lock()
  424. callbackMap[callbackNumber] = nil
  425. callbackMapLock.Unlock()
  426. handle = 0
  427. return nil
  428. }
  429. // Modifies the System by sending a request to HCS
  430. func (computeSystem *System) Modify(config interface{}) error {
  431. computeSystem.handleLock.RLock()
  432. defer computeSystem.handleLock.RUnlock()
  433. title := "hcsshim::Modify ID=" + computeSystem.id
  434. if computeSystem.handle == 0 {
  435. return makeSystemError(computeSystem, "Modify", "", ErrAlreadyClosed, nil)
  436. }
  437. requestJSON, err := json.Marshal(config)
  438. if err != nil {
  439. return err
  440. }
  441. requestString := string(requestJSON)
  442. logrus.Debugf(title + " " + requestString)
  443. var resultp *uint16
  444. err = hcsModifyComputeSystem(computeSystem.handle, requestString, &resultp)
  445. events := processHcsResult(resultp)
  446. if err != nil {
  447. return makeSystemError(computeSystem, "Modify", requestString, err, events)
  448. }
  449. logrus.Debugf(title + " succeeded ")
  450. return nil
  451. }