123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656 |
- package hcs
- import (
- "context"
- "encoding/json"
- "errors"
- "os"
- "strconv"
- "strings"
- "sync"
- "syscall"
- "time"
- "github.com/Microsoft/hcsshim/internal/cow"
- "github.com/Microsoft/hcsshim/internal/log"
- "github.com/Microsoft/hcsshim/internal/oc"
- "github.com/Microsoft/hcsshim/internal/schema1"
- hcsschema "github.com/Microsoft/hcsshim/internal/schema2"
- "github.com/Microsoft/hcsshim/internal/timeout"
- "github.com/Microsoft/hcsshim/internal/vmcompute"
- "go.opencensus.io/trace"
- )
- // currentContainerStarts is used to limit the number of concurrent container
- // starts.
- var currentContainerStarts containerStarts
- type containerStarts struct {
- maxParallel int
- inProgress int
- sync.Mutex
- }
- func init() {
- mpsS := os.Getenv("HCSSHIM_MAX_PARALLEL_START")
- if len(mpsS) > 0 {
- mpsI, err := strconv.Atoi(mpsS)
- if err != nil || mpsI < 0 {
- return
- }
- currentContainerStarts.maxParallel = mpsI
- }
- }
- type System struct {
- handleLock sync.RWMutex
- handle vmcompute.HcsSystem
- id string
- callbackNumber uintptr
- closedWaitOnce sync.Once
- waitBlock chan struct{}
- waitError error
- exitError error
- os, typ string
- }
- func newSystem(id string) *System {
- return &System{
- id: id,
- waitBlock: make(chan struct{}),
- }
- }
- // CreateComputeSystem creates a new compute system with the given configuration but does not start it.
- func CreateComputeSystem(ctx context.Context, id string, hcsDocumentInterface interface{}) (_ *System, err error) {
- operation := "hcsshim::CreateComputeSystem"
- // hcsCreateComputeSystemContext is an async operation. Start the outer span
- // here to measure the full create time.
- ctx, span := trace.StartSpan(ctx, operation)
- defer span.End()
- defer func() { oc.SetSpanStatus(span, err) }()
- span.AddAttributes(trace.StringAttribute("cid", id))
- computeSystem := newSystem(id)
- hcsDocumentB, err := json.Marshal(hcsDocumentInterface)
- if err != nil {
- return nil, err
- }
- hcsDocument := string(hcsDocumentB)
- var (
- identity syscall.Handle
- resultJSON string
- createError error
- )
- computeSystem.handle, resultJSON, createError = vmcompute.HcsCreateComputeSystem(ctx, id, hcsDocument, identity)
- if createError == nil || IsPending(createError) {
- defer func() {
- if err != nil {
- computeSystem.Close()
- }
- }()
- if err = computeSystem.registerCallback(ctx); err != nil {
- // Terminate the compute system if it still exists. We're okay to
- // ignore a failure here.
- computeSystem.Terminate(ctx)
- return nil, makeSystemError(computeSystem, operation, "", err, nil)
- }
- }
- events, err := processAsyncHcsResult(ctx, createError, resultJSON, computeSystem.callbackNumber, hcsNotificationSystemCreateCompleted, &timeout.SystemCreate)
- if err != nil {
- if err == ErrTimeout {
- // Terminate the compute system if it still exists. We're okay to
- // ignore a failure here.
- computeSystem.Terminate(ctx)
- }
- return nil, makeSystemError(computeSystem, operation, hcsDocument, err, events)
- }
- go computeSystem.waitBackground()
- if err = computeSystem.getCachedProperties(ctx); err != nil {
- return nil, err
- }
- return computeSystem, nil
- }
- // OpenComputeSystem opens an existing compute system by ID.
- func OpenComputeSystem(ctx context.Context, id string) (*System, error) {
- operation := "hcsshim::OpenComputeSystem"
- computeSystem := newSystem(id)
- handle, resultJSON, err := vmcompute.HcsOpenComputeSystem(ctx, id)
- events := processHcsResult(ctx, resultJSON)
- if err != nil {
- return nil, makeSystemError(computeSystem, operation, "", err, events)
- }
- computeSystem.handle = handle
- defer func() {
- if err != nil {
- computeSystem.Close()
- }
- }()
- if err = computeSystem.registerCallback(ctx); err != nil {
- return nil, makeSystemError(computeSystem, operation, "", err, nil)
- }
- go computeSystem.waitBackground()
- if err = computeSystem.getCachedProperties(ctx); err != nil {
- return nil, err
- }
- return computeSystem, nil
- }
- func (computeSystem *System) getCachedProperties(ctx context.Context) error {
- props, err := computeSystem.Properties(ctx)
- if err != nil {
- return err
- }
- computeSystem.typ = strings.ToLower(props.SystemType)
- computeSystem.os = strings.ToLower(props.RuntimeOSType)
- if computeSystem.os == "" && computeSystem.typ == "container" {
- // Pre-RS5 HCS did not return the OS, but it only supported containers
- // that ran Windows.
- computeSystem.os = "windows"
- }
- return nil
- }
- // OS returns the operating system of the compute system, "linux" or "windows".
- func (computeSystem *System) OS() string {
- return computeSystem.os
- }
- // IsOCI returns whether processes in the compute system should be created via
- // OCI.
- func (computeSystem *System) IsOCI() bool {
- return computeSystem.os == "linux" && computeSystem.typ == "container"
- }
- // GetComputeSystems gets a list of the compute systems on the system that match the query
- func GetComputeSystems(ctx context.Context, q schema1.ComputeSystemQuery) ([]schema1.ContainerProperties, error) {
- operation := "hcsshim::GetComputeSystems"
- queryb, err := json.Marshal(q)
- if err != nil {
- return nil, err
- }
- computeSystemsJSON, resultJSON, err := vmcompute.HcsEnumerateComputeSystems(ctx, string(queryb))
- events := processHcsResult(ctx, resultJSON)
- if err != nil {
- return nil, &HcsError{Op: operation, Err: err, Events: events}
- }
- if computeSystemsJSON == "" {
- return nil, ErrUnexpectedValue
- }
- computeSystems := []schema1.ContainerProperties{}
- if err = json.Unmarshal([]byte(computeSystemsJSON), &computeSystems); err != nil {
- return nil, err
- }
- return computeSystems, nil
- }
- // Start synchronously starts the computeSystem.
- func (computeSystem *System) Start(ctx context.Context) (err error) {
- operation := "hcsshim::System::Start"
- // hcsStartComputeSystemContext is an async operation. Start the outer span
- // here to measure the full start time.
- ctx, span := trace.StartSpan(ctx, operation)
- defer span.End()
- defer func() { oc.SetSpanStatus(span, err) }()
- span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
- computeSystem.handleLock.RLock()
- defer computeSystem.handleLock.RUnlock()
- if computeSystem.handle == 0 {
- return makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil)
- }
- // This is a very simple backoff-retry loop to limit the number
- // of parallel container starts if environment variable
- // HCSSHIM_MAX_PARALLEL_START is set to a positive integer.
- // It should generally only be used as a workaround to various
- // platform issues that exist between RS1 and RS4 as of Aug 2018
- if currentContainerStarts.maxParallel > 0 {
- for {
- currentContainerStarts.Lock()
- if currentContainerStarts.inProgress < currentContainerStarts.maxParallel {
- currentContainerStarts.inProgress++
- currentContainerStarts.Unlock()
- break
- }
- if currentContainerStarts.inProgress == currentContainerStarts.maxParallel {
- currentContainerStarts.Unlock()
- time.Sleep(100 * time.Millisecond)
- }
- }
- // Make sure we decrement the count when we are done.
- defer func() {
- currentContainerStarts.Lock()
- currentContainerStarts.inProgress--
- currentContainerStarts.Unlock()
- }()
- }
- resultJSON, err := vmcompute.HcsStartComputeSystem(ctx, computeSystem.handle, "")
- events, err := processAsyncHcsResult(ctx, err, resultJSON, computeSystem.callbackNumber, hcsNotificationSystemStartCompleted, &timeout.SystemStart)
- if err != nil {
- return makeSystemError(computeSystem, operation, "", err, events)
- }
- return nil
- }
- // ID returns the compute system's identifier.
- func (computeSystem *System) ID() string {
- return computeSystem.id
- }
- // Shutdown requests a compute system shutdown.
- func (computeSystem *System) Shutdown(ctx context.Context) error {
- computeSystem.handleLock.RLock()
- defer computeSystem.handleLock.RUnlock()
- operation := "hcsshim::System::Shutdown"
- if computeSystem.handle == 0 {
- return nil
- }
- resultJSON, err := vmcompute.HcsShutdownComputeSystem(ctx, computeSystem.handle, "")
- events := processHcsResult(ctx, resultJSON)
- switch err {
- case nil, ErrVmcomputeAlreadyStopped, ErrComputeSystemDoesNotExist, ErrVmcomputeOperationPending:
- default:
- return makeSystemError(computeSystem, operation, "", err, events)
- }
- return nil
- }
- // Terminate requests a compute system terminate.
- func (computeSystem *System) Terminate(ctx context.Context) error {
- computeSystem.handleLock.RLock()
- defer computeSystem.handleLock.RUnlock()
- operation := "hcsshim::System::Terminate"
- if computeSystem.handle == 0 {
- return nil
- }
- resultJSON, err := vmcompute.HcsTerminateComputeSystem(ctx, computeSystem.handle, "")
- events := processHcsResult(ctx, resultJSON)
- switch err {
- case nil, ErrVmcomputeAlreadyStopped, ErrComputeSystemDoesNotExist, ErrVmcomputeOperationPending:
- default:
- return makeSystemError(computeSystem, operation, "", err, events)
- }
- return nil
- }
- // waitBackground waits for the compute system exit notification. Once received
- // sets `computeSystem.waitError` (if any) and unblocks all `Wait` calls.
- //
- // This MUST be called exactly once per `computeSystem.handle` but `Wait` is
- // safe to call multiple times.
- func (computeSystem *System) waitBackground() {
- operation := "hcsshim::System::waitBackground"
- ctx, span := trace.StartSpan(context.Background(), operation)
- defer span.End()
- span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
- err := waitForNotification(ctx, computeSystem.callbackNumber, hcsNotificationSystemExited, nil)
- switch err {
- case nil:
- log.G(ctx).Debug("system exited")
- case ErrVmcomputeUnexpectedExit:
- log.G(ctx).Debug("unexpected system exit")
- computeSystem.exitError = makeSystemError(computeSystem, operation, "", err, nil)
- err = nil
- default:
- err = makeSystemError(computeSystem, operation, "", err, nil)
- }
- computeSystem.closedWaitOnce.Do(func() {
- computeSystem.waitError = err
- close(computeSystem.waitBlock)
- })
- oc.SetSpanStatus(span, err)
- }
- // Wait synchronously waits for the compute system to shutdown or terminate. If
- // the compute system has already exited returns the previous error (if any).
- func (computeSystem *System) Wait() error {
- <-computeSystem.waitBlock
- return computeSystem.waitError
- }
- // ExitError returns an error describing the reason the compute system terminated.
- func (computeSystem *System) ExitError() error {
- select {
- case <-computeSystem.waitBlock:
- if computeSystem.waitError != nil {
- return computeSystem.waitError
- }
- return computeSystem.exitError
- default:
- return errors.New("container not exited")
- }
- }
- // Properties returns the requested container properties targeting a V1 schema container.
- func (computeSystem *System) Properties(ctx context.Context, types ...schema1.PropertyType) (*schema1.ContainerProperties, error) {
- computeSystem.handleLock.RLock()
- defer computeSystem.handleLock.RUnlock()
- operation := "hcsshim::System::Properties"
- queryBytes, err := json.Marshal(schema1.PropertyQuery{PropertyTypes: types})
- if err != nil {
- return nil, makeSystemError(computeSystem, operation, "", err, nil)
- }
- propertiesJSON, resultJSON, err := vmcompute.HcsGetComputeSystemProperties(ctx, computeSystem.handle, string(queryBytes))
- events := processHcsResult(ctx, resultJSON)
- if err != nil {
- return nil, makeSystemError(computeSystem, operation, "", err, events)
- }
- if propertiesJSON == "" {
- return nil, ErrUnexpectedValue
- }
- properties := &schema1.ContainerProperties{}
- if err := json.Unmarshal([]byte(propertiesJSON), properties); err != nil {
- return nil, makeSystemError(computeSystem, operation, "", err, nil)
- }
- return properties, nil
- }
- // PropertiesV2 returns the requested container properties targeting a V2 schema container.
- func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (*hcsschema.Properties, error) {
- computeSystem.handleLock.RLock()
- defer computeSystem.handleLock.RUnlock()
- operation := "hcsshim::System::PropertiesV2"
- queryBytes, err := json.Marshal(hcsschema.PropertyQuery{PropertyTypes: types})
- if err != nil {
- return nil, makeSystemError(computeSystem, operation, "", err, nil)
- }
- propertiesJSON, resultJSON, err := vmcompute.HcsGetComputeSystemProperties(ctx, computeSystem.handle, string(queryBytes))
- events := processHcsResult(ctx, resultJSON)
- if err != nil {
- return nil, makeSystemError(computeSystem, operation, "", err, events)
- }
- if propertiesJSON == "" {
- return nil, ErrUnexpectedValue
- }
- properties := &hcsschema.Properties{}
- if err := json.Unmarshal([]byte(propertiesJSON), properties); err != nil {
- return nil, makeSystemError(computeSystem, operation, "", err, nil)
- }
- return properties, nil
- }
- // Pause pauses the execution of the computeSystem. This feature is not enabled in TP5.
- func (computeSystem *System) Pause(ctx context.Context) (err error) {
- operation := "hcsshim::System::Pause"
- // hcsPauseComputeSystemContext is an async peration. Start the outer span
- // here to measure the full pause time.
- ctx, span := trace.StartSpan(ctx, operation)
- defer span.End()
- defer func() { oc.SetSpanStatus(span, err) }()
- span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
- computeSystem.handleLock.RLock()
- defer computeSystem.handleLock.RUnlock()
- if computeSystem.handle == 0 {
- return makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil)
- }
- resultJSON, err := vmcompute.HcsPauseComputeSystem(ctx, computeSystem.handle, "")
- events, err := processAsyncHcsResult(ctx, err, resultJSON, computeSystem.callbackNumber, hcsNotificationSystemPauseCompleted, &timeout.SystemPause)
- if err != nil {
- return makeSystemError(computeSystem, operation, "", err, events)
- }
- return nil
- }
- // Resume resumes the execution of the computeSystem. This feature is not enabled in TP5.
- func (computeSystem *System) Resume(ctx context.Context) (err error) {
- operation := "hcsshim::System::Resume"
- // hcsResumeComputeSystemContext is an async operation. Start the outer span
- // here to measure the full restore time.
- ctx, span := trace.StartSpan(ctx, operation)
- defer span.End()
- defer func() { oc.SetSpanStatus(span, err) }()
- span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
- computeSystem.handleLock.RLock()
- defer computeSystem.handleLock.RUnlock()
- if computeSystem.handle == 0 {
- return makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil)
- }
- resultJSON, err := vmcompute.HcsResumeComputeSystem(ctx, computeSystem.handle, "")
- events, err := processAsyncHcsResult(ctx, err, resultJSON, computeSystem.callbackNumber, hcsNotificationSystemResumeCompleted, &timeout.SystemResume)
- if err != nil {
- return makeSystemError(computeSystem, operation, "", err, events)
- }
- return nil
- }
- func (computeSystem *System) createProcess(ctx context.Context, operation string, c interface{}) (*Process, *vmcompute.HcsProcessInformation, error) {
- computeSystem.handleLock.RLock()
- defer computeSystem.handleLock.RUnlock()
- if computeSystem.handle == 0 {
- return nil, nil, makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil)
- }
- configurationb, err := json.Marshal(c)
- if err != nil {
- return nil, nil, makeSystemError(computeSystem, operation, "", err, nil)
- }
- configuration := string(configurationb)
- processInfo, processHandle, resultJSON, err := vmcompute.HcsCreateProcess(ctx, computeSystem.handle, configuration)
- events := processHcsResult(ctx, resultJSON)
- if err != nil {
- return nil, nil, makeSystemError(computeSystem, operation, configuration, err, events)
- }
- log.G(ctx).WithField("pid", processInfo.ProcessId).Debug("created process pid")
- return newProcess(processHandle, int(processInfo.ProcessId), computeSystem), &processInfo, nil
- }
- // CreateProcess launches a new process within the computeSystem.
- func (computeSystem *System) CreateProcess(ctx context.Context, c interface{}) (cow.Process, error) {
- operation := "hcsshim::System::CreateProcess"
- process, processInfo, err := computeSystem.createProcess(ctx, operation, c)
- if err != nil {
- return nil, err
- }
- defer func() {
- if err != nil {
- process.Close()
- }
- }()
- pipes, err := makeOpenFiles([]syscall.Handle{processInfo.StdInput, processInfo.StdOutput, processInfo.StdError})
- if err != nil {
- return nil, makeSystemError(computeSystem, operation, "", err, nil)
- }
- process.stdin = pipes[0]
- process.stdout = pipes[1]
- process.stderr = pipes[2]
- process.hasCachedStdio = true
- if err = process.registerCallback(ctx); err != nil {
- return nil, makeSystemError(computeSystem, operation, "", err, nil)
- }
- go process.waitBackground()
- return process, nil
- }
- // OpenProcess gets an interface to an existing process within the computeSystem.
- func (computeSystem *System) OpenProcess(ctx context.Context, pid int) (*Process, error) {
- computeSystem.handleLock.RLock()
- defer computeSystem.handleLock.RUnlock()
- operation := "hcsshim::System::OpenProcess"
- if computeSystem.handle == 0 {
- return nil, makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil)
- }
- processHandle, resultJSON, err := vmcompute.HcsOpenProcess(ctx, computeSystem.handle, uint32(pid))
- events := processHcsResult(ctx, resultJSON)
- if err != nil {
- return nil, makeSystemError(computeSystem, operation, "", err, events)
- }
- process := newProcess(processHandle, pid, computeSystem)
- if err = process.registerCallback(ctx); err != nil {
- return nil, makeSystemError(computeSystem, operation, "", err, nil)
- }
- go process.waitBackground()
- return process, nil
- }
- // Close cleans up any state associated with the compute system but does not terminate or wait for it.
- func (computeSystem *System) Close() (err error) {
- operation := "hcsshim::System::Close"
- ctx, span := trace.StartSpan(context.Background(), operation)
- defer span.End()
- defer func() { oc.SetSpanStatus(span, err) }()
- span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
- computeSystem.handleLock.Lock()
- defer computeSystem.handleLock.Unlock()
- // Don't double free this
- if computeSystem.handle == 0 {
- return nil
- }
- if err = computeSystem.unregisterCallback(ctx); err != nil {
- return makeSystemError(computeSystem, operation, "", err, nil)
- }
- err = vmcompute.HcsCloseComputeSystem(ctx, computeSystem.handle)
- if err != nil {
- return makeSystemError(computeSystem, operation, "", err, nil)
- }
- computeSystem.handle = 0
- computeSystem.closedWaitOnce.Do(func() {
- computeSystem.waitError = ErrAlreadyClosed
- close(computeSystem.waitBlock)
- })
- return nil
- }
- func (computeSystem *System) registerCallback(ctx context.Context) error {
- callbackContext := ¬ifcationWatcherContext{
- channels: newSystemChannels(),
- systemID: computeSystem.id,
- }
- callbackMapLock.Lock()
- callbackNumber := nextCallback
- nextCallback++
- callbackMap[callbackNumber] = callbackContext
- callbackMapLock.Unlock()
- callbackHandle, err := vmcompute.HcsRegisterComputeSystemCallback(ctx, computeSystem.handle, notificationWatcherCallback, callbackNumber)
- if err != nil {
- return err
- }
- callbackContext.handle = callbackHandle
- computeSystem.callbackNumber = callbackNumber
- return nil
- }
- func (computeSystem *System) unregisterCallback(ctx context.Context) error {
- callbackNumber := computeSystem.callbackNumber
- callbackMapLock.RLock()
- callbackContext := callbackMap[callbackNumber]
- callbackMapLock.RUnlock()
- if callbackContext == nil {
- return nil
- }
- handle := callbackContext.handle
- if handle == 0 {
- return nil
- }
- // hcsUnregisterComputeSystemCallback has its own syncronization
- // to wait for all callbacks to complete. We must NOT hold the callbackMapLock.
- err := vmcompute.HcsUnregisterComputeSystemCallback(ctx, handle)
- if err != nil {
- return err
- }
- closeChannels(callbackContext.channels)
- callbackMapLock.Lock()
- delete(callbackMap, callbackNumber)
- callbackMapLock.Unlock()
- handle = 0
- return nil
- }
- // Modify the System by sending a request to HCS
- func (computeSystem *System) Modify(ctx context.Context, config interface{}) error {
- computeSystem.handleLock.RLock()
- defer computeSystem.handleLock.RUnlock()
- operation := "hcsshim::System::Modify"
- if computeSystem.handle == 0 {
- return makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil)
- }
- requestBytes, err := json.Marshal(config)
- if err != nil {
- return err
- }
- requestJSON := string(requestBytes)
- resultJSON, err := vmcompute.HcsModifyComputeSystem(ctx, computeSystem.handle, requestJSON)
- events := processHcsResult(ctx, resultJSON)
- if err != nil {
- return makeSystemError(computeSystem, operation, requestJSON, err, events)
- }
- return nil
- }
|