package libnetwork

import (
	"context"
	"encoding/json"
	"fmt"
	"net"
	"sort"
	"strings"
	"sync"

	"github.com/containerd/log"
	"github.com/docker/docker/libnetwork/etchosts"
	"github.com/docker/docker/libnetwork/osl"
	"github.com/docker/docker/libnetwork/types"
	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/trace"
)

// SandboxOption is an option setter function type used to pass various options to
// NewNetContainer method. The various setter functions of type SandboxOption are
// provided by libnetwork, they look like ContainerOptionXXXX(...)
type SandboxOption func(sb *Sandbox)

// processOptions applies every non-nil option to sb, in the order given.
// Nil entries are skipped.
func (sb *Sandbox) processOptions(options ...SandboxOption) {
	for _, opt := range options {
		if opt != nil {
			opt(sb)
		}
	}
}

// Sandbox provides the control over the network container entity.
// It is a one to one mapping with the container.
type Sandbox struct {
	// id is the sandbox ID returned by ID(); it is also the only value
	// written by MarshalJSON and restored by UnmarshalJSON.
	id string
	// containerID is the ID of the associated container, returned by ContainerID().
	containerID string
	// config holds the settings applied through SandboxOption setters; Refresh
	// resets it to the zero value before re-applying options.
	config containerConfig
	extDNS []extDNSEntry
	// osSbox is destroyed on sandbox delete unless config.useDefaultSandBox is set.
	osSbox *osl.Namespace
	// controller is the Controller whose sandboxes map this sandbox is removed
	// from on delete.
	controller *Controller
	// resolver is stopped on sandbox delete when non-nil.
	resolver     *Resolver
	resolverOnce sync.Once
	// endpoints is kept ordered according to Endpoint.Less by addEndpoint.
	endpoints []*Endpoint
	// epPriority maps an endpoint ID to its priority, consulted by Endpoint.Less
	// (a missing entry means priority 0).
	epPriority map[string]int
	// populatedEndpoints is keyed by endpoint ID; the entry is dropped in
	// clearNetworkResources.
	populatedEndpoints map[string]struct{}
	// joinLeaveDone is non-nil while a join/leave operation is in progress and is
	// closed by joinLeaveEnd to wake up waiters in joinLeaveStart.
	joinLeaveDone chan struct{}
	dbIndex       uint64
	dbExists      bool
	isStub        bool
	// inDelete is set for the duration of delete(); it rejects concurrent deletes
	// and suppresses store updates from clearNetworkResources.
	inDelete bool
	// ingress, when true, makes delete() clear the controller's ingressSandbox.
	ingress bool
	// ndotsSet is reported by NdotsSet().
	ndotsSet        bool
	oslTypes        []osl.SandboxType // slice of properties of this sandbox
	loadBalancerNID string            // NID that this SB is a load balancer for
	// mu is taken by the accessors in this file around reads and writes of
	// endpoints, inDelete, joinLeaveDone and config.generic.
	// NOTE(review): the full set of fields it protects is not visible here — confirm.
	mu sync.Mutex
	// This mutex is used to serialize service related operation for an endpoint
	// The lock is here because the endpoint is saved into the store so is not unique
	service sync.Mutex
}

// These are the container configs used to customize container /etc/hosts file.
type hostsPathConfig struct {
	hostName   string
	domainName string
	// hostsPath is the path whose etchosts cache entry is dropped on sandbox delete.
	hostsPath       string
	originHostsPath string
	extraHosts      []extraHost
	parentUpdates   []parentUpdate
}

// parentUpdate is one element of hostsPathConfig.parentUpdates.
// NOTE(review): presumably describes a hosts-file update to apply in the parent
// container identified by cid — confirm against the code that consumes it.
type parentUpdate struct {
	cid  string
	name string
	ip   string
}

// extraHost is one element of hostsPathConfig.extraHosts: a host name and the
// IP it is paired with.
type extraHost struct {
	name string
	IP   string
}

// These are the container configs used to customize container /etc/resolv.conf file.
type resolvConfPathConfig struct { resolvConfPath string originResolvConfPath string resolvConfHashFile string dnsList []string dnsSearchList []string dnsOptionsList []string } type containerConfig struct { containerConfigOS //nolint:nolintlint,unused // only populated on windows hostsPathConfig resolvConfPathConfig generic map[string]interface{} useDefaultSandBox bool useExternalKey bool exposedPorts []types.TransportPort } // ID returns the ID of the sandbox. func (sb *Sandbox) ID() string { return sb.id } // ContainerID returns the container id associated to this sandbox. func (sb *Sandbox) ContainerID() string { return sb.containerID } // Key returns the sandbox's key. func (sb *Sandbox) Key() string { if sb.config.useDefaultSandBox { return osl.GenerateKey("default") } return osl.GenerateKey(sb.id) } // Labels returns the sandbox's labels. func (sb *Sandbox) Labels() map[string]interface{} { sb.mu.Lock() defer sb.mu.Unlock() opts := make(map[string]interface{}, len(sb.config.generic)) for k, v := range sb.config.generic { opts[k] = v } return opts } // Delete destroys this container after detaching it from all connected endpoints. func (sb *Sandbox) Delete() error { return sb.delete(false) } func (sb *Sandbox) delete(force bool) error { sb.mu.Lock() if sb.inDelete { sb.mu.Unlock() return types.ForbiddenErrorf("another sandbox delete in progress") } // Set the inDelete flag. This will ensure that we don't // update the store until we have completed all the endpoint // leaves and deletes. And when endpoint leaves and deletes // are completed then we can finally delete the sandbox object // altogether from the data store. 
If the daemon exits // ungracefully in the middle of a sandbox delete this way we // will have all the references to the endpoints in the // sandbox so that we can clean them up when we restart sb.inDelete = true sb.mu.Unlock() c := sb.controller // Detach from all endpoints retain := false for _, ep := range sb.Endpoints() { // gw network endpoint detach and removal are automatic if ep.endpointInGWNetwork() && !force { continue } // Retain the sanbdox if we can't obtain the network from store. if _, err := c.getNetworkFromStore(ep.getNetwork().ID()); err != nil { if !c.isSwarmNode() { retain = true } log.G(context.TODO()).Warnf("Failed getting network for ep %s during sandbox %s delete: %v", ep.ID(), sb.ID(), err) continue } if !force { if err := ep.Leave(sb); err != nil { log.G(context.TODO()).Warnf("Failed detaching sandbox %s from endpoint %s: %v\n", sb.ID(), ep.ID(), err) } } if err := ep.Delete(force); err != nil { log.G(context.TODO()).Warnf("Failed deleting endpoint %s: %v\n", ep.ID(), err) } } if retain { sb.mu.Lock() sb.inDelete = false sb.mu.Unlock() return fmt.Errorf("could not cleanup all the endpoints in container %s / sandbox %s", sb.containerID, sb.id) } // Container is going away. Path cache in etchosts is most // likely not required any more. Drop it. etchosts.Drop(sb.config.hostsPath) if sb.resolver != nil { sb.resolver.Stop() } if sb.osSbox != nil && !sb.config.useDefaultSandBox { if err := sb.osSbox.Destroy(); err != nil { log.G(context.TODO()).WithError(err).Warn("error destroying network sandbox") } } if err := sb.storeDelete(); err != nil { log.G(context.TODO()).Warnf("Failed to delete sandbox %s from store: %v", sb.ID(), err) } c.mu.Lock() if sb.ingress { c.ingressSandbox = nil } delete(c.sandboxes, sb.ID()) c.mu.Unlock() return nil } // Rename changes the name of all attached Endpoints. 
func (sb *Sandbox) Rename(name string) error { var err error for _, ep := range sb.Endpoints() { if ep.endpointInGWNetwork() { continue } oldName := ep.Name() lEp := ep if err = ep.rename(name); err != nil { break } defer func() { if err != nil { if err2 := lEp.rename(oldName); err2 != nil { log.G(context.TODO()).WithField("old", oldName).WithField("origError", err).WithError(err2).Error("error renaming sandbox") } } }() } return err } // Refresh leaves all the endpoints, resets and re-applies the options, // re-joins all the endpoints without destroying the osl sandbox func (sb *Sandbox) Refresh(options ...SandboxOption) error { // Store connected endpoints epList := sb.Endpoints() // Detach from all endpoints for _, ep := range epList { if err := ep.Leave(sb); err != nil { log.G(context.TODO()).Warnf("Failed detaching sandbox %s from endpoint %s: %v\n", sb.ID(), ep.ID(), err) } } // Re-apply options sb.config = containerConfig{} sb.processOptions(options...) // Setup discovery files if err := sb.setupResolutionFiles(); err != nil { return err } // Re-connect to all endpoints for _, ep := range epList { if err := ep.Join(sb); err != nil { log.G(context.TODO()).Warnf("Failed attach sandbox %s to endpoint %s: %v\n", sb.ID(), ep.ID(), err) } } return nil } func (sb *Sandbox) MarshalJSON() ([]byte, error) { sb.mu.Lock() defer sb.mu.Unlock() // We are just interested in the container ID. This can be expanded to include all of containerInfo if there is a need return json.Marshal(sb.id) } func (sb *Sandbox) UnmarshalJSON(b []byte) (err error) { sb.mu.Lock() defer sb.mu.Unlock() var id string if err := json.Unmarshal(b, &id); err != nil { return err } sb.id = id return nil } // Endpoints returns all the endpoints connected to the sandbox. 
func (sb *Sandbox) Endpoints() []*Endpoint { sb.mu.Lock() defer sb.mu.Unlock() eps := make([]*Endpoint, len(sb.endpoints)) copy(eps, sb.endpoints) return eps } func (sb *Sandbox) addEndpoint(ep *Endpoint) { sb.mu.Lock() defer sb.mu.Unlock() l := len(sb.endpoints) i := sort.Search(l, func(j int) bool { return ep.Less(sb.endpoints[j]) }) sb.endpoints = append(sb.endpoints, nil) copy(sb.endpoints[i+1:], sb.endpoints[i:]) sb.endpoints[i] = ep } func (sb *Sandbox) removeEndpoint(ep *Endpoint) { sb.mu.Lock() defer sb.mu.Unlock() sb.removeEndpointRaw(ep) } func (sb *Sandbox) removeEndpointRaw(ep *Endpoint) { for i, e := range sb.endpoints { if e == ep { sb.endpoints = append(sb.endpoints[:i], sb.endpoints[i+1:]...) return } } } func (sb *Sandbox) GetEndpoint(id string) *Endpoint { sb.mu.Lock() defer sb.mu.Unlock() for _, ep := range sb.endpoints { if ep.id == id { return ep } } return nil } func (sb *Sandbox) HandleQueryResp(name string, ip net.IP) { for _, ep := range sb.Endpoints() { n := ep.getNetwork() n.HandleQueryResp(name, ip) } } func (sb *Sandbox) ResolveIP(ctx context.Context, ip string) string { var svc string log.G(ctx).Debugf("IP To resolve %v", ip) for _, ep := range sb.Endpoints() { n := ep.getNetwork() svc = n.ResolveIP(ctx, ip) if len(svc) != 0 { return svc } } return svc } // ResolveService returns all the backend details about the containers or hosts // backing a service. Its purpose is to satisfy an SRV query. func (sb *Sandbox) ResolveService(ctx context.Context, name string) ([]*net.SRV, []net.IP) { log.G(ctx).Debugf("Service name To resolve: %v", name) // There are DNS implementations that allow SRV queries for names not in // the format defined by RFC 2782. 
Hence specific validations checks are // not done if parts := strings.SplitN(name, ".", 3); len(parts) < 3 { return nil, nil } for _, ep := range sb.Endpoints() { n := ep.getNetwork() srv, ip := n.ResolveService(ctx, name) if len(srv) > 0 { return srv, ip } } return nil, nil } func (sb *Sandbox) ResolveName(ctx context.Context, name string, ipType int) ([]net.IP, bool) { // Embedded server owns the docker network domain. Resolution should work // for both container_name and container_name.network_name // We allow '.' in service name and network name. For a name a.b.c.d the // following have to tried; // {a.b.c.d in the networks container is connected to} // {a.b.c in network d}, // {a.b in network c.d}, // {a in network b.c.d}, log.G(ctx).Debugf("Name To resolve: %v", name) name = strings.TrimSuffix(name, ".") reqName := []string{name} networkName := []string{""} if strings.Contains(name, ".") { var i int dup := name for { if i = strings.LastIndex(dup, "."); i == -1 { break } networkName = append(networkName, name[i+1:]) reqName = append(reqName, name[:i]) dup = dup[:i] } } epList := sb.Endpoints() // In swarm mode, services with exposed ports are connected to user overlay // network, ingress network and docker_gwbridge networks. Name resolution // should prioritize returning the VIP/IPs on user overlay network. // // Re-order the endpoints based on the network-type they're attached to; // // 1. dynamic networks (user overlay networks) // 2. ingress network(s) // 3. 
local networks ("docker_gwbridge") if sb.controller.isSwarmNode() { sort.Sort(ByNetworkType(epList)) } for i := 0; i < len(reqName); i++ { // First check for local container alias ip, ipv6Miss := sb.resolveName(ctx, reqName[i], networkName[i], epList, true, ipType) if ip != nil { return ip, false } if ipv6Miss { return ip, ipv6Miss } // Resolve the actual container name ip, ipv6Miss = sb.resolveName(ctx, reqName[i], networkName[i], epList, false, ipType) if ip != nil { return ip, false } if ipv6Miss { return ip, ipv6Miss } } return nil, false } func (sb *Sandbox) resolveName(ctx context.Context, nameOrAlias string, networkName string, epList []*Endpoint, lookupAlias bool, ipType int) (_ []net.IP, ipv6Miss bool) { ctx, span := otel.Tracer("").Start(ctx, "Sandbox.resolveName", trace.WithAttributes( attribute.String("libnet.resolver.name-or-alias", nameOrAlias), attribute.String("libnet.network.name", networkName), attribute.Bool("libnet.resolver.alias-lookup", lookupAlias), attribute.Int("libnet.resolver.ip-family", ipType))) defer span.End() for _, ep := range epList { if lookupAlias && len(ep.aliases) == 0 { continue } nw := ep.getNetwork() if networkName != "" && networkName != nw.Name() { continue } name := nameOrAlias if lookupAlias { ep.mu.Lock() alias, ok := ep.aliases[nameOrAlias] ep.mu.Unlock() if !ok { continue } name = alias } else { // If it is a regular lookup and if the requested name is an alias // don't perform a svc lookup for this endpoint. ep.mu.Lock() _, ok := ep.aliases[nameOrAlias] ep.mu.Unlock() if ok { continue } } ip, miss := nw.ResolveName(ctx, name, ipType) if ip != nil { return ip, false } if miss { ipv6Miss = miss } } return nil, ipv6Miss } // hasExternalAccess returns true if any of sb's Endpoints appear to have external // network access. 
func (sb *Sandbox) hasExternalAccess() bool { for _, ep := range sb.Endpoints() { nw := ep.getNetwork() if nw.Internal() || nw.Type() == "null" || nw.Type() == "host" { continue } if ep.hasGatewayOrDefaultRoute() { return true } } return false } // EnableService makes a managed container's service available by adding the // endpoint to the service load balancer and service discovery. func (sb *Sandbox) EnableService() (err error) { log.G(context.TODO()).Debugf("EnableService %s START", sb.containerID) defer func() { if err != nil { if err2 := sb.DisableService(); err2 != nil { log.G(context.TODO()).WithError(err2).WithField("origError", err).Error("Error while disabling service after original error") } } }() for _, ep := range sb.Endpoints() { if !ep.isServiceEnabled() { if err := ep.addServiceInfoToCluster(sb); err != nil { return fmt.Errorf("could not update state for endpoint %s into cluster: %v", ep.Name(), err) } ep.enableService() } } log.G(context.TODO()).Debugf("EnableService %s DONE", sb.containerID) return nil } // DisableService removes a managed container's endpoints from the load balancer // and service discovery. 
// (DisableService, continued.) Every endpoint whose service is enabled is
// processed, even after a failure: failures are logged, the endpoint names are
// collected, and the endpoint is marked disabled regardless. If any endpoint
// failed, the deferred function replaces the nil result with an error listing
// the failed endpoint names.
func (sb *Sandbox) DisableService() (err error) {
	log.G(context.TODO()).Debugf("DisableService %s START", sb.containerID)
	failedEps := []string{}
	// Runs after "return nil" below and overwrites the named result.
	defer func() {
		if len(failedEps) > 0 {
			err = fmt.Errorf("failed to disable service on sandbox:%s, for endpoints %s", sb.ID(), strings.Join(failedEps, ","))
		}
	}()
	for _, ep := range sb.Endpoints() {
		if ep.isServiceEnabled() {
			// This err shadows the named result; it is only logged.
			if err := ep.deleteServiceInfoFromCluster(sb, false, "DisableService"); err != nil {
				failedEps = append(failedEps, ep.Name())
				log.G(context.TODO()).Warnf("failed update state for endpoint %s into cluster: %v", ep.Name(), err)
			}
			ep.disableService()
		}
	}
	log.G(context.TODO()).Debugf("DisableService %s DONE", sb.containerID)
	return nil
}

// clearNetworkResources releases the OS-level resources held for origEp,
// removes the matching endpoint from the sandbox's bookkeeping (endpoint list,
// populatedEndpoints, epPriority) and, if the first endpoint that has a gateway
// changed as a result, calls updateGateway with the new one. Unless the sandbox
// is being deleted, the sandbox is then written back to the store.
//
// It returns an error if the sandbox has no endpoint with origEp's ID, or if
// updateGateway or storeUpdate fails. An empty endpoint list or an endpoint
// that has already been removed is logged and reported as success.
func (sb *Sandbox) clearNetworkResources(origEp *Endpoint) error {
	// Work on the sandbox's own endpoint object for this ID.
	ep := sb.GetEndpoint(origEp.id)
	if ep == nil {
		return fmt.Errorf("could not find the sandbox endpoint data for endpoint %s", origEp.id)
	}

	// Snapshot the fields under the lock, then release the OS resources
	// without holding it.
	sb.mu.Lock()
	osSbox := sb.osSbox
	inDelete := sb.inDelete
	sb.mu.Unlock()
	if osSbox != nil {
		releaseOSSboxResources(osSbox, ep)
	}

	// From here on sb.mu is held until one of the explicit Unlock calls below;
	// every return path in between unlocks first.
	sb.mu.Lock()
	delete(sb.populatedEndpoints, ep.ID())

	if len(sb.endpoints) == 0 {
		// sb.endpoints should never be empty and this is unexpected error condition
		// We log an error message to note this down for debugging purposes.
		log.G(context.TODO()).Errorf("No endpoints in sandbox while trying to remove endpoint %s", ep.Name())
		sb.mu.Unlock()
		return nil
	}

	var (
		// gwepBefore / gwepAfter: first endpoint in the list with a non-empty
		// Gateway() before and after ep is removed.
		gwepBefore, gwepAfter *Endpoint
		// index is the position of ep in sb.endpoints, or -1 if absent.
		index = -1
	)
	for i, e := range sb.endpoints {
		if e == ep {
			index = i
		}
		if len(e.Gateway()) > 0 && gwepBefore == nil {
			gwepBefore = e
		}
		// Stop as soon as both pieces of information are known.
		if index != -1 && gwepBefore != nil {
			break
		}
	}

	if index == -1 {
		log.G(context.TODO()).Warnf("Endpoint %s has already been deleted", ep.Name())
		sb.mu.Unlock()
		return nil
	}

	sb.removeEndpointRaw(ep)
	for _, e := range sb.endpoints {
		if len(e.Gateway()) > 0 {
			gwepAfter = e
			break
		}
	}
	delete(sb.epPriority, ep.ID())
	sb.mu.Unlock()

	// Only touch the gateway when a different endpoint now comes first among
	// those that have one.
	if gwepAfter != nil && gwepBefore != gwepAfter {
		if err := sb.updateGateway(gwepAfter); err != nil {
			return err
		}
	}

	// Only update the store if we did not come here as part of
	// sandbox delete. If we came here as part of delete then do
	// not bother updating the store. The sandbox object will be
	// deleted anyway
	if !inDelete {
		return sb.storeUpdate()
	}

	return nil
}

// joinLeaveStart waits to ensure there are no joins or leaves in progress and
// marks this join/leave in progress without race
func (sb *Sandbox) joinLeaveStart() {
	sb.mu.Lock()
	defer sb.mu.Unlock()

	// A non-nil joinLeaveDone means another join/leave is running. Wait for
	// its channel to be closed with the lock released, then re-check under
	// the lock, since another waiter may have started in the meantime.
	for sb.joinLeaveDone != nil {
		joinLeaveDone := sb.joinLeaveDone
		sb.mu.Unlock()

		<-joinLeaveDone

		sb.mu.Lock()
	}

	sb.joinLeaveDone = make(chan struct{})
}

// joinLeaveEnd marks the end of this join/leave operation and
// signals the same without race to other join and leave waiters
func (sb *Sandbox) joinLeaveEnd() {
	sb.mu.Lock()
	defer sb.mu.Unlock()

	if sb.joinLeaveDone != nil {
		// Closing wakes every goroutine blocked in joinLeaveStart.
		close(sb.joinLeaveDone)
		sb.joinLeaveDone = nil
	}
}

// Less reports whether epi sorts before epj in a sandbox's endpoint list.
// The criteria below are applied in order; the first one on which the two
// endpoints differ decides:
//
//	priority      higher sandbox priority first (missing entry counts as 0)
//	gateway nw    endpoints not in the gateway network first
//	internal nw   endpoints on non-internal networks first
//	join info     rank = 1 if joinInfo.gw is set + 2 if joinInfo.gw6 is set;
//	              higher rank first
//	network name  lexically smaller network name first (bar < foo)
func (epi *Endpoint) Less(epj *Endpoint) bool {
	var prioi, prioj int

	// Errors from getSandbox are ignored; a nil sandbox leaves the priority at 0.
	sbi, _ := epi.getSandbox()
	sbj, _ := epj.getSandbox()

	// Prio defaults to 0
	if sbi != nil {
		prioi = sbi.epPriority[epi.ID()]
	}
	if sbj != nil {
		prioj = sbj.epPriority[epj.ID()]
	}
	if prioi != prioj {
		return prioi > prioj
	}

	gwi := epi.endpointInGWNetwork()
	gwj := epj.endpointInGWNetwork()
	if gwi != gwj {
		// They differ, so epi is the non-gateway one exactly when gwj is true.
		return gwj
	}

	inti := epi.getNetwork().Internal()
	intj := epj.getNetwork().Internal()
	if inti != intj {
		// Same reasoning: epi is the non-internal one exactly when intj is true.
		return intj
	}

	// Rank of epi's join info (see the table above).
	jii := 0
	if epi.joinInfo != nil {
		if epi.joinInfo.gw != nil {
			jii = jii + 1
		}
		if epi.joinInfo.gw6 != nil {
			jii = jii + 2
		}
	}

	// Rank of epj's join info.
	jij := 0
	if epj.joinInfo != nil {
		if epj.joinInfo.gw != nil {
			jij = jij + 1
		}
		if epj.joinInfo.gw6 != nil {
			jij = jij + 2
		}
	}

	if jii != jij {
		return jii > jij
	}

	return epi.network.Name() < epj.network.Name()
}

// NdotsSet returns the sandbox's ndotsSet flag.
func (sb *Sandbox) NdotsSet() bool {
	return sb.ndotsSet
}