service_common.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. //go:build linux || windows
  2. // +build linux windows
  3. package libnetwork
  4. import (
  5. "net"
  6. "github.com/docker/docker/libnetwork/internal/setmatrix"
  7. "github.com/sirupsen/logrus"
  8. )
// maxSetStringLen caps, in bytes, how much of the IP-to-endpoint set dump is
// included in the "possible transient state" warnings logged by
// addServiceBinding and rmServiceBinding.
const maxSetStringLen = 350
  10. func (c *controller) addEndpointNameResolution(svcName, svcID, nID, eID, containerName string, vip net.IP, serviceAliases, taskAliases []string, ip net.IP, addService bool, method string) error {
  11. n, err := c.NetworkByID(nID)
  12. if err != nil {
  13. return err
  14. }
  15. logrus.Debugf("addEndpointNameResolution %s %s add_service:%t sAliases:%v tAliases:%v", eID, svcName, addService, serviceAliases, taskAliases)
  16. // Add container resolution mappings
  17. if err := c.addContainerNameResolution(nID, eID, containerName, taskAliases, ip, method); err != nil {
  18. return err
  19. }
  20. serviceID := svcID
  21. if serviceID == "" {
  22. // This is the case of a normal container not part of a service
  23. serviceID = eID
  24. }
  25. // Add endpoint IP to special "tasks.svc_name" so that the applications have access to DNS RR.
  26. n.(*network).addSvcRecords(eID, "tasks."+svcName, serviceID, ip, nil, false, method)
  27. for _, alias := range serviceAliases {
  28. n.(*network).addSvcRecords(eID, "tasks."+alias, serviceID, ip, nil, false, method)
  29. }
  30. // Add service name to vip in DNS, if vip is valid. Otherwise resort to DNS RR
  31. if len(vip) == 0 {
  32. n.(*network).addSvcRecords(eID, svcName, serviceID, ip, nil, false, method)
  33. for _, alias := range serviceAliases {
  34. n.(*network).addSvcRecords(eID, alias, serviceID, ip, nil, false, method)
  35. }
  36. }
  37. if addService && len(vip) != 0 {
  38. n.(*network).addSvcRecords(eID, svcName, serviceID, vip, nil, false, method)
  39. for _, alias := range serviceAliases {
  40. n.(*network).addSvcRecords(eID, alias, serviceID, vip, nil, false, method)
  41. }
  42. }
  43. return nil
  44. }
  45. func (c *controller) addContainerNameResolution(nID, eID, containerName string, taskAliases []string, ip net.IP, method string) error {
  46. n, err := c.NetworkByID(nID)
  47. if err != nil {
  48. return err
  49. }
  50. logrus.Debugf("addContainerNameResolution %s %s", eID, containerName)
  51. // Add resolution for container name
  52. n.(*network).addSvcRecords(eID, containerName, eID, ip, nil, true, method)
  53. // Add resolution for taskaliases
  54. for _, alias := range taskAliases {
  55. n.(*network).addSvcRecords(eID, alias, eID, ip, nil, false, method)
  56. }
  57. return nil
  58. }
  59. func (c *controller) deleteEndpointNameResolution(svcName, svcID, nID, eID, containerName string, vip net.IP, serviceAliases, taskAliases []string, ip net.IP, rmService, multipleEntries bool, method string) error {
  60. n, err := c.NetworkByID(nID)
  61. if err != nil {
  62. return err
  63. }
  64. logrus.Debugf("deleteEndpointNameResolution %s %s rm_service:%t suppress:%t sAliases:%v tAliases:%v", eID, svcName, rmService, multipleEntries, serviceAliases, taskAliases)
  65. // Delete container resolution mappings
  66. if err := c.delContainerNameResolution(nID, eID, containerName, taskAliases, ip, method); err != nil {
  67. logrus.WithError(err).Warn("Error delting container from resolver")
  68. }
  69. serviceID := svcID
  70. if serviceID == "" {
  71. // This is the case of a normal container not part of a service
  72. serviceID = eID
  73. }
  74. // Delete the special "tasks.svc_name" backend record.
  75. if !multipleEntries {
  76. n.(*network).deleteSvcRecords(eID, "tasks."+svcName, serviceID, ip, nil, false, method)
  77. for _, alias := range serviceAliases {
  78. n.(*network).deleteSvcRecords(eID, "tasks."+alias, serviceID, ip, nil, false, method)
  79. }
  80. }
  81. // If we are doing DNS RR delete the endpoint IP from DNS record right away.
  82. if !multipleEntries && len(vip) == 0 {
  83. n.(*network).deleteSvcRecords(eID, svcName, serviceID, ip, nil, false, method)
  84. for _, alias := range serviceAliases {
  85. n.(*network).deleteSvcRecords(eID, alias, serviceID, ip, nil, false, method)
  86. }
  87. }
  88. // Remove the DNS record for VIP only if we are removing the service
  89. if rmService && len(vip) != 0 && !multipleEntries {
  90. n.(*network).deleteSvcRecords(eID, svcName, serviceID, vip, nil, false, method)
  91. for _, alias := range serviceAliases {
  92. n.(*network).deleteSvcRecords(eID, alias, serviceID, vip, nil, false, method)
  93. }
  94. }
  95. return nil
  96. }
  97. func (c *controller) delContainerNameResolution(nID, eID, containerName string, taskAliases []string, ip net.IP, method string) error {
  98. n, err := c.NetworkByID(nID)
  99. if err != nil {
  100. return err
  101. }
  102. logrus.Debugf("delContainerNameResolution %s %s", eID, containerName)
  103. // Delete resolution for container name
  104. n.(*network).deleteSvcRecords(eID, containerName, eID, ip, nil, true, method)
  105. // Delete resolution for taskaliases
  106. for _, alias := range taskAliases {
  107. n.(*network).deleteSvcRecords(eID, alias, eID, ip, nil, true, method)
  108. }
  109. return nil
  110. }
  111. func newService(name string, id string, ingressPorts []*PortConfig, serviceAliases []string) *service {
  112. return &service{
  113. name: name,
  114. id: id,
  115. ingressPorts: ingressPorts,
  116. loadBalancers: make(map[string]*loadBalancer),
  117. aliases: serviceAliases,
  118. ipToEndpoint: setmatrix.NewSetMatrix(),
  119. }
  120. }
  121. func (c *controller) getLBIndex(sid, nid string, ingressPorts []*PortConfig) int {
  122. skey := serviceKey{
  123. id: sid,
  124. ports: portConfigs(ingressPorts).String(),
  125. }
  126. c.Lock()
  127. s, ok := c.serviceBindings[skey]
  128. c.Unlock()
  129. if !ok {
  130. return 0
  131. }
  132. s.Lock()
  133. lb := s.loadBalancers[nid]
  134. s.Unlock()
  135. return int(lb.fwMark)
  136. }
  137. // cleanupServiceDiscovery when the network is being deleted, erase all the associated service discovery records
  138. func (c *controller) cleanupServiceDiscovery(cleanupNID string) {
  139. c.Lock()
  140. defer c.Unlock()
  141. if cleanupNID == "" {
  142. logrus.Debugf("cleanupServiceDiscovery for all networks")
  143. c.svcRecords = make(map[string]svcInfo)
  144. return
  145. }
  146. logrus.Debugf("cleanupServiceDiscovery for network:%s", cleanupNID)
  147. delete(c.svcRecords, cleanupNID)
  148. }
  149. func (c *controller) cleanupServiceBindings(cleanupNID string) {
  150. var cleanupFuncs []func()
  151. logrus.Debugf("cleanupServiceBindings for %s", cleanupNID)
  152. c.Lock()
  153. services := make([]*service, 0, len(c.serviceBindings))
  154. for _, s := range c.serviceBindings {
  155. services = append(services, s)
  156. }
  157. c.Unlock()
  158. for _, s := range services {
  159. s.Lock()
  160. // Skip the serviceBindings that got deleted
  161. if s.deleted {
  162. s.Unlock()
  163. continue
  164. }
  165. for nid, lb := range s.loadBalancers {
  166. if cleanupNID != "" && nid != cleanupNID {
  167. continue
  168. }
  169. for eid, be := range lb.backEnds {
  170. cleanupFuncs = append(cleanupFuncs, makeServiceCleanupFunc(c, s, nid, eid, lb.vip, be.ip))
  171. }
  172. }
  173. s.Unlock()
  174. }
  175. for _, f := range cleanupFuncs {
  176. f()
  177. }
  178. }
  179. func makeServiceCleanupFunc(c *controller, s *service, nID, eID string, vip net.IP, ip net.IP) func() {
  180. // ContainerName and taskAliases are not available here, this is still fine because the Service discovery
  181. // cleanup already happened before. The only thing that rmServiceBinding is still doing here a part from the Load
  182. // Balancer bookeeping, is to keep consistent the mapping of endpoint to IP.
  183. return func() {
  184. if err := c.rmServiceBinding(s.name, s.id, nID, eID, "", vip, s.ingressPorts, s.aliases, []string{}, ip, "cleanupServiceBindings", false, true); err != nil {
  185. logrus.Errorf("Failed to remove service bindings for service %s network %s endpoint %s while cleanup: %v", s.id, nID, eID, err)
  186. }
  187. }
  188. }
  189. func (c *controller) addServiceBinding(svcName, svcID, nID, eID, containerName string, vip net.IP, ingressPorts []*PortConfig, serviceAliases, taskAliases []string, ip net.IP, method string) error {
  190. var addService bool
  191. // Failure to lock the network ID on add can result in racing
  192. // racing against network deletion resulting in inconsistent
  193. // state in the c.serviceBindings map and it's sub-maps. Also,
  194. // always lock network ID before services to avoid deadlock.
  195. c.networkLocker.Lock(nID)
  196. defer c.networkLocker.Unlock(nID) // nolint:errcheck
  197. n, err := c.NetworkByID(nID)
  198. if err != nil {
  199. return err
  200. }
  201. skey := serviceKey{
  202. id: svcID,
  203. ports: portConfigs(ingressPorts).String(),
  204. }
  205. var s *service
  206. for {
  207. c.Lock()
  208. var ok bool
  209. s, ok = c.serviceBindings[skey]
  210. if !ok {
  211. // Create a new service if we are seeing this service
  212. // for the first time.
  213. s = newService(svcName, svcID, ingressPorts, serviceAliases)
  214. c.serviceBindings[skey] = s
  215. }
  216. c.Unlock()
  217. s.Lock()
  218. if !s.deleted {
  219. // ok the object is good to be used
  220. break
  221. }
  222. s.Unlock()
  223. }
  224. logrus.Debugf("addServiceBinding from %s START for %s %s p:%p nid:%s skey:%v", method, svcName, eID, s, nID, skey)
  225. defer s.Unlock()
  226. lb, ok := s.loadBalancers[nID]
  227. if !ok {
  228. // Create a new load balancer if we are seeing this
  229. // network attachment on the service for the first
  230. // time.
  231. fwMarkCtrMu.Lock()
  232. lb = &loadBalancer{
  233. vip: vip,
  234. fwMark: fwMarkCtr,
  235. backEnds: make(map[string]*lbBackend),
  236. service: s,
  237. }
  238. fwMarkCtr++
  239. fwMarkCtrMu.Unlock()
  240. s.loadBalancers[nID] = lb
  241. addService = true
  242. }
  243. lb.backEnds[eID] = &lbBackend{ip, false}
  244. ok, entries := s.assignIPToEndpoint(ip.String(), eID)
  245. if !ok || entries > 1 {
  246. setStr, b := s.printIPToEndpoint(ip.String())
  247. if len(setStr) > maxSetStringLen {
  248. setStr = setStr[:maxSetStringLen]
  249. }
  250. logrus.Warnf("addServiceBinding %s possible transient state ok:%t entries:%d set:%t %s", eID, ok, entries, b, setStr)
  251. }
  252. // Add loadbalancer service and backend to the network
  253. n.(*network).addLBBackend(ip, lb)
  254. // Add the appropriate name resolutions
  255. if err := c.addEndpointNameResolution(svcName, svcID, nID, eID, containerName, vip, serviceAliases, taskAliases, ip, addService, "addServiceBinding"); err != nil {
  256. return err
  257. }
  258. logrus.Debugf("addServiceBinding from %s END for %s %s", method, svcName, eID)
  259. return nil
  260. }
  261. func (c *controller) rmServiceBinding(svcName, svcID, nID, eID, containerName string, vip net.IP, ingressPorts []*PortConfig, serviceAliases []string, taskAliases []string, ip net.IP, method string, deleteSvcRecords bool, fullRemove bool) error {
  262. var rmService bool
  263. skey := serviceKey{
  264. id: svcID,
  265. ports: portConfigs(ingressPorts).String(),
  266. }
  267. c.Lock()
  268. s, ok := c.serviceBindings[skey]
  269. c.Unlock()
  270. if !ok {
  271. logrus.Warnf("rmServiceBinding %s %s %s aborted c.serviceBindings[skey] !ok", method, svcName, eID)
  272. return nil
  273. }
  274. s.Lock()
  275. defer s.Unlock()
  276. logrus.Debugf("rmServiceBinding from %s START for %s %s p:%p nid:%s sKey:%v deleteSvc:%t", method, svcName, eID, s, nID, skey, deleteSvcRecords)
  277. lb, ok := s.loadBalancers[nID]
  278. if !ok {
  279. logrus.Warnf("rmServiceBinding %s %s %s aborted s.loadBalancers[nid] !ok", method, svcName, eID)
  280. return nil
  281. }
  282. be, ok := lb.backEnds[eID]
  283. if !ok {
  284. logrus.Warnf("rmServiceBinding %s %s %s aborted lb.backEnds[eid] && lb.disabled[eid] !ok", method, svcName, eID)
  285. return nil
  286. }
  287. if fullRemove {
  288. // delete regardless
  289. delete(lb.backEnds, eID)
  290. } else {
  291. be.disabled = true
  292. }
  293. if len(lb.backEnds) == 0 {
  294. // All the backends for this service have been
  295. // removed. Time to remove the load balancer and also
  296. // remove the service entry in IPVS.
  297. rmService = true
  298. delete(s.loadBalancers, nID)
  299. logrus.Debugf("rmServiceBinding %s delete %s, p:%p in loadbalancers len:%d", eID, nID, lb, len(s.loadBalancers))
  300. }
  301. ok, entries := s.removeIPToEndpoint(ip.String(), eID)
  302. if !ok || entries > 0 {
  303. setStr, b := s.printIPToEndpoint(ip.String())
  304. if len(setStr) > maxSetStringLen {
  305. setStr = setStr[:maxSetStringLen]
  306. }
  307. logrus.Warnf("rmServiceBinding %s possible transient state ok:%t entries:%d set:%t %s", eID, ok, entries, b, setStr)
  308. }
  309. // Remove loadbalancer service(if needed) and backend in all
  310. // sandboxes in the network only if the vip is valid.
  311. if entries == 0 {
  312. // The network may well have been deleted before the last
  313. // of the service bindings. That's ok on Linux because
  314. // removing the network sandbox implicitly removes the
  315. // backend service bindings. Windows VFP cleanup requires
  316. // calling cleanupServiceBindings on the network prior to
  317. // deleting the network, performed by network.delete.
  318. n, err := c.NetworkByID(nID)
  319. if err == nil {
  320. n.(*network).rmLBBackend(ip, lb, rmService, fullRemove)
  321. }
  322. }
  323. // Delete the name resolutions
  324. if deleteSvcRecords {
  325. if err := c.deleteEndpointNameResolution(svcName, svcID, nID, eID, containerName, vip, serviceAliases, taskAliases, ip, rmService, entries > 0, "rmServiceBinding"); err != nil {
  326. return err
  327. }
  328. }
  329. if len(s.loadBalancers) == 0 {
  330. // All loadbalancers for the service removed. Time to
  331. // remove the service itself.
  332. c.Lock()
  333. // Mark the object as deleted so that the add won't use it wrongly
  334. s.deleted = true
  335. // NOTE The delete from the serviceBindings map has to be the last operation else we are allowing a race between this service
  336. // that is getting deleted and a new service that will be created if the entry is not anymore there
  337. delete(c.serviceBindings, skey)
  338. c.Unlock()
  339. }
  340. logrus.Debugf("rmServiceBinding from %s END for %s %s", method, svcName, eID)
  341. return nil
  342. }