Jelajahi Sumber

Delete service info from cluster when service is disabled

This PR contains a fix for moby/moby#30321. There was a moby/moby#31142
PR intending to fix the issue by adding a delay between disabling the
service in the cluster and the shutdown of the tasks. However
disabling the service was not deleting the service info in the cluster.
Added a fix to delete service info from cluster and verified using siege
to ensure there is zero downtime on rolling update of a service.

Signed-off-by: abhi <abhi@docker.com>
abhi 7 tahun lalu
induk
melakukan
5ab37a55a1
2 mengubah file dengan 39 tambahan dan 17 penghapusan
  1. 17 12
      libnetwork/endpoint.go
  2. 22 5
      libnetwork/sandbox.go

+ 17 - 12
libnetwork/endpoint.go

@@ -311,16 +311,25 @@ func (ep *endpoint) isAnonymous() bool {
 	return ep.anonymous
 }
 
-// enableService sets ep's serviceEnabled to the passed value if it's not in the
-// current state and returns true; false otherwise.
-func (ep *endpoint) enableService(state bool) bool {
+// isServiceEnabled check if service is enabled on the endpoint
+func (ep *endpoint) isServiceEnabled() bool {
 	ep.Lock()
 	defer ep.Unlock()
-	if ep.serviceEnabled != state {
-		ep.serviceEnabled = state
-		return true
-	}
-	return false
+	return ep.serviceEnabled
+}
+
+// enableService sets service enabled on the endpoint
+func (ep *endpoint) enableService() {
+	ep.Lock()
+	defer ep.Unlock()
+	ep.serviceEnabled = true
+}
+
+// disableService disables service on the endpoint
+func (ep *endpoint) disableService() {
+	ep.Lock()
+	defer ep.Unlock()
+	ep.serviceEnabled = false
 }
 
 func (ep *endpoint) needResolver() bool {
@@ -759,10 +768,6 @@ func (ep *endpoint) sbLeave(sb *sandbox, force bool, options ...EndpointOption)
 		return err
 	}
 
-	if e := ep.deleteServiceInfoFromCluster(sb, "sbLeave"); e != nil {
-		logrus.Errorf("Could not delete service state for endpoint %s from cluster: %v", ep.Name(), e)
-	}
-
 	if e := ep.deleteDriverInfoFromCluster(); e != nil {
 		logrus.Errorf("Could not delete endpoint state for endpoint %s from cluster: %v", ep.Name(), e)
 	}

+ 22 - 5
libnetwork/sandbox.go

@@ -674,24 +674,41 @@ func (sb *sandbox) SetKey(basePath string) error {
 	return nil
 }
 
-func (sb *sandbox) EnableService() error {
+func (sb *sandbox) EnableService() (err error) {
 	logrus.Debugf("EnableService %s START", sb.containerID)
+	defer func() {
+		if err != nil {
+			sb.DisableService()
+		}
+	}()
 	for _, ep := range sb.getConnectedEndpoints() {
-		if ep.enableService(true) {
+		if !ep.isServiceEnabled() {
 			if err := ep.addServiceInfoToCluster(sb); err != nil {
-				ep.enableService(false)
 				return fmt.Errorf("could not update state for endpoint %s into cluster: %v", ep.Name(), err)
 			}
+			ep.enableService()
 		}
 	}
 	logrus.Debugf("EnableService %s DONE", sb.containerID)
 	return nil
 }
 
-func (sb *sandbox) DisableService() error {
+func (sb *sandbox) DisableService() (err error) {
 	logrus.Debugf("DisableService %s START", sb.containerID)
+	failedEps := []string{}
+	defer func() {
+		if len(failedEps) > 0 {
+			err = fmt.Errorf("failed to disable service on sandbox:%s, for endpoints %s", sb.ID(), strings.Join(failedEps, ","))
+		}
+	}()
 	for _, ep := range sb.getConnectedEndpoints() {
-		ep.enableService(false)
+		if ep.isServiceEnabled() {
+			if err := ep.deleteServiceInfoFromCluster(sb, "DisableService"); err != nil {
+				failedEps = append(failedEps, ep.Name())
+				logrus.Warnf("failed update state for endpoint %s into cluster: %v", ep.Name(), err)
+			}
+			ep.disableService()
+		}
 	}
 	logrus.Debugf("DisableService %s DONE", sb.containerID)
 	return nil