浏览代码

Fix data race in controller sandboxes

The controller sandboxes hashmap is not being protected by a lock
while deleting it in `LeaveAll` call. This may result in a race
whereby any other read access that happens with the lock held is
also vulnerable to return random sandbox data which could result
in totally unpredictable behavior.

Also as part of the fix check if `s.endpoints` is empty and log an
error in `rmEndpoint` so that we don't bring down the process
for this unexpected error.

Signed-off-by: Jana Radhakrishnan <mrjana@docker.com>
Jana Radhakrishnan 10 年之前
父节点
当前提交
092437ad0e
共有 2 个文件被更改,包括 75 次插入20 次删除
  1. 64 17
      libnetwork/libnetwork_test.go
  2. 11 3
      libnetwork/sandboxdata.go

+ 64 - 17
libnetwork/libnetwork_test.go

@@ -1919,14 +1919,30 @@ func createGlobalInstance(t *testing.T) {
 			"AllowNonDefaultBridge": true,
 		},
 	}
-	net, err := createTestNetwork(bridgeNetType, "network", netOption)
+
+	net1, err := controller.NetworkByName("testhost")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	net2, err := createTestNetwork("bridge", "network2", netOption)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	_, err = net1.CreateEndpoint("pep1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	_, err = net2.CreateEndpoint("pep2")
 	if err != nil {
-		t.Fatal("new network")
+		t.Fatal(err)
 	}
 
-	_, err = net.CreateEndpoint("ep1")
+	_, err = net2.CreateEndpoint("pep3")
 	if err != nil {
-		t.Fatal("createendpoint")
+		t.Fatal(err)
 	}
 }
 
@@ -1940,12 +1956,18 @@ func debugf(format string, a ...interface{}) (int, error) {
 
 func parallelJoin(t *testing.T, ep libnetwork.Endpoint, thrNumber int) {
 	debugf("J%d.", thrNumber)
-	err := ep.Join("racing_container")
+	var err error
+	if thrNumber == first {
+		err = ep.Join(fmt.Sprintf("%drace", thrNumber), libnetwork.JoinOptionUseDefaultSandbox())
+	} else {
+		err = ep.Join(fmt.Sprintf("%drace", thrNumber))
+	}
+
 	runtime.LockOSThread()
 	if err != nil {
 		if _, ok := err.(libnetwork.ErrNoContainer); !ok {
 			if _, ok := err.(libnetwork.ErrInvalidJoin); !ok {
-				t.Fatal(err)
+				t.Fatalf("thread %d: %v", thrNumber, err)
 			}
 		}
 		debugf("JE%d(%v).", thrNumber, err)
@@ -1955,12 +1977,18 @@ func parallelJoin(t *testing.T, ep libnetwork.Endpoint, thrNumber int) {
 
 func parallelLeave(t *testing.T, ep libnetwork.Endpoint, thrNumber int) {
 	debugf("L%d.", thrNumber)
-	err := ep.Leave("racing_container")
+	var err error
+	if thrNumber == first {
+		err = ep.Leave(fmt.Sprintf("%drace", thrNumber))
+	} else {
+		err = controller.LeaveAll(fmt.Sprintf("%drace", thrNumber))
+	}
+
 	runtime.LockOSThread()
 	if err != nil {
 		if _, ok := err.(libnetwork.ErrNoContainer); !ok {
 			if _, ok := err.(libnetwork.ErrInvalidJoin); !ok {
-				t.Fatal(err)
+				t.Fatalf("thread %d: %v", thrNumber, err)
 			}
 		}
 		debugf("LE%d(%v).", thrNumber, err)
@@ -2012,15 +2040,33 @@ func runParallelTests(t *testing.T, thrNumber int) {
 	}
 	defer netns.Set(origns)
 
-	net, err := controller.NetworkByName("network")
+	net1, err := controller.NetworkByName("testhost")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if net1 == nil {
+		t.Fatal("Could not find network1")
+	}
+
+	net2, err := controller.NetworkByName("network2")
 	if err != nil {
 		t.Fatal(err)
 	}
-	if net == nil {
-		t.Fatal("Could not find network")
+	if net2 == nil {
+		t.Fatal("Could not find network2")
+	}
+
+	epName := fmt.Sprintf("pep%d", thrNumber)
+
+	//var err error
+	var ep libnetwork.Endpoint
+
+	if thrNumber == first {
+		ep, err = net1.EndpointByName(epName)
+	} else {
+		ep, err = net2.EndpointByName(epName)
 	}
 
-	ep, err := net.EndpointByName("ep1")
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -2035,6 +2081,11 @@ func runParallelTests(t *testing.T, thrNumber int) {
 
 	debugf("\n")
 
+	err = ep.Delete()
+	if err != nil {
+		t.Fatal(err)
+	}
+
 	if thrNumber == first {
 		for thrdone := range done {
 			select {
@@ -2043,12 +2094,8 @@ func runParallelTests(t *testing.T, thrNumber int) {
 		}
 
 		testns.Close()
-		err = ep.Delete()
-		if err != nil {
-			t.Fatal(err)
-		}
 
-		if err := net.Delete(); err != nil {
+		if err := net2.Delete(); err != nil {
 			t.Fatal(err)
 		}
 	}

+ 11 - 3
libnetwork/sandboxdata.go

@@ -139,10 +139,15 @@ func (s *sandboxData) rmEndpoint(ep *endpoint) {
 		}
 	}
 
-	// We don't check if s.endpoints is empty here because
-	// it should never be empty during a rmEndpoint call and
-	// if it is we will rightfully panic here
 	s.Lock()
+	if len(s.endpoints) == 0 {
+		// s.endpoints should never be empty and this is unexpected error condition
+		// We log an error message to note this down for debugging purposes.
+		logrus.Errorf("No endpoints in sandbox while trying to remove endpoint %s", ep.Name())
+		s.Unlock()
+		return
+	}
+
 	highEpBefore := s.endpoints[0]
 	var (
 		i int
@@ -245,7 +250,10 @@ func (c *controller) LeaveAll(id string) error {
 	}
 
 	sData.sandbox().Destroy()
+
+	c.Lock()
 	delete(c.sandboxes, sandbox.GenerateKey(id))
+	c.Unlock()
 
 	return nil
 }