Fix data race in controller sandboxes

The controller's sandboxes map is not protected by a lock when an
entry is deleted from it in the `LeaveAll` call. This is a data race:
any other read access, even one that happens with the lock held, may
return random sandbox data, which could result in totally
unpredictable behavior.

Also, as part of the fix, check whether `s.endpoints` is empty in
`rmEndpoint` and log an error instead of panicking, so that we don't
bring down the process for this unexpected condition.

Signed-off-by: Jana Radhakrishnan <mrjana@docker.com>
This commit is contained in:
Jana Radhakrishnan 2015-07-29 23:57:50 -07:00
parent 964d926aa7
commit 092437ad0e
2 changed files with 75 additions and 20 deletions

View file

@ -1919,14 +1919,30 @@ func createGlobalInstance(t *testing.T) {
"AllowNonDefaultBridge": true,
},
}
net, err := createTestNetwork(bridgeNetType, "network", netOption)
net1, err := controller.NetworkByName("testhost")
if err != nil {
t.Fatal("new network")
t.Fatal(err)
}
_, err = net.CreateEndpoint("ep1")
net2, err := createTestNetwork("bridge", "network2", netOption)
if err != nil {
t.Fatal("createendpoint")
t.Fatal(err)
}
_, err = net1.CreateEndpoint("pep1")
if err != nil {
t.Fatal(err)
}
_, err = net2.CreateEndpoint("pep2")
if err != nil {
t.Fatal(err)
}
_, err = net2.CreateEndpoint("pep3")
if err != nil {
t.Fatal(err)
}
}
@ -1940,12 +1956,18 @@ func debugf(format string, a ...interface{}) (int, error) {
func parallelJoin(t *testing.T, ep libnetwork.Endpoint, thrNumber int) {
debugf("J%d.", thrNumber)
err := ep.Join("racing_container")
var err error
if thrNumber == first {
err = ep.Join(fmt.Sprintf("%drace", thrNumber), libnetwork.JoinOptionUseDefaultSandbox())
} else {
err = ep.Join(fmt.Sprintf("%drace", thrNumber))
}
runtime.LockOSThread()
if err != nil {
if _, ok := err.(libnetwork.ErrNoContainer); !ok {
if _, ok := err.(libnetwork.ErrInvalidJoin); !ok {
t.Fatal(err)
t.Fatalf("thread %d: %v", thrNumber, err)
}
}
debugf("JE%d(%v).", thrNumber, err)
@ -1955,12 +1977,18 @@ func parallelJoin(t *testing.T, ep libnetwork.Endpoint, thrNumber int) {
func parallelLeave(t *testing.T, ep libnetwork.Endpoint, thrNumber int) {
debugf("L%d.", thrNumber)
err := ep.Leave("racing_container")
var err error
if thrNumber == first {
err = ep.Leave(fmt.Sprintf("%drace", thrNumber))
} else {
err = controller.LeaveAll(fmt.Sprintf("%drace", thrNumber))
}
runtime.LockOSThread()
if err != nil {
if _, ok := err.(libnetwork.ErrNoContainer); !ok {
if _, ok := err.(libnetwork.ErrInvalidJoin); !ok {
t.Fatal(err)
t.Fatalf("thread %d: %v", thrNumber, err)
}
}
debugf("LE%d(%v).", thrNumber, err)
@ -2012,15 +2040,33 @@ func runParallelTests(t *testing.T, thrNumber int) {
}
defer netns.Set(origns)
net, err := controller.NetworkByName("network")
net1, err := controller.NetworkByName("testhost")
if err != nil {
t.Fatal(err)
}
if net == nil {
t.Fatal("Could not find network")
if net1 == nil {
t.Fatal("Could not find network1")
}
net2, err := controller.NetworkByName("network2")
if err != nil {
t.Fatal(err)
}
if net2 == nil {
t.Fatal("Could not find network2")
}
epName := fmt.Sprintf("pep%d", thrNumber)
//var err error
var ep libnetwork.Endpoint
if thrNumber == first {
ep, err = net1.EndpointByName(epName)
} else {
ep, err = net2.EndpointByName(epName)
}
ep, err := net.EndpointByName("ep1")
if err != nil {
t.Fatal(err)
}
@ -2035,6 +2081,11 @@ func runParallelTests(t *testing.T, thrNumber int) {
debugf("\n")
err = ep.Delete()
if err != nil {
t.Fatal(err)
}
if thrNumber == first {
for thrdone := range done {
select {
@ -2043,12 +2094,8 @@ func runParallelTests(t *testing.T, thrNumber int) {
}
testns.Close()
err = ep.Delete()
if err != nil {
t.Fatal(err)
}
if err := net.Delete(); err != nil {
if err := net2.Delete(); err != nil {
t.Fatal(err)
}
}

View file

@ -139,10 +139,15 @@ func (s *sandboxData) rmEndpoint(ep *endpoint) {
}
}
// We don't check if s.endpoints is empty here because
// it should never be empty during a rmEndpoint call and
// if it is we will rightfully panic here
s.Lock()
if len(s.endpoints) == 0 {
// s.endpoints should never be empty here, so this is an unexpected error condition.
// We log an error message to record it for debugging purposes.
logrus.Errorf("No endpoints in sandbox while trying to remove endpoint %s", ep.Name())
s.Unlock()
return
}
highEpBefore := s.endpoints[0]
var (
i int
@ -245,7 +250,10 @@ func (c *controller) LeaveAll(id string) error {
}
sData.sandbox().Destroy()
c.Lock()
delete(c.sandboxes, sandbox.GenerateKey(id))
c.Unlock()
return nil
}