From 80a35e0bd428ece2901e63027faa88af52cfe78a Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 3 May 2019 09:59:54 -0700 Subject: [PATCH 1/4] layer: protect mountedLayer.references Add a mutex to protect concurrent access to mountedLayer.references map. Signed-off-by: Kir Kolyshkin (cherry picked from commit f73b5cb4e8b9a23ad6700577840582d66017a733) Signed-off-by: Sebastiaan van Stijn --- layer/mounted_layer.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/layer/mounted_layer.go b/layer/mounted_layer.go index d6858c662c..c5d9e0e488 100644 --- a/layer/mounted_layer.go +++ b/layer/mounted_layer.go @@ -2,6 +2,7 @@ package layer // import "github.com/docker/docker/layer" import ( "io" + "sync" "github.com/docker/docker/pkg/archive" "github.com/docker/docker/pkg/containerfs" @@ -15,6 +16,7 @@ type mountedLayer struct { path string layerStore *layerStore + sync.Mutex references map[RWLayer]*referencedRWLayer } @@ -62,16 +64,24 @@ func (ml *mountedLayer) getReference() RWLayer { ref := &referencedRWLayer{ mountedLayer: ml, } + ml.Lock() ml.references[ref] = ref + ml.Unlock() return ref } func (ml *mountedLayer) hasReferences() bool { - return len(ml.references) > 0 + ml.Lock() + ret := len(ml.references) > 0 + ml.Unlock() + + return ret } func (ml *mountedLayer) deleteReference(ref RWLayer) error { + ml.Lock() + defer ml.Unlock() if _, ok := ml.references[ref]; !ok { return ErrLayerNotRetained } @@ -81,7 +91,9 @@ func (ml *mountedLayer) deleteReference(ref RWLayer) error { func (ml *mountedLayer) retakeReference(r RWLayer) { if ref, ok := r.(*referencedRWLayer); ok { + ml.Lock() ml.references[ref] = ref + ml.Unlock() } } From eaa3e69d141a1d040baa90520d2e7710e7cc6693 Mon Sep 17 00:00:00 2001 From: Xinfeng Liu Date: Tue, 23 Apr 2019 02:33:20 +0000 Subject: [PATCH 2/4] layer: optimize layerStore mountL Goroutine stack analisys shown some lock contention while doing massively (100 instances of `docker rm`) parallel image removal, with many goroutines waiting for the mountL mutex. Optimize it. With this commit, the above operation is about 3x faster, with no noticeable change to container creation times (tested on aufs and overlay2). kolyshkin@: - squashed commits - added description - protected CreateRWLayer against name collisions by temporary assiging nil to ls.mounts[name], and treating nil as "non-existent" in all the other functions. Signed-off-by: Kir Kolyshkin (cherry picked from commit 05250a4f0094e6802dd7d338d632ea632d0c7e34) Signed-off-by: Sebastiaan van Stijn --- layer/layer_store.go | 50 ++++++++++++++++++++++++++++---------------- layer/migration.go | 6 +++--- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/layer/layer_store.go b/layer/layer_store.go index 1601465c04..737283cc2d 100644 --- a/layer/layer_store.go +++ b/layer/layer_store.go @@ -189,7 +189,9 @@ func (ls *layerStore) loadLayer(layer ChainID) (*roLayer, error) { } func (ls *layerStore) loadMount(mount string) error { - if _, ok := ls.mounts[mount]; ok { + ls.mountL.Lock() + defer ls.mountL.Unlock() + if m := ls.mounts[mount]; m != nil { return nil } @@ -477,7 +479,7 @@ func (ls *layerStore) Release(l Layer) ([]Metadata, error) { return ls.releaseLayer(layer) } -func (ls *layerStore) CreateRWLayer(name string, parent ChainID, opts *CreateRWLayerOpts) (RWLayer, error) { +func (ls *layerStore) CreateRWLayer(name string, parent ChainID, opts *CreateRWLayerOpts) (_ RWLayer, err error) { var ( storageOpt map[string]string initFunc MountInit @@ -491,13 +493,21 @@ func (ls *layerStore) CreateRWLayer(name string, parent ChainID, opts *CreateRWL } ls.mountL.Lock() - defer ls.mountL.Unlock() - m, ok := ls.mounts[name] - if ok { + if _, ok := ls.mounts[name]; ok { + ls.mountL.Unlock() return nil, ErrMountNameConflict } + // Avoid name collision by temporary assigning nil + ls.mounts[name] = nil + ls.mountL.Unlock() + defer func() { + if err != nil { + ls.mountL.Lock() + delete(ls.mounts, name) + ls.mountL.Unlock() + } + }() - var err error var pid string var p *roLayer if string(parent) != "" { @@ -517,7 +527,7 @@ func (ls *layerStore) CreateRWLayer(name string, parent ChainID, opts *CreateRWL }() } - m = &mountedLayer{ + m := &mountedLayer{ name: name, parent: p, mountID: ls.mountID(name), @@ -528,7 +538,7 @@ func (ls *layerStore) CreateRWLayer(name string, parent ChainID, opts *CreateRWL if initFunc != nil { pid, err = ls.initMount(m.mountID, pid, mountLabel, initFunc, storageOpt) if err != nil { - return nil, err + return } m.initID = pid } @@ -538,10 +548,10 @@ func (ls *layerStore) CreateRWLayer(name string, parent ChainID, opts *CreateRWL } if err = ls.driver.CreateReadWrite(m.mountID, pid, createOpts); err != nil { - return nil, err + return } if err = ls.saveMount(m); err != nil { - return nil, err + return } return m.getReference(), nil @@ -549,9 +559,9 @@ func (ls *layerStore) CreateRWLayer(name string, parent ChainID, opts *CreateRWL func (ls *layerStore) GetRWLayer(id string) (RWLayer, error) { ls.mountL.Lock() - defer ls.mountL.Unlock() - mount, ok := ls.mounts[id] - if !ok { + mount := ls.mounts[id] + ls.mountL.Unlock() + if mount == nil { return nil, ErrMountDoesNotExist } @@ -561,8 +571,8 @@ func (ls *layerStore) GetRWLayer(id string) (RWLayer, error) { func (ls *layerStore) GetMountID(id string) (string, error) { ls.mountL.Lock() defer ls.mountL.Unlock() - mount, ok := ls.mounts[id] - if !ok { + mount := ls.mounts[id] + if mount == nil { return "", ErrMountDoesNotExist } logrus.Debugf("GetMountID id: %s -> mountID: %s", id, mount.mountID) @@ -572,9 +582,9 @@ func (ls *layerStore) GetMountID(id string) (string, error) { func (ls *layerStore) ReleaseRWLayer(l RWLayer) ([]Metadata, error) { ls.mountL.Lock() - defer ls.mountL.Unlock() - m, ok := ls.mounts[l.Name()] - if !ok { + m := ls.mounts[l.Name()] + ls.mountL.Unlock() + if m == nil { return []Metadata{}, nil } @@ -606,7 +616,9 @@ func (ls *layerStore) ReleaseRWLayer(l RWLayer) ([]Metadata, error) { return nil, err } + ls.mountL.Lock() delete(ls.mounts, m.Name()) + ls.mountL.Unlock() ls.layerL.Lock() defer ls.layerL.Unlock() @@ -634,7 +646,9 @@ func (ls *layerStore) saveMount(mount *mountedLayer) error { } } + ls.mountL.Lock() ls.mounts[mount.name] = mount + ls.mountL.Unlock() return nil } diff --git a/layer/migration.go b/layer/migration.go index 2668ea96bb..775e552fee 100644 --- a/layer/migration.go +++ b/layer/migration.go @@ -18,9 +18,9 @@ import ( // after migration the layer may be retrieved by the given name. func (ls *layerStore) CreateRWLayerByGraphID(name, graphID string, parent ChainID) (err error) { ls.mountL.Lock() - defer ls.mountL.Unlock() - m, ok := ls.mounts[name] - if ok { + m := ls.mounts[name] + ls.mountL.Unlock() + if m != nil { if m.parent.chainID != parent { return errors.New("name conflict, mismatched parent") } From 9eeb2b5ef0bfe825eeab43d516f556bcc7d760c1 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 16 May 2019 13:51:15 -0700 Subject: [PATCH 3/4] layer/CreateRWLayerByGraphID: remove This is an additon to commit 1fea38856a ("Remove v1.10 migrator") aka PR #38265. Since that one, CreateRWLayerByGraphID() is not used anywhere, so let's drop it. Signed-off-by: Kir Kolyshkin (cherry picked from commit b4e9b507655e7dbdfb44d4ee284dcd658b859b3f) Signed-off-by: Sebastiaan van Stijn --- layer/migration.go | 59 --------------- layer/migration_test.go | 160 ---------------------------------------- 2 files changed, 219 deletions(-) diff --git a/layer/migration.go b/layer/migration.go index 775e552fee..12500694f0 100644 --- a/layer/migration.go +++ b/layer/migration.go @@ -3,7 +3,6 @@ package layer // import "github.com/docker/docker/layer" import ( "compress/gzip" "errors" - "fmt" "io" "os" @@ -13,64 +12,6 @@ import ( "github.com/vbatts/tar-split/tar/storage" ) -// CreateRWLayerByGraphID creates a RWLayer in the layer store using -// the provided name with the given graphID. To get the RWLayer -// after migration the layer may be retrieved by the given name. -func (ls *layerStore) CreateRWLayerByGraphID(name, graphID string, parent ChainID) (err error) { - ls.mountL.Lock() - m := ls.mounts[name] - ls.mountL.Unlock() - if m != nil { - if m.parent.chainID != parent { - return errors.New("name conflict, mismatched parent") - } - if m.mountID != graphID { - return errors.New("mount already exists") - } - - return nil - } - - if !ls.driver.Exists(graphID) { - return fmt.Errorf("graph ID does not exist: %q", graphID) - } - - var p *roLayer - if string(parent) != "" { - p = ls.get(parent) - if p == nil { - return ErrLayerDoesNotExist - } - - // Release parent chain if error - defer func() { - if err != nil { - ls.layerL.Lock() - ls.releaseLayer(p) - ls.layerL.Unlock() - } - }() - } - - // TODO: Ensure graphID has correct parent - - m = &mountedLayer{ - name: name, - parent: p, - mountID: graphID, - layerStore: ls, - references: map[RWLayer]*referencedRWLayer{}, - } - - // Check for existing init layer - initID := fmt.Sprintf("%s-init", graphID) - if ls.driver.Exists(initID) { - m.initID = initID - } - - return ls.saveMount(m) -} - func (ls *layerStore) ChecksumForGraphID(id, parent, oldTarDataPath, newTarDataPath string) (diffID DiffID, size int64, err error) { defer func() { if err != nil { diff --git a/layer/migration_test.go b/layer/migration_test.go index 923166371c..2b5c3301f8 100644 --- a/layer/migration_test.go +++ b/layer/migration_test.go @@ -3,7 +3,6 @@ package layer // import "github.com/docker/docker/layer" import ( "bytes" "compress/gzip" - "fmt" "io" "io/ioutil" "os" @@ -12,7 +11,6 @@ import ( "testing" "github.com/docker/docker/daemon/graphdriver" - "github.com/docker/docker/pkg/archive" "github.com/docker/docker/pkg/stringid" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" @@ -269,161 +267,3 @@ func TestLayerMigrationNoTarsplit(t *testing.T) { assertMetadata(t, metadata, createMetadata(layer2a)) } - -func TestMountMigration(t *testing.T) { - // TODO Windows: Figure out why this is failing (obvious - paths... needs porting) - if runtime.GOOS == "windows" { - t.Skip("Failing on Windows") - } - ls, _, cleanup := newTestStore(t) - defer cleanup() - - baseFiles := []FileApplier{ - newTestFile("/root/.bashrc", []byte("# Boring configuration"), 0644), - newTestFile("/etc/profile", []byte("# Base configuration"), 0644), - } - initFiles := []FileApplier{ - newTestFile("/etc/hosts", []byte{}, 0644), - newTestFile("/etc/resolv.conf", []byte{}, 0644), - } - mountFiles := []FileApplier{ - newTestFile("/etc/hosts", []byte("localhost 127.0.0.1"), 0644), - newTestFile("/root/.bashrc", []byte("# Updated configuration"), 0644), - newTestFile("/root/testfile1.txt", []byte("nothing valuable"), 0644), - } - - initTar, err := tarFromFiles(initFiles...) - if err != nil { - t.Fatal(err) - } - - mountTar, err := tarFromFiles(mountFiles...) - if err != nil { - t.Fatal(err) - } - - graph := ls.(*layerStore).driver - - layer1, err := createLayer(ls, "", initWithFiles(baseFiles...)) - if err != nil { - t.Fatal(err) - } - - graphID1 := layer1.(*referencedCacheLayer).cacheID - - containerID := stringid.GenerateRandomID() - containerInit := fmt.Sprintf("%s-init", containerID) - - if err := graph.Create(containerInit, graphID1, nil); err != nil { - t.Fatal(err) - } - if _, err := graph.ApplyDiff(containerInit, graphID1, bytes.NewReader(initTar)); err != nil { - t.Fatal(err) - } - - if err := graph.Create(containerID, containerInit, nil); err != nil { - t.Fatal(err) - } - if _, err := graph.ApplyDiff(containerID, containerInit, bytes.NewReader(mountTar)); err != nil { - t.Fatal(err) - } - - if err := ls.(*layerStore).CreateRWLayerByGraphID("migration-mount", containerID, layer1.ChainID()); err != nil { - t.Fatal(err) - } - - rwLayer1, err := ls.GetRWLayer("migration-mount") - if err != nil { - t.Fatal(err) - } - - if _, err := rwLayer1.Mount(""); err != nil { - t.Fatal(err) - } - - changes, err := rwLayer1.Changes() - if err != nil { - t.Fatal(err) - } - - if expected := 5; len(changes) != expected { - t.Logf("Changes %#v", changes) - t.Fatalf("Wrong number of changes %d, expected %d", len(changes), expected) - } - - sortChanges(changes) - - assertChange(t, changes[0], archive.Change{ - Path: "/etc", - Kind: archive.ChangeModify, - }) - assertChange(t, changes[1], archive.Change{ - Path: "/etc/hosts", - Kind: archive.ChangeModify, - }) - assertChange(t, changes[2], archive.Change{ - Path: "/root", - Kind: archive.ChangeModify, - }) - assertChange(t, changes[3], archive.Change{ - Path: "/root/.bashrc", - Kind: archive.ChangeModify, - }) - assertChange(t, changes[4], archive.Change{ - Path: "/root/testfile1.txt", - Kind: archive.ChangeAdd, - }) - - if _, err := ls.CreateRWLayer("migration-mount", layer1.ChainID(), nil); err == nil { - t.Fatal("Expected error creating mount with same name") - } else if err != ErrMountNameConflict { - t.Fatal(err) - } - - rwLayer2, err := ls.GetRWLayer("migration-mount") - if err != nil { - t.Fatal(err) - } - - if getMountLayer(rwLayer1) != getMountLayer(rwLayer2) { - t.Fatal("Expected same layer from get with same name as from migrate") - } - - if _, err := rwLayer2.Mount(""); err != nil { - t.Fatal(err) - } - - if _, err := rwLayer2.Mount(""); err != nil { - t.Fatal(err) - } - - if metadata, err := ls.Release(layer1); err != nil { - t.Fatal(err) - } else if len(metadata) > 0 { - t.Fatalf("Expected no layers to be deleted, deleted %#v", metadata) - } - - if err := rwLayer1.Unmount(); err != nil { - t.Fatal(err) - } - - if _, err := ls.ReleaseRWLayer(rwLayer1); err != nil { - t.Fatal(err) - } - - if err := rwLayer2.Unmount(); err != nil { - t.Fatal(err) - } - if err := rwLayer2.Unmount(); err != nil { - t.Fatal(err) - } - metadata, err := ls.ReleaseRWLayer(rwLayer2) - if err != nil { - t.Fatal(err) - } - if len(metadata) == 0 { - t.Fatal("Expected base layer to be deleted when deleting mount") - } - - assertMetadata(t, metadata, createMetadata(layer1)) -} From 936432326a676a77d62c8d8dcd69d25f809fde0a Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Mon, 20 May 2019 14:46:54 -0700 Subject: [PATCH 4/4] layer: protect from same-name races As pointed out by Tonis, there's a race between ReleaseRWLayer() and GetRWLayer(): ``` ----- goroutine 1 ----- ----- goroutine 2 ----- ReleaseRWLayer() m := ls.mounts[l.Name()] ... m.deleteReference(l) m.hasReferences() ... GetRWLayer() ... mount := ls.mounts[id] ls.driver.Remove(m.mountID) ls.store.RemoveMount(m.name) return mount.getReference() delete(ls.mounts, m.Name()) ----------------------- ----------------------- ``` When something like this happens, GetRWLayer will return an RWLayer without a storage. Oops. There might be more races like this, and it seems the best solution is to lock by layer id/name by using pkg/locker. With this in place, name collision could not happen, so remove the part of previous commit that protected against it in CreateRWLayer (temporary nil assigmment and associated rollback). So, now we have * layerStore.mountL sync.Mutex to protect layerStore.mount map[] (against concurrent access); * mountedLayer's embedded `sync.Mutex` to protect its references map[]; * layerStore.layerL (which I haven't touched); * per-id locker, to avoid name conflicts and concurrent operations on the same rw layer. The whole rig seems to look more readable now (mutexes use is straightforward, no nested locks). Reported-by: Tonis Tiigi Signed-off-by: Kir Kolyshkin Signed-off-by: Kir Kolyshkin (cherry picked from commit af433dd200f8287305b1531d5058780be36b7e2e) Signed-off-by: Sebastiaan van Stijn --- layer/layer_store.go | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/layer/layer_store.go b/layer/layer_store.go index 737283cc2d..81730e9d92 100644 --- a/layer/layer_store.go +++ b/layer/layer_store.go @@ -10,6 +10,7 @@ import ( "github.com/docker/distribution" "github.com/docker/docker/daemon/graphdriver" "github.com/docker/docker/pkg/idtools" + "github.com/docker/docker/pkg/locker" "github.com/docker/docker/pkg/plugingetter" "github.com/docker/docker/pkg/stringid" "github.com/docker/docker/pkg/system" @@ -36,7 +37,11 @@ type layerStore struct { mounts map[string]*mountedLayer mountL sync.Mutex - os string + + // protect *RWLayer() methods from operating on the same name/id + locker *locker.Locker + + os string } // StoreOptions are the options used to create a new Store instance @@ -92,6 +97,7 @@ func newStoreFromGraphDriver(root string, driver graphdriver.Driver, os string) driver: driver, layerMap: map[ChainID]*roLayer{}, mounts: map[string]*mountedLayer{}, + locker: locker.New(), useTarSplit: !caps.ReproducesExactDiffs, os: os, } @@ -191,7 +197,7 @@ func (ls *layerStore) loadLayer(layer ChainID) (*roLayer, error) { func (ls *layerStore) loadMount(mount string) error { ls.mountL.Lock() defer ls.mountL.Unlock() - if m := ls.mounts[mount]; m != nil { + if _, ok := ls.mounts[mount]; ok { return nil } @@ -492,21 +498,15 @@ func (ls *layerStore) CreateRWLayer(name string, parent ChainID, opts *CreateRWL initFunc = opts.InitFunc } + ls.locker.Lock(name) + defer ls.locker.Unlock(name) + ls.mountL.Lock() - if _, ok := ls.mounts[name]; ok { - ls.mountL.Unlock() + _, ok := ls.mounts[name] + ls.mountL.Unlock() + if ok { return nil, ErrMountNameConflict } - // Avoid name collision by temporary assigning nil - ls.mounts[name] = nil - ls.mountL.Unlock() - defer func() { - if err != nil { - ls.mountL.Lock() - delete(ls.mounts, name) - ls.mountL.Unlock() - } - }() var pid string var p *roLayer @@ -558,6 +558,9 @@ func (ls *layerStore) CreateRWLayer(name string, parent ChainID, opts *CreateRWL } func (ls *layerStore) GetRWLayer(id string) (RWLayer, error) { + ls.locker.Lock(id) + defer ls.locker.Unlock(id) + ls.mountL.Lock() mount := ls.mounts[id] ls.mountL.Unlock() @@ -570,8 +573,9 @@ func (ls *layerStore) GetRWLayer(id string) (RWLayer, error) { func (ls *layerStore) GetMountID(id string) (string, error) { ls.mountL.Lock() - defer ls.mountL.Unlock() mount := ls.mounts[id] + ls.mountL.Unlock() + if mount == nil { return "", ErrMountDoesNotExist } @@ -581,8 +585,12 @@ func (ls *layerStore) GetMountID(id string) (string, error) { } func (ls *layerStore) ReleaseRWLayer(l RWLayer) ([]Metadata, error) { + name := l.Name() + ls.locker.Lock(name) + defer ls.locker.Unlock(name) + ls.mountL.Lock() - m := ls.mounts[l.Name()] + m := ls.mounts[name] ls.mountL.Unlock() if m == nil { return []Metadata{}, nil @@ -617,7 +625,7 @@ func (ls *layerStore) ReleaseRWLayer(l RWLayer) ([]Metadata, error) { } ls.mountL.Lock() - delete(ls.mounts, m.Name()) + delete(ls.mounts, name) ls.mountL.Unlock() ls.layerL.Lock()