Merge pull request #33830 from cpuguy83/33799_fix_plugin_mounts_issue

Make plugin removes more resilient to failure
This commit is contained in:
Tibor Vass 2017-07-03 18:37:31 -07:00 committed by GitHub
commit 48adb9d954
4 changed files with 81 additions and 29 deletions

View file

@ -5,6 +5,7 @@ package main
import (
"bufio"
"bytes"
"context"
"encoding/json"
"fmt"
"io"
@ -25,6 +26,9 @@ import (
"crypto/x509"
"github.com/cloudflare/cfssl/helpers"
"github.com/docker/docker/api"
"github.com/docker/docker/api/types"
"github.com/docker/docker/client"
"github.com/docker/docker/integration-cli/checker"
"github.com/docker/docker/integration-cli/cli"
"github.com/docker/docker/integration-cli/daemon"
@ -2980,3 +2984,45 @@ func (s *DockerDaemonSuite) TestShmSizeReload(c *check.C) {
c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
c.Assert(strings.TrimSpace(out), check.Equals, fmt.Sprintf("%v", size))
}
// TestFailedPluginRemove makes sure that a failed plugin remove does not block
// the daemon from starting
func (s *DockerDaemonSuite) TestFailedPluginRemove(c *check.C) {
testRequires(c, DaemonIsLinux, IsAmd64, SameHostDaemon)
d := daemon.New(c, dockerBinary, dockerdBinary, daemon.Config{})
d.Start(c)
cli, err := client.NewClient(d.Sock(), api.DefaultVersion, nil, nil)
c.Assert(err, checker.IsNil)
ctx, cancel := context.WithTimeout(context.Background(), 300*time.Second)
defer cancel()
name := "test-plugin-rm-fail"
out, err := cli.PluginInstall(ctx, name, types.PluginInstallOptions{
Disabled: true,
AcceptAllPermissions: true,
RemoteRef: "cpuguy83/docker-logdriver-test",
})
c.Assert(err, checker.IsNil)
defer out.Close()
io.Copy(ioutil.Discard, out)
ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
p, _, err := cli.PluginInspectWithRaw(ctx, name)
c.Assert(err, checker.IsNil)
// simulate a bad/partial removal by removing the plugin config.
configPath := filepath.Join(d.Root, "plugins", p.ID, "config.json")
c.Assert(os.Remove(configPath), checker.IsNil)
d.Restart(c)
ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
_, err = cli.Ping(ctx)
c.Assert(err, checker.IsNil)
_, _, err = cli.PluginInspectWithRaw(ctx, name)
// plugin should be gone since the config.json is gone
c.Assert(err, checker.NotNil)
}

View file

@ -13,7 +13,6 @@ import (
"os"
"path"
"path/filepath"
"sort"
"strings"
"github.com/Sirupsen/logrus"
@ -32,6 +31,7 @@ import (
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/pools"
"github.com/docker/docker/pkg/progress"
"github.com/docker/docker/pkg/system"
"github.com/docker/docker/plugin/v2"
refstore "github.com/docker/docker/reference"
"github.com/opencontainers/go-digest"
@ -624,14 +624,20 @@ func (pm *Manager) Remove(name string, config *types.PluginRmConfig) error {
}()
id := p.GetID()
pm.config.Store.Remove(p)
pluginDir := filepath.Join(pm.config.Root, id)
if err := recursiveUnmount(pluginDir); err != nil {
logrus.WithField("dir", pluginDir).WithField("id", id).Warn(err)
if err := mount.RecursiveUnmount(pluginDir); err != nil {
return errors.Wrap(err, "error unmounting plugin data")
}
if err := os.RemoveAll(pluginDir); err != nil {
logrus.Warnf("unable to remove %q from plugin remove: %v", pluginDir, err)
if err := os.Rename(pluginDir, pluginDir+"-removing"); err != nil {
return errors.Wrap(err, "error performing atomic remove of plugin dir")
}
if err := system.EnsureRemoveAll(pluginDir); err != nil {
return errors.Wrap(err, "error removing plugin dir")
}
pm.config.Store.Remove(p)
pm.config.LogPluginEvent(id, name, "remove")
return nil
}
@ -652,27 +658,6 @@ func getMounts(root string) ([]string, error) {
return mounts, nil
}
func recursiveUnmount(root string) error {
mounts, err := getMounts(root)
if err != nil {
return err
}
// sort in reverse-lexicographic order so the root mount will always be last
sort.Sort(sort.Reverse(sort.StringSlice(mounts)))
for i, m := range mounts {
if err := mount.Unmount(m); err != nil {
if i == len(mounts)-1 {
return errors.Wrapf(err, "error performing recursive unmount on %s", root)
}
logrus.WithError(err).WithField("mountpoint", m).Warn("could not unmount")
}
}
return nil
}
// Set sets plugin args
func (pm *Manager) Set(name string, args []string) error {
p, err := pm.config.Store.GetV2Plugin(name)

View file

@ -163,6 +163,19 @@ func (pm *Manager) StateChanged(id string, e libcontainerd.StateInfo) error {
return nil
}
func handleLoadError(err error, id string) {
if err == nil {
return
}
logger := logrus.WithError(err).WithField("id", id)
if os.IsNotExist(errors.Cause(err)) {
// Likely some error while removing on an older version of docker
logger.Warn("missing plugin config, skipping: this may be caused due to a failed remove and requires manual cleanup.")
return
}
logger.Error("error loading plugin, skipping")
}
func (pm *Manager) reload() error { // todo: restore
dir, err := ioutil.ReadDir(pm.config.Root)
if err != nil {
@ -173,9 +186,17 @@ func (pm *Manager) reload() error { // todo: restore
if validFullID.MatchString(v.Name()) {
p, err := pm.loadPlugin(v.Name())
if err != nil {
return err
handleLoadError(err, v.Name())
continue
}
plugins[p.GetID()] = p
} else {
if validFullID.MatchString(strings.TrimSuffix(v.Name(), "-removing")) {
// There was likely some error while removing this plugin, let's try to remove again here
if err := system.EnsureRemoveAll(v.Name()); err != nil {
logrus.WithError(err).WithField("id", v.Name()).Warn("error while attempting to clean up previously removed plugin")
}
}
}
}

View file

@ -204,7 +204,7 @@ func (pm *Manager) upgradePlugin(p *v2.Plugin, configDigest digest.Digest, blobs
// Make sure nothing is mounted
// This could happen if the plugin was disabled with `-f` with active mounts.
// If there is anything in `orig` is still mounted, this should error out.
if err := recursiveUnmount(orig); err != nil {
if err := mount.RecursiveUnmount(orig); err != nil {
return err
}