Browse Source

Make /proc writable, but not /proc/sys and /proc/sysrq-trigger

Some applications want to write to /proc. For instance:

docker run -it centos groupadd foo

Gives: groupadd: failure while writing changes to /etc/group

And strace reveals why:

open("/proc/self/task/13/attr/fscreate", O_RDWR) = -1 EROFS (Read-only file system)

I've looked at what other systems do, and systemd-nspawn makes /proc read-write
and /proc/sys readonly, while lxc allows "proc:mixed" which does the same,
plus it makes /proc/sysrq-trigger also readonly.

The latter seems like a prudent idea, so we follow lxc proc:mixed.
Additionally we make /proc/irq and /proc/bus readonly, as these seem to let
you control various hardware things.

Docker-DCO-1.1-Signed-off-by: Alexander Larsson <alexl@redhat.com> (github: alexlarsson)
Alexander Larsson 11 năm trước cách đây
mục cha
commit
68493e2f7f

+ 1 - 1
pkg/libcontainer/nsinit/init.go

@@ -81,7 +81,7 @@ func Init(container *libcontainer.Container, uncleanRootfs, consolePath string,
 		return fmt.Errorf("set process label %s", err)
 	}
 	if container.Context["restrictions"] != "" {
-		if err := restrict.Restrict("proc", "sys"); err != nil {
+		if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus", "sys"); err != nil {
 			return err
 		}
 	}

+ 20 - 1
pkg/libcontainer/security/restrict/restrict.go

@@ -10,12 +10,31 @@ import (
 	"github.com/dotcloud/docker/pkg/system"
 )
 
+const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV // extra flags OR-ed into the read-only bind remount done by mountReadonly
+
+func mountReadonly(path string) error { // mountReadonly makes path read-only: remount in place, or bind-mount it onto itself when the remount reports EINVAL
+	if err := system.Mount("", path, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil { // first attempt: plain read-only remount of path
+		if err == syscall.EINVAL {
+			// Probably not a mountpoint (the remount returned EINVAL), so bind-mount path onto itself first
+			if err := system.Mount(path, path, "", syscall.MS_BIND, ""); err != nil {
+				return err
+			}
+			if err := system.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC|defaultMountFlags, ""); err != nil { // then remount that bind mount read-only, adding defaultMountFlags
+				return err
+			}
+		} else {
+			return err // any error other than EINVAL is returned to the caller unchanged
+		}
+	}
+	return nil
+}
+
 // This has to be called while the container still has CAP_SYS_ADMIN (to be able to perform mounts).
 // However, afterwards, CAP_SYS_ADMIN should be dropped (otherwise the user will be able to revert those changes).
 func Restrict(mounts ...string) error {
 	// remount proc and sys as readonly
 	for _, dest := range mounts {
-		if err := system.Mount("", dest, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil {
+		if err := mountReadonly(dest); err != nil {
 			return fmt.Errorf("unable to remount %s readonly: %s", dest, err)
 		}
 	}