Merge pull request #44210 from corhere/chrootarchive-without-reexec
Fix 'docker cp' mount table explosion, take four
This commit is contained in:
commit
6eab4f55fa
21 changed files with 1363 additions and 1003 deletions
|
@ -18,7 +18,7 @@ func (container *Container) ResolvePath(path string) (resolvedPath, absPath stri
|
|||
if container.BaseFS == "" {
|
||||
return "", "", errors.New("ResolvePath: BaseFS of container " + container.ID + " is unexpectedly empty")
|
||||
}
|
||||
// Check if a drive letter supplied, it must be the system drive. No-op except on Windows
|
||||
// Check if a drive letter supplied, it must be the system drive.
|
||||
path, err = system.CheckSystemDriveAndRemoveDriveLetter(path)
|
||||
if err != nil {
|
||||
return "", "", err
|
|
@ -3,17 +3,9 @@ package daemon // import "github.com/docker/docker/daemon"
|
|||
import (
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/docker/docker/api/types"
|
||||
"github.com/docker/docker/container"
|
||||
"github.com/docker/docker/errdefs"
|
||||
"github.com/docker/docker/pkg/archive"
|
||||
"github.com/docker/docker/pkg/chrootarchive"
|
||||
"github.com/docker/docker/pkg/ioutils"
|
||||
"github.com/docker/docker/pkg/system"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// ContainerCopy performs a deprecated operation of archiving the resource at
|
||||
|
@ -24,11 +16,6 @@ func (daemon *Daemon) ContainerCopy(name string, res string) (io.ReadCloser, err
|
|||
return nil, err
|
||||
}
|
||||
|
||||
// Make sure an online file-system operation is permitted.
|
||||
if err := daemon.isOnlineFSOperationPermitted(ctr); err != nil {
|
||||
return nil, errdefs.System(err)
|
||||
}
|
||||
|
||||
data, err := daemon.containerCopy(ctr, res)
|
||||
if err == nil {
|
||||
return data, nil
|
||||
|
@ -48,11 +35,6 @@ func (daemon *Daemon) ContainerStatPath(name string, path string) (stat *types.C
|
|||
return nil, err
|
||||
}
|
||||
|
||||
// Make sure an online file-system operation is permitted.
|
||||
if err := daemon.isOnlineFSOperationPermitted(ctr); err != nil {
|
||||
return nil, errdefs.System(err)
|
||||
}
|
||||
|
||||
stat, err = daemon.containerStatPath(ctr, path)
|
||||
if err == nil {
|
||||
return stat, nil
|
||||
|
@ -73,11 +55,6 @@ func (daemon *Daemon) ContainerArchivePath(name string, path string) (content io
|
|||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Make sure an online file-system operation is permitted.
|
||||
if err := daemon.isOnlineFSOperationPermitted(ctr); err != nil {
|
||||
return nil, nil, errdefs.System(err)
|
||||
}
|
||||
|
||||
content, stat, err = daemon.containerArchivePath(ctr, path)
|
||||
if err == nil {
|
||||
return content, stat, nil
|
||||
|
@ -101,11 +78,6 @@ func (daemon *Daemon) ContainerExtractToDir(name, path string, copyUIDGID, noOve
|
|||
return err
|
||||
}
|
||||
|
||||
// Make sure an online file-system operation is permitted.
|
||||
if err := daemon.isOnlineFSOperationPermitted(ctr); err != nil {
|
||||
return errdefs.System(err)
|
||||
}
|
||||
|
||||
err = daemon.containerExtractToDir(ctr, path, copyUIDGID, noOverwriteDirNonDir, content)
|
||||
if err == nil {
|
||||
return nil
|
||||
|
@ -116,299 +88,3 @@ func (daemon *Daemon) ContainerExtractToDir(name, path string, copyUIDGID, noOve
|
|||
}
|
||||
return errdefs.System(err)
|
||||
}
|
||||
|
||||
// containerStatPath stats the filesystem resource at the specified path in this
|
||||
// container. Returns stat info about the resource.
|
||||
func (daemon *Daemon) containerStatPath(container *container.Container, path string) (stat *types.ContainerPathStat, err error) {
|
||||
container.Lock()
|
||||
defer container.Unlock()
|
||||
|
||||
if err = daemon.Mount(container); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer daemon.Unmount(container)
|
||||
|
||||
err = daemon.mountVolumes(container)
|
||||
defer container.DetachAndUnmount(daemon.LogVolumeEvent)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Normalize path before sending to rootfs
|
||||
path = filepath.FromSlash(path)
|
||||
|
||||
resolvedPath, absPath, err := container.ResolvePath(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return container.StatPath(resolvedPath, absPath)
|
||||
}
|
||||
|
||||
// containerArchivePath creates an archive of the filesystem resource at the specified
|
||||
// path in this container. Returns a tar archive of the resource and stat info
|
||||
// about the resource.
|
||||
func (daemon *Daemon) containerArchivePath(container *container.Container, path string) (content io.ReadCloser, stat *types.ContainerPathStat, err error) {
|
||||
container.Lock()
|
||||
|
||||
defer func() {
|
||||
if err != nil {
|
||||
// Wait to unlock the container until the archive is fully read
|
||||
// (see the ReadCloseWrapper func below) or if there is an error
|
||||
// before that occurs.
|
||||
container.Unlock()
|
||||
}
|
||||
}()
|
||||
|
||||
if err = daemon.Mount(container); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if err != nil {
|
||||
// unmount any volumes
|
||||
container.DetachAndUnmount(daemon.LogVolumeEvent)
|
||||
// unmount the container's rootfs
|
||||
daemon.Unmount(container)
|
||||
}
|
||||
}()
|
||||
|
||||
if err = daemon.mountVolumes(container); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Normalize path before sending to rootfs
|
||||
path = filepath.FromSlash(path)
|
||||
|
||||
resolvedPath, absPath, err := container.ResolvePath(path)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
stat, err = container.StatPath(resolvedPath, absPath)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// We need to rebase the archive entries if the last element of the
|
||||
// resolved path was a symlink that was evaluated and is now different
|
||||
// than the requested path. For example, if the given path was "/foo/bar/",
|
||||
// but it resolved to "/var/lib/docker/containers/{id}/foo/baz/", we want
|
||||
// to ensure that the archive entries start with "bar" and not "baz". This
|
||||
// also catches the case when the root directory of the container is
|
||||
// requested: we want the archive entries to start with "/" and not the
|
||||
// container ID.
|
||||
|
||||
// Get the source and the base paths of the container resolved path in order
|
||||
// to get the proper tar options for the rebase tar.
|
||||
resolvedPath = filepath.Clean(resolvedPath)
|
||||
if filepath.Base(resolvedPath) == "." {
|
||||
resolvedPath += string(filepath.Separator) + "."
|
||||
}
|
||||
|
||||
sourceDir := resolvedPath
|
||||
sourceBase := "."
|
||||
|
||||
if stat.Mode&os.ModeDir == 0 { // not dir
|
||||
sourceDir, sourceBase = filepath.Split(resolvedPath)
|
||||
}
|
||||
opts := archive.TarResourceRebaseOpts(sourceBase, filepath.Base(absPath))
|
||||
|
||||
data, err := chrootarchive.Tar(sourceDir, opts, container.BaseFS)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
content = ioutils.NewReadCloserWrapper(data, func() error {
|
||||
err := data.Close()
|
||||
container.DetachAndUnmount(daemon.LogVolumeEvent)
|
||||
daemon.Unmount(container)
|
||||
container.Unlock()
|
||||
return err
|
||||
})
|
||||
|
||||
daemon.LogContainerEvent(container, "archive-path")
|
||||
|
||||
return content, stat, nil
|
||||
}
|
||||
|
||||
// containerExtractToDir extracts the given tar archive to the specified location in the
|
||||
// filesystem of this container. The given path must be of a directory in the
|
||||
// container. If it is not, the error will be an errdefs.InvalidParameter. If
|
||||
// noOverwriteDirNonDir is true then it will be an error if unpacking the
|
||||
// given content would cause an existing directory to be replaced with a non-
|
||||
// directory and vice versa.
|
||||
func (daemon *Daemon) containerExtractToDir(container *container.Container, path string, copyUIDGID, noOverwriteDirNonDir bool, content io.Reader) (err error) {
|
||||
container.Lock()
|
||||
defer container.Unlock()
|
||||
|
||||
if err = daemon.Mount(container); err != nil {
|
||||
return err
|
||||
}
|
||||
defer daemon.Unmount(container)
|
||||
|
||||
err = daemon.mountVolumes(container)
|
||||
defer container.DetachAndUnmount(daemon.LogVolumeEvent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Normalize path before sending to rootfs'
|
||||
path = filepath.FromSlash(path)
|
||||
|
||||
// Check if a drive letter supplied, it must be the system drive. No-op except on Windows
|
||||
path, err = system.CheckSystemDriveAndRemoveDriveLetter(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// The destination path needs to be resolved to a host path, with all
|
||||
// symbolic links followed in the scope of the container's rootfs. Note
|
||||
// that we do not use `container.ResolvePath(path)` here because we need
|
||||
// to also evaluate the last path element if it is a symlink. This is so
|
||||
// that you can extract an archive to a symlink that points to a directory.
|
||||
|
||||
// Consider the given path as an absolute path in the container.
|
||||
absPath := archive.PreserveTrailingDotOrSeparator(filepath.Join(string(filepath.Separator), path), path)
|
||||
|
||||
// This will evaluate the last path element if it is a symlink.
|
||||
resolvedPath, err := container.GetResourcePath(absPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stat, err := os.Lstat(resolvedPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !stat.IsDir() {
|
||||
return errdefs.InvalidParameter(errors.New("extraction point is not a directory"))
|
||||
}
|
||||
|
||||
// Need to check if the path is in a volume. If it is, it cannot be in a
|
||||
// read-only volume. If it is not in a volume, the container cannot be
|
||||
// configured with a read-only rootfs.
|
||||
|
||||
// Use the resolved path relative to the container rootfs as the new
|
||||
// absPath. This way we fully follow any symlinks in a volume that may
|
||||
// lead back outside the volume.
|
||||
//
|
||||
// The Windows implementation of filepath.Rel in golang 1.4 does not
|
||||
// support volume style file path semantics. On Windows when using the
|
||||
// filter driver, we are guaranteed that the path will always be
|
||||
// a volume file path.
|
||||
var baseRel string
|
||||
if strings.HasPrefix(resolvedPath, `\\?\Volume{`) {
|
||||
if strings.HasPrefix(resolvedPath, container.BaseFS) {
|
||||
baseRel = resolvedPath[len(container.BaseFS):]
|
||||
if baseRel[:1] == `\` {
|
||||
baseRel = baseRel[1:]
|
||||
}
|
||||
}
|
||||
} else {
|
||||
baseRel, err = filepath.Rel(container.BaseFS, resolvedPath)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Make it an absolute path.
|
||||
absPath = filepath.Join(string(filepath.Separator), baseRel)
|
||||
|
||||
toVolume, err := checkIfPathIsInAVolume(container, absPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !toVolume && container.HostConfig.ReadonlyRootfs {
|
||||
return errdefs.InvalidParameter(errors.New("container rootfs is marked read-only"))
|
||||
}
|
||||
|
||||
options := daemon.defaultTarCopyOptions(noOverwriteDirNonDir)
|
||||
|
||||
if copyUIDGID {
|
||||
var err error
|
||||
// tarCopyOptions will appropriately pull in the right uid/gid for the
|
||||
// user/group and will set the options.
|
||||
options, err = daemon.tarCopyOptions(container, noOverwriteDirNonDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := chrootarchive.UntarWithRoot(content, resolvedPath, options, container.BaseFS); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
daemon.LogContainerEvent(container, "extract-to-dir")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (daemon *Daemon) containerCopy(container *container.Container, resource string) (rc io.ReadCloser, err error) {
|
||||
if resource[0] == '/' || resource[0] == '\\' {
|
||||
resource = resource[1:]
|
||||
}
|
||||
container.Lock()
|
||||
|
||||
defer func() {
|
||||
if err != nil {
|
||||
// Wait to unlock the container until the archive is fully read
|
||||
// (see the ReadCloseWrapper func below) or if there is an error
|
||||
// before that occurs.
|
||||
container.Unlock()
|
||||
}
|
||||
}()
|
||||
|
||||
if err := daemon.Mount(container); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if err != nil {
|
||||
// unmount any volumes
|
||||
container.DetachAndUnmount(daemon.LogVolumeEvent)
|
||||
// unmount the container's rootfs
|
||||
daemon.Unmount(container)
|
||||
}
|
||||
}()
|
||||
|
||||
if err := daemon.mountVolumes(container); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Normalize path before sending to rootfs
|
||||
resource = filepath.FromSlash(resource)
|
||||
|
||||
basePath, err := container.GetResourcePath(resource)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stat, err := os.Stat(basePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var filter []string
|
||||
if !stat.IsDir() {
|
||||
d, f := filepath.Split(basePath)
|
||||
basePath = d
|
||||
filter = []string{f}
|
||||
}
|
||||
archv, err := chrootarchive.Tar(basePath, &archive.TarOptions{
|
||||
Compression: archive.Uncompressed,
|
||||
IncludeFiles: filter,
|
||||
}, container.BaseFS)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
reader := ioutils.NewReadCloserWrapper(archv, func() error {
|
||||
err := archv.Close()
|
||||
container.DetachAndUnmount(daemon.LogVolumeEvent)
|
||||
daemon.Unmount(container)
|
||||
container.Unlock()
|
||||
return err
|
||||
})
|
||||
daemon.LogContainerEvent(container, "copy")
|
||||
return reader, nil
|
||||
}
|
||||
|
|
|
@ -4,12 +4,212 @@
|
|||
package daemon // import "github.com/docker/docker/daemon"
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/docker/docker/api/types"
|
||||
"github.com/docker/docker/container"
|
||||
"github.com/docker/docker/errdefs"
|
||||
"github.com/docker/docker/pkg/archive"
|
||||
"github.com/docker/docker/pkg/ioutils"
|
||||
volumemounts "github.com/docker/docker/volume/mounts"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// containerStatPath stats the filesystem resource at the specified path in this
|
||||
// container. Returns stat info about the resource.
|
||||
func (daemon *Daemon) containerStatPath(container *container.Container, path string) (stat *types.ContainerPathStat, err error) {
|
||||
container.Lock()
|
||||
defer container.Unlock()
|
||||
|
||||
cfs, err := daemon.openContainerFS(container)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer cfs.Close()
|
||||
|
||||
return cfs.Stat(context.TODO(), path)
|
||||
}
|
||||
|
||||
// containerArchivePath creates an archive of the filesystem resource at the specified
|
||||
// path in this container. Returns a tar archive of the resource and stat info
|
||||
// about the resource.
|
||||
func (daemon *Daemon) containerArchivePath(container *container.Container, path string) (content io.ReadCloser, stat *types.ContainerPathStat, err error) {
|
||||
container.Lock()
|
||||
|
||||
defer func() {
|
||||
if err != nil {
|
||||
// Wait to unlock the container until the archive is fully read
|
||||
// (see the ReadCloseWrapper func below) or if there is an error
|
||||
// before that occurs.
|
||||
container.Unlock()
|
||||
}
|
||||
}()
|
||||
|
||||
cfs, err := daemon.openContainerFS(container)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if err != nil {
|
||||
cfs.Close()
|
||||
}
|
||||
}()
|
||||
|
||||
absPath := archive.PreserveTrailingDotOrSeparator(filepath.Join("/", path), path)
|
||||
|
||||
stat, err = cfs.Stat(context.TODO(), absPath)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
sourceDir, sourceBase := absPath, "."
|
||||
if stat.Mode&os.ModeDir == 0 { // not dir
|
||||
sourceDir, sourceBase = filepath.Split(absPath)
|
||||
}
|
||||
opts := archive.TarResourceRebaseOpts(sourceBase, filepath.Base(absPath))
|
||||
|
||||
tb, err := archive.NewTarballer(sourceDir, opts)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
cfs.GoInFS(context.TODO(), tb.Do)
|
||||
data := tb.Reader()
|
||||
content = ioutils.NewReadCloserWrapper(data, func() error {
|
||||
err := data.Close()
|
||||
_ = cfs.Close()
|
||||
container.Unlock()
|
||||
return err
|
||||
})
|
||||
|
||||
daemon.LogContainerEvent(container, "archive-path")
|
||||
|
||||
return content, stat, nil
|
||||
}
|
||||
|
||||
// containerExtractToDir extracts the given tar archive to the specified location in the
|
||||
// filesystem of this container. The given path must be of a directory in the
|
||||
// container. If it is not, the error will be an errdefs.InvalidParameter. If
|
||||
// noOverwriteDirNonDir is true then it will be an error if unpacking the
|
||||
// given content would cause an existing directory to be replaced with a non-
|
||||
// directory and vice versa.
|
||||
func (daemon *Daemon) containerExtractToDir(container *container.Container, path string, copyUIDGID, noOverwriteDirNonDir bool, content io.Reader) (err error) {
|
||||
container.Lock()
|
||||
defer container.Unlock()
|
||||
|
||||
cfs, err := daemon.openContainerFS(container)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer cfs.Close()
|
||||
|
||||
err = cfs.RunInFS(context.TODO(), func() error {
|
||||
// The destination path needs to be resolved with all symbolic links
|
||||
// followed. Note that we need to also evaluate the last path element if
|
||||
// it is a symlink. This is so that you can extract an archive to a
|
||||
// symlink that points to a directory.
|
||||
absPath, err := filepath.EvalSymlinks(filepath.Join("/", path))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
absPath = archive.PreserveTrailingDotOrSeparator(absPath, path)
|
||||
|
||||
stat, err := os.Lstat(absPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !stat.IsDir() {
|
||||
return errdefs.InvalidParameter(errors.New("extraction point is not a directory"))
|
||||
}
|
||||
|
||||
// Need to check if the path is in a volume. If it is, it cannot be in a
|
||||
// read-only volume. If it is not in a volume, the container cannot be
|
||||
// configured with a read-only rootfs.
|
||||
toVolume, err := checkIfPathIsInAVolume(container, absPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !toVolume && container.HostConfig.ReadonlyRootfs {
|
||||
return errdefs.InvalidParameter(errors.New("container rootfs is marked read-only"))
|
||||
}
|
||||
|
||||
options := daemon.defaultTarCopyOptions(noOverwriteDirNonDir)
|
||||
|
||||
if copyUIDGID {
|
||||
var err error
|
||||
// tarCopyOptions will appropriately pull in the right uid/gid for the
|
||||
// user/group and will set the options.
|
||||
options, err = daemon.tarCopyOptions(container, noOverwriteDirNonDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return archive.Untar(content, absPath, options)
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
daemon.LogContainerEvent(container, "extract-to-dir")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (daemon *Daemon) containerCopy(container *container.Container, resource string) (rc io.ReadCloser, err error) {
|
||||
container.Lock()
|
||||
|
||||
defer func() {
|
||||
if err != nil {
|
||||
// Wait to unlock the container until the archive is fully read
|
||||
// (see the ReadCloseWrapper func below) or if there is an error
|
||||
// before that occurs.
|
||||
container.Unlock()
|
||||
}
|
||||
}()
|
||||
|
||||
cfs, err := daemon.openContainerFS(container)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
cfs.Close()
|
||||
}
|
||||
}()
|
||||
|
||||
err = cfs.RunInFS(context.TODO(), func() error {
|
||||
_, err := os.Stat(resource)
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tb, err := archive.NewTarballer(resource, &archive.TarOptions{
|
||||
Compression: archive.Uncompressed,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cfs.GoInFS(context.TODO(), tb.Do)
|
||||
archv := tb.Reader()
|
||||
reader := ioutils.NewReadCloserWrapper(archv, func() error {
|
||||
err := archv.Close()
|
||||
_ = cfs.Close()
|
||||
container.Unlock()
|
||||
return err
|
||||
})
|
||||
daemon.LogContainerEvent(container, "copy")
|
||||
return reader, nil
|
||||
}
|
||||
|
||||
// checkIfPathIsInAVolume checks if the path is in a volume. If it is, it
|
||||
// cannot be in a read-only volume. If it is not in a volume, the container
|
||||
// cannot be configured with a read-only rootfs.
|
||||
|
@ -26,9 +226,3 @@ func checkIfPathIsInAVolume(container *container.Container, absPath string) (boo
|
|||
}
|
||||
return toVolume, nil
|
||||
}
|
||||
|
||||
// isOnlineFSOperationPermitted returns an error if an online filesystem operation
|
||||
// is not permitted.
// This implementation performs no check and always returns nil.
|
||||
func (daemon *Daemon) isOnlineFSOperationPermitted(container *container.Container) error {
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -2,11 +2,337 @@ package daemon // import "github.com/docker/docker/daemon"
|
|||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/docker/docker/api/types"
|
||||
containertypes "github.com/docker/docker/api/types/container"
|
||||
"github.com/docker/docker/container"
|
||||
"github.com/docker/docker/errdefs"
|
||||
"github.com/docker/docker/pkg/archive"
|
||||
"github.com/docker/docker/pkg/chrootarchive"
|
||||
"github.com/docker/docker/pkg/ioutils"
|
||||
"github.com/docker/docker/pkg/system"
|
||||
)
|
||||
|
||||
// containerStatPath stats the filesystem resource at the specified path in this
|
||||
// container. Returns stat info about the resource.
|
||||
func (daemon *Daemon) containerStatPath(container *container.Container, path string) (stat *types.ContainerPathStat, err error) {
|
||||
container.Lock()
|
||||
defer container.Unlock()
|
||||
|
||||
// Make sure an online file-system operation is permitted.
|
||||
if err := daemon.isOnlineFSOperationPermitted(container); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err = daemon.Mount(container); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer daemon.Unmount(container)
|
||||
|
||||
err = daemon.mountVolumes(container)
|
||||
defer container.DetachAndUnmount(daemon.LogVolumeEvent)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Normalize path before sending to rootfs
|
||||
path = filepath.FromSlash(path)
|
||||
|
||||
resolvedPath, absPath, err := container.ResolvePath(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return container.StatPath(resolvedPath, absPath)
|
||||
}
|
||||
|
||||
// containerArchivePath creates an archive of the filesystem resource at the specified
|
||||
// path in this container. Returns a tar archive of the resource and stat info
|
||||
// about the resource.
|
||||
func (daemon *Daemon) containerArchivePath(container *container.Container, path string) (content io.ReadCloser, stat *types.ContainerPathStat, err error) {
|
||||
container.Lock()
|
||||
|
||||
defer func() {
|
||||
if err != nil {
|
||||
// Wait to unlock the container until the archive is fully read
|
||||
// (see the ReadCloseWrapper func below) or if there is an error
|
||||
// before that occurs.
|
||||
container.Unlock()
|
||||
}
|
||||
}()
|
||||
|
||||
// Make sure an online file-system operation is permitted.
|
||||
if err := daemon.isOnlineFSOperationPermitted(container); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
if err = daemon.Mount(container); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if err != nil {
|
||||
// unmount any volumes
|
||||
container.DetachAndUnmount(daemon.LogVolumeEvent)
|
||||
// unmount the container's rootfs
|
||||
daemon.Unmount(container)
|
||||
}
|
||||
}()
|
||||
|
||||
if err = daemon.mountVolumes(container); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Normalize path before sending to rootfs
|
||||
path = filepath.FromSlash(path)
|
||||
|
||||
resolvedPath, absPath, err := container.ResolvePath(path)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
stat, err = container.StatPath(resolvedPath, absPath)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// We need to rebase the archive entries if the last element of the
|
||||
// resolved path was a symlink that was evaluated and is now different
|
||||
// than the requested path. For example, if the given path was "/foo/bar/",
|
||||
// but it resolved to "/var/lib/docker/containers/{id}/foo/baz/", we want
|
||||
// to ensure that the archive entries start with "bar" and not "baz". This
|
||||
// also catches the case when the root directory of the container is
|
||||
// requested: we want the archive entries to start with "/" and not the
|
||||
// container ID.
|
||||
|
||||
// Get the source and the base paths of the container resolved path in order
|
||||
// to get the proper tar options for the rebase tar.
|
||||
resolvedPath = filepath.Clean(resolvedPath)
|
||||
if filepath.Base(resolvedPath) == "." {
|
||||
resolvedPath += string(filepath.Separator) + "."
|
||||
}
|
||||
|
||||
sourceDir := resolvedPath
|
||||
sourceBase := "."
|
||||
|
||||
if stat.Mode&os.ModeDir == 0 { // not dir
|
||||
sourceDir, sourceBase = filepath.Split(resolvedPath)
|
||||
}
|
||||
opts := archive.TarResourceRebaseOpts(sourceBase, filepath.Base(absPath))
|
||||
|
||||
data, err := chrootarchive.Tar(sourceDir, opts, container.BaseFS)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
content = ioutils.NewReadCloserWrapper(data, func() error {
|
||||
err := data.Close()
|
||||
container.DetachAndUnmount(daemon.LogVolumeEvent)
|
||||
daemon.Unmount(container)
|
||||
container.Unlock()
|
||||
return err
|
||||
})
|
||||
|
||||
daemon.LogContainerEvent(container, "archive-path")
|
||||
|
||||
return content, stat, nil
|
||||
}
|
||||
|
||||
// containerExtractToDir extracts the given tar archive to the specified location in the
|
||||
// filesystem of this container. The given path must be of a directory in the
|
||||
// container. If it is not, the error will be an errdefs.InvalidParameter. If
|
||||
// noOverwriteDirNonDir is true then it will be an error if unpacking the
|
||||
// given content would cause an existing directory to be replaced with a non-
|
||||
// directory and vice versa.
|
||||
func (daemon *Daemon) containerExtractToDir(container *container.Container, path string, copyUIDGID, noOverwriteDirNonDir bool, content io.Reader) (err error) {
|
||||
container.Lock()
|
||||
defer container.Unlock()
|
||||
|
||||
// Make sure an online file-system operation is permitted.
|
||||
if err := daemon.isOnlineFSOperationPermitted(container); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = daemon.Mount(container); err != nil {
|
||||
return err
|
||||
}
|
||||
defer daemon.Unmount(container)
|
||||
|
||||
err = daemon.mountVolumes(container)
|
||||
defer container.DetachAndUnmount(daemon.LogVolumeEvent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Normalize path before sending to rootfs'
|
||||
path = filepath.FromSlash(path)
|
||||
|
||||
// Check if a drive letter supplied, it must be the system drive. No-op except on Windows
|
||||
path, err = system.CheckSystemDriveAndRemoveDriveLetter(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// The destination path needs to be resolved to a host path, with all
|
||||
// symbolic links followed in the scope of the container's rootfs. Note
|
||||
// that we do not use `container.ResolvePath(path)` here because we need
|
||||
// to also evaluate the last path element if it is a symlink. This is so
|
||||
// that you can extract an archive to a symlink that points to a directory.
|
||||
|
||||
// Consider the given path as an absolute path in the container.
|
||||
absPath := archive.PreserveTrailingDotOrSeparator(filepath.Join(string(filepath.Separator), path), path)
|
||||
|
||||
// This will evaluate the last path element if it is a symlink.
|
||||
resolvedPath, err := container.GetResourcePath(absPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stat, err := os.Lstat(resolvedPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !stat.IsDir() {
|
||||
return errdefs.InvalidParameter(errors.New("extraction point is not a directory"))
|
||||
}
|
||||
|
||||
// Need to check if the path is in a volume. If it is, it cannot be in a
|
||||
// read-only volume. If it is not in a volume, the container cannot be
|
||||
// configured with a read-only rootfs.
|
||||
|
||||
// Use the resolved path relative to the container rootfs as the new
|
||||
// absPath. This way we fully follow any symlinks in a volume that may
|
||||
// lead back outside the volume.
|
||||
//
|
||||
// The Windows implementation of filepath.Rel in golang 1.4 does not
|
||||
// support volume style file path semantics. On Windows when using the
|
||||
// filter driver, we are guaranteed that the path will always be
|
||||
// a volume file path.
|
||||
var baseRel string
|
||||
if strings.HasPrefix(resolvedPath, `\\?\Volume{`) {
|
||||
if strings.HasPrefix(resolvedPath, container.BaseFS) {
|
||||
baseRel = resolvedPath[len(container.BaseFS):]
|
||||
if baseRel[:1] == `\` {
|
||||
baseRel = baseRel[1:]
|
||||
}
|
||||
}
|
||||
} else {
|
||||
baseRel, err = filepath.Rel(container.BaseFS, resolvedPath)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Make it an absolute path.
|
||||
absPath = filepath.Join(string(filepath.Separator), baseRel)
|
||||
|
||||
toVolume, err := checkIfPathIsInAVolume(container, absPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !toVolume && container.HostConfig.ReadonlyRootfs {
|
||||
return errdefs.InvalidParameter(errors.New("container rootfs is marked read-only"))
|
||||
}
|
||||
|
||||
options := daemon.defaultTarCopyOptions(noOverwriteDirNonDir)
|
||||
|
||||
if copyUIDGID {
|
||||
var err error
|
||||
// tarCopyOptions will appropriately pull in the right uid/gid for the
|
||||
// user/group and will set the options.
|
||||
options, err = daemon.tarCopyOptions(container, noOverwriteDirNonDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := chrootarchive.UntarWithRoot(content, resolvedPath, options, container.BaseFS); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
daemon.LogContainerEvent(container, "extract-to-dir")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (daemon *Daemon) containerCopy(container *container.Container, resource string) (rc io.ReadCloser, err error) {
|
||||
if resource[0] == '/' || resource[0] == '\\' {
|
||||
resource = resource[1:]
|
||||
}
|
||||
container.Lock()
|
||||
|
||||
defer func() {
|
||||
if err != nil {
|
||||
// Wait to unlock the container until the archive is fully read
|
||||
// (see the ReadCloseWrapper func below) or if there is an error
|
||||
// before that occurs.
|
||||
container.Unlock()
|
||||
}
|
||||
}()
|
||||
|
||||
// Make sure an online file-system operation is permitted.
|
||||
if err := daemon.isOnlineFSOperationPermitted(container); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := daemon.Mount(container); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if err != nil {
|
||||
// unmount any volumes
|
||||
container.DetachAndUnmount(daemon.LogVolumeEvent)
|
||||
// unmount the container's rootfs
|
||||
daemon.Unmount(container)
|
||||
}
|
||||
}()
|
||||
|
||||
if err := daemon.mountVolumes(container); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Normalize path before sending to rootfs
|
||||
resource = filepath.FromSlash(resource)
|
||||
|
||||
basePath, err := container.GetResourcePath(resource)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stat, err := os.Stat(basePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var filter []string
|
||||
if !stat.IsDir() {
|
||||
d, f := filepath.Split(basePath)
|
||||
basePath = d
|
||||
filter = []string{f}
|
||||
}
|
||||
archv, err := chrootarchive.Tar(basePath, &archive.TarOptions{
|
||||
Compression: archive.Uncompressed,
|
||||
IncludeFiles: filter,
|
||||
}, container.BaseFS)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
reader := ioutils.NewReadCloserWrapper(archv, func() error {
|
||||
err := archv.Close()
|
||||
container.DetachAndUnmount(daemon.LogVolumeEvent)
|
||||
daemon.Unmount(container)
|
||||
container.Unlock()
|
||||
return err
|
||||
})
|
||||
daemon.LogContainerEvent(container, "copy")
|
||||
return reader, nil
|
||||
}
|
||||
|
||||
// checkIfPathIsInAVolume checks if the path is in a volume. If it is, it
|
||||
// cannot be in a read-only volume. If it is not in a volume, the container
|
||||
// cannot be configured with a read-only rootfs.
|
||||
|
@ -21,9 +347,9 @@ func checkIfPathIsInAVolume(container *container.Container, absPath string) (boo
|
|||
// is not permitted (such as stat or for copying). Running Hyper-V containers
|
||||
// cannot have their file-system interrogated from the host as the filter is
|
||||
// loaded inside the utility VM, not the host.
|
||||
// IMPORTANT: The container lock must NOT be held when calling this function.
|
||||
// IMPORTANT: The container lock MUST be held when calling this function.
|
||||
func (daemon *Daemon) isOnlineFSOperationPermitted(container *container.Container) error {
|
||||
if !container.IsRunning() {
|
||||
if !container.Running {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
221
daemon/containerfs_linux.go
Normal file
221
daemon/containerfs_linux.go
Normal file
|
@ -0,0 +1,221 @@
|
|||
package daemon // import "github.com/docker/docker/daemon"
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/moby/sys/mount"
|
||||
"github.com/moby/sys/symlink"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/docker/docker/api/types"
|
||||
"github.com/docker/docker/container"
|
||||
"github.com/docker/docker/internal/mounttree"
|
||||
"github.com/docker/docker/internal/unshare"
|
||||
"github.com/docker/docker/pkg/fileutils"
|
||||
)
|
||||
|
||||
// future is a unit of work submitted to a containerFSView's worker goroutine.
type future struct {
	// fn is the function the worker calls.
	fn func() error
	// res, when non-nil, receives the value returned by fn. When nil, the
	// return value of fn is discarded.
	res chan<- error
}
|
||||
|
||||
// containerFSView allows functions to be run in the context of a container's
// filesystem. Inside these functions, the root directory is the container root
// for all native OS filesystem APIs, including, but not limited to, the [os]
// and [golang.org/x/sys/unix] packages. The view of the container's filesystem
// is live and read-write. Each view has its own private set of tmpfs mounts.
// Any files written under a tmpfs mount are not visible to processes inside the
// container nor any other view of the container's filesystem, and vice versa.
//
// Each view has its own current working directory which is initialized to the
// root of the container filesystem and can be changed with [os.Chdir]. Changes
// to the current directory persist across successive [*containerFSView.RunInFS]
// and [*containerFSView.GoInFS] calls.
//
// Multiple views of the same container filesystem can coexist at the same time.
// Only one function can be running in a particular filesystem view at any given
// time. Calls to [*containerFSView.RunInFS] or [*containerFSView.GoInFS] will
// block while another function is running. If more than one call is blocked
// concurrently, the order they are unblocked is undefined.
type containerFSView struct {
	// d is the daemon used to unmount the container's rootfs and to log
	// volume events when the view is closed.
	d *Daemon
	// ctr is the container whose filesystem this view exposes.
	ctr *container.Container
	// todo carries work items to the goroutine running inside the view's
	// mount namespace. Closing it tells that goroutine to exit.
	todo chan future
	// done is closed by the worker goroutine when it has finished draining
	// todo.
	done chan error
}
|
||||
|
||||
// openContainerFS opens a new view of the container's filesystem.
//
// It mounts the container's root filesystem and sets up its volume mounts in
// the daemon's mount namespace, then starts a worker goroutine in an unshared
// mount namespace. Inside that namespace each volume is bind-mounted onto its
// destination under the container root, and the root is then switched to
// container.BaseFS. If any step fails, the mounts made so far are undone via
// the deferred cleanups and the error is returned.
//
// On success the returned view must be released with Close; a finalizer is
// installed which calls Close on the view.
func (daemon *Daemon) openContainerFS(container *container.Container) (_ *containerFSView, err error) {
	if err := daemon.Mount(container); err != nil {
		return nil, err
	}
	// The cleanups below key off the named result err, so they only fire
	// when this function ultimately returns a non-nil error.
	defer func() {
		if err != nil {
			_ = daemon.Unmount(container)
		}
	}()

	mounts, err := daemon.setupMounts(container)
	if err != nil {
		return nil, err
	}
	defer func() {
		if err != nil {
			_ = container.UnmountVolumes(daemon.LogVolumeEvent)
		}
	}()

	// Setup in initial mount namespace complete. We're ready to unshare the
	// mount namespace and bind the volume mounts into that private view of
	// the container FS.
	todo := make(chan future)
	done := make(chan error)
	err = unshare.Go(unix.CLONE_NEWNS,
		// Setup function: runs once in the new mount namespace. A non-nil
		// return is reported back through unshare.Go's return value.
		func() error {
			if err := mount.MakeRSlave("/"); err != nil {
				return err
			}
			for _, m := range mounts {
				dest, err := container.GetResourcePath(m.Destination)
				if err != nil {
					return err
				}

				var stat os.FileInfo
				stat, err = os.Stat(m.Source)
				if err != nil {
					return err
				}
				// Create the mount target with the same kind (directory or
				// file) as the source.
				if err := fileutils.CreateIfNotExists(dest, stat.IsDir()); err != nil {
					return err
				}

				bindMode := "rbind"
				if m.NonRecursive {
					bindMode = "bind"
				}
				writeMode := "ro"
				if m.Writable {
					writeMode = "rw"
				}

				// openContainerFS() is called for temporary mounts
				// outside the container. Soon these will be unmounted
				// with lazy unmount option and given we have mounted
				// them rbind, all the submounts will propagate if these
				// are shared. If daemon is running in host namespace
				// and has / as shared then these unmounts will
				// propagate and unmount original mount as well. So make
				// all these mounts rprivate. Do not use propagation
				// property of volume as that should apply only when
				// mounting happens inside the container.
				opts := strings.Join([]string{bindMode, writeMode, "rprivate"}, ",")
				if err := mount.Mount(m.Source, dest, "", opts); err != nil {
					return err
				}
			}

			return mounttree.SwitchRoot(container.BaseFS)
		},
		// Worker function: executes submitted futures one at a time until
		// todo is closed, then signals completion by closing done.
		func() {
			defer close(done)

			for it := range todo {
				err := it.fn()
				if it.res != nil {
					it.res <- err
				}
			}

			// The thread will terminate when this goroutine returns, taking the
			// mount namespace and all the volume bind-mounts with it.
		},
	)
	if err != nil {
		return nil, err
	}
	vw := &containerFSView{
		d:    daemon,
		ctr:  container,
		todo: todo,
		done: done,
	}
	// Release the view's resources if it is garbage-collected without an
	// explicit Close call.
	runtime.SetFinalizer(vw, (*containerFSView).Close)
	return vw, nil
}
|
||||
|
||||
// RunInFS synchronously runs fn in the context of the container filesytem and
|
||||
// passes through its return value.
|
||||
//
|
||||
// The container filesystem is only visible to functions called in the same
|
||||
// goroutine as fn. Goroutines started from fn will see the host's filesystem.
|
||||
func (vw *containerFSView) RunInFS(ctx context.Context, fn func() error) error {
|
||||
res := make(chan error)
|
||||
select {
|
||||
case vw.todo <- future{fn: fn, res: res}:
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
return <-res
|
||||
}
|
||||
|
||||
// GoInFS starts fn in the container FS. It blocks until fn is started but does
|
||||
// not wait until fn returns. An error is returned if ctx is canceled before fn
|
||||
// has been started.
|
||||
//
|
||||
// The container filesystem is only visible to functions called in the same
|
||||
// goroutine as fn. Goroutines started from fn will see the host's filesystem.
|
||||
func (vw *containerFSView) GoInFS(ctx context.Context, fn func()) error {
|
||||
select {
|
||||
case vw.todo <- future{fn: func() error { fn(); return nil }}:
|
||||
return nil
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
}
|
||||
|
||||
// Close waits until any in-flight operations complete and frees all
|
||||
// resources associated with vw.
|
||||
func (vw *containerFSView) Close() error {
|
||||
runtime.SetFinalizer(vw, nil)
|
||||
close(vw.todo)
|
||||
err := multierror.Append(nil, <-vw.done)
|
||||
err = multierror.Append(err, vw.ctr.UnmountVolumes(vw.d.LogVolumeEvent))
|
||||
err = multierror.Append(err, vw.d.Unmount(vw.ctr))
|
||||
return err.ErrorOrNil()
|
||||
}
|
||||
|
||||
// Stat returns the metadata for path, relative to the current working directory
|
||||
// of vw inside the container filesystem view.
|
||||
func (vw *containerFSView) Stat(ctx context.Context, path string) (*types.ContainerPathStat, error) {
|
||||
var stat *types.ContainerPathStat
|
||||
err := vw.RunInFS(ctx, func() error {
|
||||
lstat, err := os.Lstat(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var target string
|
||||
if lstat.Mode()&os.ModeSymlink != 0 {
|
||||
// Fully evaluate symlinks along path to the ultimate
|
||||
// target, or as much as possible with broken links.
|
||||
target, err = symlink.FollowSymlinkInScope(path, "/")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
stat = &types.ContainerPathStat{
|
||||
Name: filepath.Base(path),
|
||||
Size: lstat.Size(),
|
||||
Mode: lstat.Mode(),
|
||||
Mtime: lstat.ModTime(),
|
||||
LinkTarget: target,
|
||||
}
|
||||
return nil
|
||||
})
|
||||
return stat, err
|
||||
}
|
|
@ -12,9 +12,7 @@ import (
|
|||
|
||||
mounttypes "github.com/docker/docker/api/types/mount"
|
||||
"github.com/docker/docker/container"
|
||||
"github.com/docker/docker/pkg/fileutils"
|
||||
volumemounts "github.com/docker/docker/volume/mounts"
|
||||
"github.com/moby/sys/mount"
|
||||
)
|
||||
|
||||
// setupMounts iterates through each of the mount points for a container and
|
||||
|
@ -112,51 +110,3 @@ func setBindModeIfNull(bind *volumemounts.MountPoint) {
|
|||
bind.Mode = "z"
|
||||
}
|
||||
}
|
||||
|
||||
func (daemon *Daemon) mountVolumes(container *container.Container) error {
|
||||
mounts, err := daemon.setupMounts(container)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, m := range mounts {
|
||||
dest, err := container.GetResourcePath(m.Destination)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var stat os.FileInfo
|
||||
stat, err = os.Stat(m.Source)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err = fileutils.CreateIfNotExists(dest, stat.IsDir()); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
bindMode := "rbind"
|
||||
if m.NonRecursive {
|
||||
bindMode = "bind"
|
||||
}
|
||||
writeMode := "ro"
|
||||
if m.Writable {
|
||||
writeMode = "rw"
|
||||
}
|
||||
|
||||
// mountVolumes() seems to be called for temporary mounts
|
||||
// outside the container. Soon these will be unmounted with
|
||||
// lazy unmount option and given we have mounted the rbind,
|
||||
// all the submounts will propagate if these are shared. If
|
||||
// daemon is running in host namespace and has / as shared
|
||||
// then these unmounts will propagate and unmount original
|
||||
// mount as well. So make all these mounts rprivate.
|
||||
// Do not use propagation property of volume as that should
|
||||
// apply only when mounting happens inside the container.
|
||||
opts := strings.Join([]string{bindMode, writeMode, "rprivate"}, ",")
|
||||
if err := mount.Mount(m.Source, dest, "", opts); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -37,6 +37,10 @@ if [ -f /sys/fs/cgroup/cgroup.controllers ]; then
|
|||
> /sys/fs/cgroup/cgroup.subtree_control
|
||||
fi
|
||||
|
||||
# Change mount propagation to shared to make the environment more similar to a
|
||||
# modern Linux system, e.g. with systemd as PID 1.
|
||||
mount --make-rshared /
|
||||
|
||||
if [ $# -gt 0 ]; then
|
||||
exec "$@"
|
||||
fi
|
||||
|
|
|
@ -13,6 +13,11 @@ if [ ! -t 0 ]; then
|
|||
exit 1
|
||||
fi
|
||||
|
||||
# Change mount propagation to shared, which systemd PID 1 would normally do
|
||||
# itself when started by the kernel. systemd skips that when it detects it is
|
||||
# running in a container.
|
||||
mount --make-rshared /
|
||||
|
||||
env > /etc/docker-entrypoint-env
|
||||
|
||||
cat > /etc/systemd/system/docker-entrypoint.target << EOF
|
||||
|
|
|
@ -158,16 +158,23 @@ func TestCopyFromContainer(t *testing.T) {
|
|||
expect map[string]string
|
||||
}{
|
||||
{"/", map[string]string{"/": "", "/foo": "hello", "/bar/quux/baz": "world", "/bar/filesymlink": "", "/bar/dirsymlink": "", "/bar/notarget": ""}},
|
||||
{".", map[string]string{"./": "", "./foo": "hello", "./bar/quux/baz": "world", "./bar/filesymlink": "", "./bar/dirsymlink": "", "./bar/notarget": ""}},
|
||||
{"/.", map[string]string{"./": "", "./foo": "hello", "./bar/quux/baz": "world", "./bar/filesymlink": "", "./bar/dirsymlink": "", "./bar/notarget": ""}},
|
||||
{"./", map[string]string{"./": "", "./foo": "hello", "./bar/quux/baz": "world", "./bar/filesymlink": "", "./bar/dirsymlink": "", "./bar/notarget": ""}},
|
||||
{"/./", map[string]string{"./": "", "./foo": "hello", "./bar/quux/baz": "world", "./bar/filesymlink": "", "./bar/dirsymlink": "", "./bar/notarget": ""}},
|
||||
{"/bar/root", map[string]string{"root": ""}},
|
||||
{"/bar/root/", map[string]string{"root/": "", "root/foo": "hello", "root/bar/quux/baz": "world", "root/bar/filesymlink": "", "root/bar/dirsymlink": "", "root/bar/notarget": ""}},
|
||||
{"/bar/root/.", map[string]string{"./": "", "./foo": "hello", "./bar/quux/baz": "world", "./bar/filesymlink": "", "./bar/dirsymlink": "", "./bar/notarget": ""}},
|
||||
|
||||
{"bar/quux", map[string]string{"quux/": "", "quux/baz": "world"}},
|
||||
{"bar/quux/", map[string]string{"quux/": "", "quux/baz": "world"}},
|
||||
{"bar/quux/.", map[string]string{"./": "", "./baz": "world"}},
|
||||
{"bar/quux/baz", map[string]string{"baz": "world"}},
|
||||
|
||||
{"bar/filesymlink", map[string]string{"filesymlink": ""}},
|
||||
{"bar/dirsymlink", map[string]string{"dirsymlink": ""}},
|
||||
{"bar/dirsymlink/", map[string]string{"dirsymlink/": "", "dirsymlink/baz": "world"}},
|
||||
{"bar/dirsymlink/.", map[string]string{"./": "", "./baz": "world"}},
|
||||
{"bar/notarget", map[string]string{"notarget": ""}},
|
||||
} {
|
||||
t.Run(x.src, func(t *testing.T) {
|
||||
|
|
|
@ -393,3 +393,38 @@ func TestContainerVolumesMountedAsSlave(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Regression test for #38995 and #43390.
|
||||
func TestContainerCopyLeaksMounts(t *testing.T) {
|
||||
defer setupTest(t)()
|
||||
|
||||
bindMount := mounttypes.Mount{
|
||||
Type: mounttypes.TypeBind,
|
||||
Source: "/var",
|
||||
Target: "/hostvar",
|
||||
BindOptions: &mounttypes.BindOptions{
|
||||
Propagation: mounttypes.PropagationRSlave,
|
||||
},
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
client := testEnv.APIClient()
|
||||
cid := container.Run(ctx, t, client, container.WithMount(bindMount), container.WithCmd("sleep", "120s"))
|
||||
|
||||
getMounts := func() string {
|
||||
t.Helper()
|
||||
res, err := container.Exec(ctx, client, cid, []string{"cat", "/proc/self/mountinfo"})
|
||||
assert.NilError(t, err)
|
||||
assert.Equal(t, res.ExitCode, 0)
|
||||
return res.Stdout()
|
||||
}
|
||||
|
||||
mountsBefore := getMounts()
|
||||
|
||||
_, _, err := client.CopyFromContainer(ctx, cid, "/etc/passwd")
|
||||
assert.NilError(t, err)
|
||||
|
||||
mountsAfter := getMounts()
|
||||
|
||||
assert.Equal(t, mountsBefore, mountsAfter)
|
||||
}
|
||||
|
|
94
internal/mounttree/switchroot_linux.go
Normal file
94
internal/mounttree/switchroot_linux.go
Normal file
|
@ -0,0 +1,94 @@
|
|||
package mounttree // import "github.com/docker/docker/internal/mounttree"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/moby/sys/mount"
|
||||
"github.com/moby/sys/mountinfo"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// SwitchRoot changes path to be the root of the mount tree and changes the
|
||||
// current working directory to the new root.
|
||||
//
|
||||
// This function bind-mounts onto path; it is the caller's responsibility to set
|
||||
// the desired propagation mode of path's parent mount beforehand to prevent
|
||||
// unwanted propagation into different mount namespaces.
|
||||
func SwitchRoot(path string) error {
|
||||
if mounted, _ := mountinfo.Mounted(path); !mounted {
|
||||
if err := mount.Mount(path, path, "bind", "rbind,rw"); err != nil {
|
||||
return realChroot(path)
|
||||
}
|
||||
}
|
||||
|
||||
// setup oldRoot for pivot_root
|
||||
pivotDir, err := os.MkdirTemp(path, ".pivot_root")
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error setting up pivot dir: %v", err)
|
||||
}
|
||||
|
||||
var mounted bool
|
||||
defer func() {
|
||||
if mounted {
|
||||
// make sure pivotDir is not mounted before we try to remove it
|
||||
if errCleanup := unix.Unmount(pivotDir, unix.MNT_DETACH); errCleanup != nil {
|
||||
if err == nil {
|
||||
err = errCleanup
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
errCleanup := os.Remove(pivotDir)
|
||||
// pivotDir doesn't exist if pivot_root failed and chroot+chdir was successful
|
||||
// because we already cleaned it up on failed pivot_root
|
||||
if errCleanup != nil && !os.IsNotExist(errCleanup) {
|
||||
errCleanup = fmt.Errorf("Error cleaning up after pivot: %v", errCleanup)
|
||||
if err == nil {
|
||||
err = errCleanup
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
if err := unix.PivotRoot(path, pivotDir); err != nil {
|
||||
// If pivot fails, fall back to the normal chroot after cleaning up temp dir
|
||||
if err := os.Remove(pivotDir); err != nil {
|
||||
return fmt.Errorf("Error cleaning up after failed pivot: %v", err)
|
||||
}
|
||||
return realChroot(path)
|
||||
}
|
||||
mounted = true
|
||||
|
||||
// This is the new path for where the old root (prior to the pivot) has been moved to
|
||||
// This dir contains the rootfs of the caller, which we need to remove so it is not visible during extraction
|
||||
pivotDir = filepath.Join("/", filepath.Base(pivotDir))
|
||||
|
||||
if err := unix.Chdir("/"); err != nil {
|
||||
return fmt.Errorf("Error changing to new root: %v", err)
|
||||
}
|
||||
|
||||
// Make the pivotDir (where the old root lives) private so it can be unmounted without propagating to the host
|
||||
if err := unix.Mount("", pivotDir, "", unix.MS_PRIVATE|unix.MS_REC, ""); err != nil {
|
||||
return fmt.Errorf("Error making old root private after pivot: %v", err)
|
||||
}
|
||||
|
||||
// Now unmount the old root so it's no longer visible from the new root
|
||||
if err := unix.Unmount(pivotDir, unix.MNT_DETACH); err != nil {
|
||||
return fmt.Errorf("Error while unmounting old root after pivot: %v", err)
|
||||
}
|
||||
mounted = false
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func realChroot(path string) error {
|
||||
if err := unix.Chroot(path); err != nil {
|
||||
return fmt.Errorf("Error after fallback to chroot: %v", err)
|
||||
}
|
||||
if err := unix.Chdir("/"); err != nil {
|
||||
return fmt.Errorf("Error changing to new root after chroot: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
176
internal/unshare/unshare_linux.go
Normal file
176
internal/unshare/unshare_linux.go
Normal file
|
@ -0,0 +1,176 @@
|
|||
//go:build go1.10
|
||||
// +build go1.10
|
||||
|
||||
package unshare // import "github.com/docker/docker/internal/unshare"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// init locks the calling goroutine to its OS thread at package
// initialization, so that goroutines spawned by Go are never scheduled onto
// the process's startup thread. The rationale is explained at length below.
func init() {
	// The startup thread of a process is special in a few different ways.
	// Most pertinent to the discussion at hand, any per-thread kernel state
	// reflected in the /proc/[pid]/ directory for a process is taken from
	// the state of the startup thread. Same goes for /proc/self/; it shows
	// the state of the current process' startup thread, no matter which
	// thread the files are being opened from. For most programs this is a
	// distinction without a difference as the kernel state, such as the
	// mount namespace and current working directory, is shared among (and
	// kept synchronized across) all threads of a process. But things start
	// to break down once threads start unsharing and modifying parts of
	// their kernel state.
	//
	// The Go runtime schedules goroutines to execute on the startup thread,
	// same as any other. How this could be problematic is best illustrated
	// with a concrete example. Consider what happens if a call to
	// Go(unix.CLONE_NEWNS, ...) spawned a goroutine which gets scheduled
	// onto the startup thread. The thread's mount namespace will be
	// unshared and modified. The contents of the /proc/[pid]/mountinfo file
	// will then describe the mount tree of the unshared namespace, not the
	// namespace of any other thread. It will remain this way until the
	// process exits. (The startup thread is special in another way: exiting
	// it puts the process into a "non-waitable zombie" state. To avoid this
	// fate, the Go runtime parks the thread instead of exiting if a
	// goroutine returns while locked to the startup thread. More
	// information can be found in the Go runtime sources:
	// `go doc -u -src runtime.mexit`.) The github.com/moby/sys/mountinfo
	// package reads from /proc/self/mountinfo, so will read the mount tree
	// for the wrong namespace if the startup thread has had its mount
	// namespace unshared! The /proc/thread-self/ directory, introduced in
	// Linux 3.17, is one potential solution to this problem, but every
	// package which opens files in /proc/self/ would need to be updated,
	// and fallbacks to /proc/self/task/[tid]/ would be required to support
	// older kernels. Overlooking any reference to /proc/self/ would
	// manifest as stochastically-reproducible bugs, so this is far from an
	// ideal solution.
	//
	// Reading from /proc/self/ would not be a problem if we could prevent
	// the per-thread state of the startup thread from being modified
	// nondeterministically in the first place. We can accomplish this
	// simply by locking the main() function to the startup thread! Doing so
	// excludes any other goroutine from being scheduled on the thread.
	runtime.LockOSThread()
}
|
||||
|
||||
// reversibleSetnsFlags maps the unshare(2) flags whose effects can be fully
// reversed using setns(2). The values are the basenames of the corresponding
// /proc/self/task/[tid]/ns/ magic symlinks to use to save and restore the
// state.
//
// Go uses this table both to decide whether a requested set of flags is
// entirely reversible and to locate the namespace files to reopen.
var reversibleSetnsFlags = map[int]string{
	unix.CLONE_NEWCGROUP: "cgroup",
	unix.CLONE_NEWNET:    "net",
	unix.CLONE_NEWUTS:    "uts",
	unix.CLONE_NEWPID:    "pid",
	unix.CLONE_NEWTIME:   "time",

	// The following CLONE_NEW* flags are not included because they imply
	// another, irreversible flag when used with unshare(2).
	//  - unix.CLONE_NEWIPC:  implies CLONE_SYSVMEM
	//  - unix.CLONE_NEWNS:   implies CLONE_FS
	//  - unix.CLONE_NEWUSER: implies CLONE_FS since Linux 3.9
}
|
||||
|
||||
// Go calls the given functions in a new goroutine, locked to an OS thread,
// which has had the parts of its execution state disassociated from the rest of
// the current process using [unshare(2)]. It blocks until the new goroutine has
// started and setupfn has returned. fn is only called if setupfn returns nil. A
// nil setupfn or fn is equivalent to passing a no-op function.
//
// The disassociated execution state and any changes made to it are only visible
// to the goroutine which the functions are called in. Any other goroutines,
// including ones started from the function, will see the same execution state
// as the rest of the process.
//
// The acceptable flags are documented in the [unshare(2)] Linux man-page.
// The corresponding CLONE_* constants are defined in package [unix].
//
// The returned error is the first failure encountered while saving namespace
// state, calling unshare(2), or running setupfn; it is nil once setupfn has
// succeeded.
//
// # Warning
//
// This function may terminate the thread which the new goroutine executed on
// after fn returns, which could cause subprocesses started with the
// [syscall.SysProcAttr] Pdeathsig field set to be signaled before process
// termination. Any subprocess started before this function is called may be
// affected, in addition to any subprocesses started inside setupfn or fn.
// There are more details at https://go.dev/issue/27505.
//
// [unshare(2)]: https://man7.org/linux/man-pages/man2/unshare.2.html
func Go(flags int, setupfn func() error, fn func()) error {
	// started reports the outcome of the setup phase to the caller: an
	// error value on failure, or a close (yielding nil) on success.
	started := make(chan error)

	// Clear every reversible flag bit; if nothing remains, all requested
	// changes can be undone with setns(2).
	maskedFlags := flags
	for f := range reversibleSetnsFlags {
		maskedFlags &^= f
	}
	isReversible := maskedFlags == 0

	go func() {
		// Prepare to manipulate per-thread kernel state.
		runtime.LockOSThread()

		// Not all changes to the execution state can be reverted.
		// If an irreversible change to the execution state is made, our
		// only recourse is to have the tampered thread terminated by
		// returning from this function while the goroutine remains
		// wired to the thread. The Go runtime will terminate the thread
		// and replace it with a fresh one as needed.

		if isReversible {
			// Registered first, so it runs last: by then the deferred
			// setns calls below have either all succeeded or cleared
			// isReversible.
			defer func() {
				if isReversible {
					// All execution state has been restored without error.
					// The thread is once again fungible.
					runtime.UnlockOSThread()
				}
			}()
			tid := unix.Gettid()
			for f, ns := range reversibleSetnsFlags {
				if flags&f != f {
					continue
				}
				// The /proc/thread-self directory was added in Linux 3.17.
				// We are not using it to maximize compatibility.
				pth := fmt.Sprintf("/proc/self/task/%d/ns/%s", tid, ns)
				fd, err := unix.Open(pth, unix.O_RDONLY|unix.O_CLOEXEC, 0)
				if err != nil {
					started <- &os.PathError{Op: "open", Path: pth, Err: err}
					return
				}
				// Restore this namespace when the goroutine returns. The
				// defer is intentionally inside the loop: one restore is
				// queued per saved namespace. Once any restore fails the
				// remaining ones are skipped and the thread stays locked.
				defer func() {
					if isReversible {
						if err := unix.Setns(fd, 0); err != nil {
							isReversible = false
						}
					}
					_ = unix.Close(fd)
				}()
			}
		}

		// Threads are implemented under Linux as processes which share
		// a virtual memory space. Therefore in a multithreaded process
		// unshare(2) disassociates parts of the calling thread's
		// context from the thread it was clone(2)'d from.
		if err := unix.Unshare(flags); err != nil {
			started <- os.NewSyscallError("unshare", err)
			return
		}

		if setupfn != nil {
			if err := setupfn(); err != nil {
				started <- err
				return
			}
		}
		// Setup succeeded: unblock the caller with a nil error.
		close(started)

		if fn != nil {
			fn()
		}
	}()

	return <-started
}
|
|
@ -821,10 +821,29 @@ func Tar(path string, compression Compression) (io.ReadCloser, error) {
|
|||
// TarWithOptions creates an archive from the directory at `path`, only including files whose relative
// paths are included in `options.IncludeFiles` (if non-nil) or not in `options.ExcludePatterns`.
//
// It constructs a Tarballer for srcPath, starts the archiving operation in a
// new goroutine, and returns the reader from which the archive can be read.
// An error is returned if the Tarballer cannot be constructed.
func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) {
	// Fix the source path to work with long path names. This is a no-op
	// on platforms other than Windows.
	srcPath = fixVolumePathPrefix(srcPath)
	tb, err := NewTarballer(srcPath, options)
	if err != nil {
		return nil, err
	}
	// Produce the archive in the background; the caller consumes it through
	// the returned reader.
	go tb.Do()
	return tb.Reader(), nil
}
|
||||
|
||||
// Tarballer is a lower-level interface to TarWithOptions which gives the caller
// control over which goroutine the archiving operation executes on.
type Tarballer struct {
	// srcPath is the source path to archive, after volume path prefix fixing.
	srcPath string
	// options are the archiving options supplied to NewTarballer.
	options *TarOptions
	// pm is the pattern matcher built from options.ExcludePatterns.
	pm *patternmatcher.PatternMatcher
	// pipeReader is the read end handed out by Reader.
	pipeReader *io.PipeReader
	// pipeWriter is presumably the write end paired with pipeReader — TODO
	// confirm against NewTarballer.
	pipeWriter *io.PipeWriter
	// compressWriter is the writer the tar appender writes to; Do closes it
	// when archiving finishes.
	compressWriter io.WriteCloser
	// whiteoutConverter is installed on the tar appender created by Do.
	whiteoutConverter tarWhiteoutConverter
}
|
||||
|
||||
// NewTarballer constructs a new tarballer. The arguments are the same as for
|
||||
// TarWithOptions.
|
||||
func NewTarballer(srcPath string, options *TarOptions) (*Tarballer, error) {
|
||||
pm, err := patternmatcher.New(options.ExcludePatterns)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -842,183 +861,201 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error)
|
|||
return nil, err
|
||||
}
|
||||
|
||||
go func() {
|
||||
ta := newTarAppender(
|
||||
options.IDMap,
|
||||
compressWriter,
|
||||
options.ChownOpts,
|
||||
)
|
||||
ta.WhiteoutConverter = whiteoutConverter
|
||||
return &Tarballer{
|
||||
// Fix the source path to work with long path names. This is a no-op
|
||||
// on platforms other than Windows.
|
||||
srcPath: fixVolumePathPrefix(srcPath),
|
||||
options: options,
|
||||
pm: pm,
|
||||
pipeReader: pipeReader,
|
||||
pipeWriter: pipeWriter,
|
||||
compressWriter: compressWriter,
|
||||
whiteoutConverter: whiteoutConverter,
|
||||
}, nil
|
||||
}
|
||||
|
||||
defer func() {
|
||||
// Make sure to check the error on Close.
|
||||
if err := ta.TarWriter.Close(); err != nil {
|
||||
logrus.Errorf("Can't close tar writer: %s", err)
|
||||
}
|
||||
if err := compressWriter.Close(); err != nil {
|
||||
logrus.Errorf("Can't close compress writer: %s", err)
|
||||
}
|
||||
if err := pipeWriter.Close(); err != nil {
|
||||
logrus.Errorf("Can't close pipe writer: %s", err)
|
||||
}
|
||||
}()
|
||||
// Reader returns the reader for the created archive. It returns the
// Tarballer's pipe reader; the same reader is returned on every call.
func (t *Tarballer) Reader() io.ReadCloser {
	return t.pipeReader
}
|
||||
|
||||
// this buffer is needed for the duration of this piped stream
|
||||
defer pools.BufioWriter32KPool.Put(ta.Buffer)
|
||||
// Do performs the archiving operation in the background. The resulting archive
|
||||
// can be read from t.Reader(). Do should only be called once on each Tarballer
|
||||
// instance.
|
||||
func (t *Tarballer) Do() {
|
||||
ta := newTarAppender(
|
||||
t.options.IDMap,
|
||||
t.compressWriter,
|
||||
t.options.ChownOpts,
|
||||
)
|
||||
ta.WhiteoutConverter = t.whiteoutConverter
|
||||
|
||||
// In general we log errors here but ignore them because
|
||||
// during e.g. a diff operation the container can continue
|
||||
// mutating the filesystem and we can see transient errors
|
||||
// from this
|
||||
|
||||
stat, err := os.Lstat(srcPath)
|
||||
if err != nil {
|
||||
return
|
||||
defer func() {
|
||||
// Make sure to check the error on Close.
|
||||
if err := ta.TarWriter.Close(); err != nil {
|
||||
logrus.Errorf("Can't close tar writer: %s", err)
|
||||
}
|
||||
|
||||
if !stat.IsDir() {
|
||||
// We can't later join a non-dir with any includes because the
|
||||
// 'walk' will error if "file/." is stat-ed and "file" is not a
|
||||
// directory. So, we must split the source path and use the
|
||||
// basename as the include.
|
||||
if len(options.IncludeFiles) > 0 {
|
||||
logrus.Warn("Tar: Can't archive a file with includes")
|
||||
}
|
||||
|
||||
dir, base := SplitPathDirEntry(srcPath)
|
||||
srcPath = dir
|
||||
options.IncludeFiles = []string{base}
|
||||
if err := t.compressWriter.Close(); err != nil {
|
||||
logrus.Errorf("Can't close compress writer: %s", err)
|
||||
}
|
||||
|
||||
if len(options.IncludeFiles) == 0 {
|
||||
options.IncludeFiles = []string{"."}
|
||||
}
|
||||
|
||||
seen := make(map[string]bool)
|
||||
|
||||
for _, include := range options.IncludeFiles {
|
||||
rebaseName := options.RebaseNames[include]
|
||||
|
||||
var (
|
||||
parentMatchInfo []patternmatcher.MatchInfo
|
||||
parentDirs []string
|
||||
)
|
||||
|
||||
walkRoot := getWalkRoot(srcPath, include)
|
||||
filepath.WalkDir(walkRoot, func(filePath string, f os.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
logrus.Errorf("Tar: Can't stat file %s to tar: %s", srcPath, err)
|
||||
return nil
|
||||
}
|
||||
|
||||
relFilePath, err := filepath.Rel(srcPath, filePath)
|
||||
if err != nil || (!options.IncludeSourceDir && relFilePath == "." && f.IsDir()) {
|
||||
// Error getting relative path OR we are looking
|
||||
// at the source directory path. Skip in both situations.
|
||||
return nil
|
||||
}
|
||||
|
||||
if options.IncludeSourceDir && include == "." && relFilePath != "." {
|
||||
relFilePath = strings.Join([]string{".", relFilePath}, string(filepath.Separator))
|
||||
}
|
||||
|
||||
skip := false
|
||||
|
||||
// If "include" is an exact match for the current file
|
||||
// then even if there's an "excludePatterns" pattern that
|
||||
// matches it, don't skip it. IOW, assume an explicit 'include'
|
||||
// is asking for that file no matter what - which is true
|
||||
// for some files, like .dockerignore and Dockerfile (sometimes)
|
||||
if include != relFilePath {
|
||||
for len(parentDirs) != 0 {
|
||||
lastParentDir := parentDirs[len(parentDirs)-1]
|
||||
if strings.HasPrefix(relFilePath, lastParentDir+string(os.PathSeparator)) {
|
||||
break
|
||||
}
|
||||
parentDirs = parentDirs[:len(parentDirs)-1]
|
||||
parentMatchInfo = parentMatchInfo[:len(parentMatchInfo)-1]
|
||||
}
|
||||
|
||||
var matchInfo patternmatcher.MatchInfo
|
||||
if len(parentMatchInfo) != 0 {
|
||||
skip, matchInfo, err = pm.MatchesUsingParentResults(relFilePath, parentMatchInfo[len(parentMatchInfo)-1])
|
||||
} else {
|
||||
skip, matchInfo, err = pm.MatchesUsingParentResults(relFilePath, patternmatcher.MatchInfo{})
|
||||
}
|
||||
if err != nil {
|
||||
logrus.Errorf("Error matching %s: %v", relFilePath, err)
|
||||
return err
|
||||
}
|
||||
|
||||
if f.IsDir() {
|
||||
parentDirs = append(parentDirs, relFilePath)
|
||||
parentMatchInfo = append(parentMatchInfo, matchInfo)
|
||||
}
|
||||
}
|
||||
|
||||
if skip {
|
||||
// If we want to skip this file and its a directory
|
||||
// then we should first check to see if there's an
|
||||
// excludes pattern (e.g. !dir/file) that starts with this
|
||||
// dir. If so then we can't skip this dir.
|
||||
|
||||
// Its not a dir then so we can just return/skip.
|
||||
if !f.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// No exceptions (!...) in patterns so just skip dir
|
||||
if !pm.Exclusions() {
|
||||
return filepath.SkipDir
|
||||
}
|
||||
|
||||
dirSlash := relFilePath + string(filepath.Separator)
|
||||
|
||||
for _, pat := range pm.Patterns() {
|
||||
if !pat.Exclusion() {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(pat.String()+string(filepath.Separator), dirSlash) {
|
||||
// found a match - so can't skip this dir
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// No matching exclusion dir so just skip dir
|
||||
return filepath.SkipDir
|
||||
}
|
||||
|
||||
if seen[relFilePath] {
|
||||
return nil
|
||||
}
|
||||
seen[relFilePath] = true
|
||||
|
||||
// Rename the base resource.
|
||||
if rebaseName != "" {
|
||||
var replacement string
|
||||
if rebaseName != string(filepath.Separator) {
|
||||
// Special case the root directory to replace with an
|
||||
// empty string instead so that we don't end up with
|
||||
// double slashes in the paths.
|
||||
replacement = rebaseName
|
||||
}
|
||||
|
||||
relFilePath = strings.Replace(relFilePath, include, replacement, 1)
|
||||
}
|
||||
|
||||
if err := ta.addTarFile(filePath, relFilePath); err != nil {
|
||||
logrus.Errorf("Can't add file %s to tar: %s", filePath, err)
|
||||
// if pipe is broken, stop writing tar stream to it
|
||||
if err == io.ErrClosedPipe {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err := t.pipeWriter.Close(); err != nil {
|
||||
logrus.Errorf("Can't close pipe writer: %s", err)
|
||||
}
|
||||
}()
|
||||
|
||||
return pipeReader, nil
|
||||
// this buffer is needed for the duration of this piped stream
|
||||
defer pools.BufioWriter32KPool.Put(ta.Buffer)
|
||||
|
||||
// In general we log errors here but ignore them because
|
||||
// during e.g. a diff operation the container can continue
|
||||
// mutating the filesystem and we can see transient errors
|
||||
// from this
|
||||
|
||||
stat, err := os.Lstat(t.srcPath)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if !stat.IsDir() {
|
||||
// We can't later join a non-dir with any includes because the
|
||||
// 'walk' will error if "file/." is stat-ed and "file" is not a
|
||||
// directory. So, we must split the source path and use the
|
||||
// basename as the include.
|
||||
if len(t.options.IncludeFiles) > 0 {
|
||||
logrus.Warn("Tar: Can't archive a file with includes")
|
||||
}
|
||||
|
||||
dir, base := SplitPathDirEntry(t.srcPath)
|
||||
t.srcPath = dir
|
||||
t.options.IncludeFiles = []string{base}
|
||||
}
|
||||
|
||||
if len(t.options.IncludeFiles) == 0 {
|
||||
t.options.IncludeFiles = []string{"."}
|
||||
}
|
||||
|
||||
seen := make(map[string]bool)
|
||||
|
||||
for _, include := range t.options.IncludeFiles {
|
||||
rebaseName := t.options.RebaseNames[include]
|
||||
|
||||
var (
|
||||
parentMatchInfo []patternmatcher.MatchInfo
|
||||
parentDirs []string
|
||||
)
|
||||
|
||||
walkRoot := getWalkRoot(t.srcPath, include)
|
||||
filepath.WalkDir(walkRoot, func(filePath string, f os.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
logrus.Errorf("Tar: Can't stat file %s to tar: %s", t.srcPath, err)
|
||||
return nil
|
||||
}
|
||||
|
||||
relFilePath, err := filepath.Rel(t.srcPath, filePath)
|
||||
if err != nil || (!t.options.IncludeSourceDir && relFilePath == "." && f.IsDir()) {
|
||||
// Error getting relative path OR we are looking
|
||||
// at the source directory path. Skip in both situations.
|
||||
return nil
|
||||
}
|
||||
|
||||
if t.options.IncludeSourceDir && include == "." && relFilePath != "." {
|
||||
relFilePath = strings.Join([]string{".", relFilePath}, string(filepath.Separator))
|
||||
}
|
||||
|
||||
skip := false
|
||||
|
||||
// If "include" is an exact match for the current file
|
||||
// then even if there's an "excludePatterns" pattern that
|
||||
// matches it, don't skip it. IOW, assume an explicit 'include'
|
||||
// is asking for that file no matter what - which is true
|
||||
// for some files, like .dockerignore and Dockerfile (sometimes)
|
||||
if include != relFilePath {
|
||||
for len(parentDirs) != 0 {
|
||||
lastParentDir := parentDirs[len(parentDirs)-1]
|
||||
if strings.HasPrefix(relFilePath, lastParentDir+string(os.PathSeparator)) {
|
||||
break
|
||||
}
|
||||
parentDirs = parentDirs[:len(parentDirs)-1]
|
||||
parentMatchInfo = parentMatchInfo[:len(parentMatchInfo)-1]
|
||||
}
|
||||
|
||||
var matchInfo patternmatcher.MatchInfo
|
||||
if len(parentMatchInfo) != 0 {
|
||||
skip, matchInfo, err = t.pm.MatchesUsingParentResults(relFilePath, parentMatchInfo[len(parentMatchInfo)-1])
|
||||
} else {
|
||||
skip, matchInfo, err = t.pm.MatchesUsingParentResults(relFilePath, patternmatcher.MatchInfo{})
|
||||
}
|
||||
if err != nil {
|
||||
logrus.Errorf("Error matching %s: %v", relFilePath, err)
|
||||
return err
|
||||
}
|
||||
|
||||
if f.IsDir() {
|
||||
parentDirs = append(parentDirs, relFilePath)
|
||||
parentMatchInfo = append(parentMatchInfo, matchInfo)
|
||||
}
|
||||
}
|
||||
|
||||
if skip {
|
||||
// If we want to skip this file and its a directory
|
||||
// then we should first check to see if there's an
|
||||
// excludes pattern (e.g. !dir/file) that starts with this
|
||||
// dir. If so then we can't skip this dir.
|
||||
|
||||
// Its not a dir then so we can just return/skip.
|
||||
if !f.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// No exceptions (!...) in patterns so just skip dir
|
||||
if !t.pm.Exclusions() {
|
||||
return filepath.SkipDir
|
||||
}
|
||||
|
||||
dirSlash := relFilePath + string(filepath.Separator)
|
||||
|
||||
for _, pat := range t.pm.Patterns() {
|
||||
if !pat.Exclusion() {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(pat.String()+string(filepath.Separator), dirSlash) {
|
||||
// found a match - so can't skip this dir
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// No matching exclusion dir so just skip dir
|
||||
return filepath.SkipDir
|
||||
}
|
||||
|
||||
if seen[relFilePath] {
|
||||
return nil
|
||||
}
|
||||
seen[relFilePath] = true
|
||||
|
||||
// Rename the base resource.
|
||||
if rebaseName != "" {
|
||||
var replacement string
|
||||
if rebaseName != string(filepath.Separator) {
|
||||
// Special case the root directory to replace with an
|
||||
// empty string instead so that we don't end up with
|
||||
// double slashes in the paths.
|
||||
replacement = rebaseName
|
||||
}
|
||||
|
||||
relFilePath = strings.Replace(relFilePath, include, replacement, 1)
|
||||
}
|
||||
|
||||
if err := ta.addTarFile(filePath, relFilePath); err != nil {
|
||||
logrus.Errorf("Can't add file %s to tar: %s", filePath, err)
|
||||
// if pipe is broken, stop writing tar stream to it
|
||||
if err == io.ErrClosedPipe {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Unpack unpacks the decompressedArchive to dest with options.
|
||||
|
|
|
@ -87,7 +87,7 @@ func UnpackLayer(dest string, layer io.Reader, options *TarOptions) (size int64,
|
|||
basename := filepath.Base(hdr.Name)
|
||||
aufsHardlinks[basename] = hdr
|
||||
if aufsTempdir == "" {
|
||||
if aufsTempdir, err = os.MkdirTemp("", "dockerplnk"); err != nil {
|
||||
if aufsTempdir, err = os.MkdirTemp(dest, "dockerplnk"); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer os.RemoveAll(aufsTempdir)
|
||||
|
|
|
@ -4,223 +4,71 @@
|
|||
package chrootarchive // import "github.com/docker/docker/pkg/chrootarchive"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"github.com/docker/docker/pkg/archive"
|
||||
"github.com/docker/docker/pkg/reexec"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// untar is the entry-point for docker-untar on re-exec. This is not used on
|
||||
// Windows as it does not support chroot, hence no point sandboxing through
|
||||
// chroot and reexec.
//
// Command-line arguments (read with the flag package): argument 0 is the
// extraction destination and the optional argument 1 is the directory to
// chroot into; when it is absent or empty the destination doubles as the root.
// The archive itself is read from stdin. Every failure goes through fatal, and
// success ends with os.Exit(0), so this function never returns to its caller.
|
||||
func untar() {
|
||||
// NOTE(review): presumably pins this goroutine to its OS thread so that the
// chroot below and the unpacking run on the same thread - confirm.
runtime.LockOSThread()
|
||||
flag.Parse()
|
||||
|
||||
var options archive.TarOptions
|
||||
|
||||
// Read the JSON-encoded options from file descriptor 3, i.e. the pipe the
// parent process hands over through cmd.ExtraFiles.
|
||||
if err := json.NewDecoder(os.NewFile(3, "options")).Decode(&options); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
dst := flag.Arg(0)
|
||||
var root string
|
||||
if len(flag.Args()) > 1 {
|
||||
root = flag.Arg(1)
|
||||
}
|
||||
|
||||
// No explicit root given: chroot into the destination itself.
if root == "" {
|
||||
root = dst
|
||||
}
|
||||
|
||||
if err := chroot(root); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
// Unpack runs after chroot, so dst is resolved inside the new root.
if err := archive.Unpack(os.Stdin, dst, &options); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
// fully consume stdin in case it is zero padded
|
||||
if _, err := flush(os.Stdin); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
func invokeUnpack(decompressedArchive io.Reader, dest string, options *archive.TarOptions, root string) error {
|
||||
if root == "" {
|
||||
return errors.New("must specify a root to chroot to")
|
||||
}
|
||||
|
||||
// We can't pass a potentially large exclude list directly via cmd line
|
||||
// because we easily overrun the kernel's max argument/environment size
|
||||
// when the full image list is passed (e.g. when this is used by
|
||||
// `docker load`). We will marshall the options via a pipe to the
|
||||
// child
|
||||
r, w, err := os.Pipe()
|
||||
relDest, err := resolvePathInChroot(root, dest)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Untar pipe failure: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
if root != "" {
|
||||
relDest, err := filepath.Rel(root, dest)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if relDest == "." {
|
||||
relDest = "/"
|
||||
}
|
||||
if relDest[0] != '/' {
|
||||
relDest = "/" + relDest
|
||||
}
|
||||
dest = relDest
|
||||
}
|
||||
|
||||
cmd := reexec.Command("docker-untar", dest, root)
|
||||
cmd.Stdin = decompressedArchive
|
||||
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, r)
|
||||
output := bytes.NewBuffer(nil)
|
||||
cmd.Stdout = output
|
||||
cmd.Stderr = output
|
||||
|
||||
// reexec.Command() sets cmd.SysProcAttr.Pdeathsig on Linux, which
|
||||
// causes the started process to be signaled when the creating OS thread
|
||||
// dies. Ensure that the reexec is not prematurely signaled. See
|
||||
// https://go.dev/issue/27505 for more information.
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
if err := cmd.Start(); err != nil {
|
||||
w.Close()
|
||||
return fmt.Errorf("Untar error on re-exec cmd: %v", err)
|
||||
}
|
||||
|
||||
// write the options to the pipe for the untar exec to read
|
||||
if err := json.NewEncoder(w).Encode(options); err != nil {
|
||||
w.Close()
|
||||
return fmt.Errorf("Untar json encode to pipe failed: %v", err)
|
||||
}
|
||||
w.Close()
|
||||
|
||||
if err := cmd.Wait(); err != nil {
|
||||
// when `xz -d -c -q | docker-untar ...` failed on docker-untar side,
|
||||
// we need to exhaust `xz`'s output, otherwise the `xz` side will be
|
||||
// pending on write pipe forever
|
||||
io.Copy(io.Discard, decompressedArchive)
|
||||
|
||||
return fmt.Errorf("Error processing tar file(%v): %s", err, output)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func tar() {
|
||||
runtime.LockOSThread()
|
||||
flag.Parse()
|
||||
|
||||
src := flag.Arg(0)
|
||||
var root string
|
||||
if len(flag.Args()) > 1 {
|
||||
root = flag.Arg(1)
|
||||
}
|
||||
|
||||
if root == "" {
|
||||
root = src
|
||||
}
|
||||
|
||||
if err := realChroot(root); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
var options archive.TarOptions
|
||||
if err := json.NewDecoder(os.Stdin).Decode(&options); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
rdr, err := archive.TarWithOptions(src, &options)
|
||||
done := make(chan error)
|
||||
err = goInChroot(root, func() { done <- archive.Unpack(decompressedArchive, relDest, options) })
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
return err
|
||||
}
|
||||
defer rdr.Close()
|
||||
|
||||
if _, err := io.Copy(os.Stdout, rdr); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
os.Exit(0)
|
||||
return <-done
|
||||
}
|
||||
|
||||
func invokePack(srcPath string, options *archive.TarOptions, root string) (io.ReadCloser, error) {
|
||||
if root == "" {
|
||||
return nil, errors.New("root path must not be empty")
|
||||
}
|
||||
|
||||
relSrc, err := filepath.Rel(root, srcPath)
|
||||
relSrc, err := resolvePathInChroot(root, srcPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if relSrc == "." {
|
||||
relSrc = "/"
|
||||
}
|
||||
if relSrc[0] != '/' {
|
||||
relSrc = "/" + relSrc
|
||||
}
|
||||
|
||||
// make sure we didn't trim a trailing slash with the call to `Rel`
|
||||
// make sure we didn't trim a trailing slash with the call to `resolvePathInChroot`
|
||||
if strings.HasSuffix(srcPath, "/") && !strings.HasSuffix(relSrc, "/") {
|
||||
relSrc += "/"
|
||||
}
|
||||
|
||||
cmd := reexec.Command("docker-tar", relSrc, root)
|
||||
|
||||
errBuff := bytes.NewBuffer(nil)
|
||||
cmd.Stderr = errBuff
|
||||
|
||||
tarR, tarW := io.Pipe()
|
||||
cmd.Stdout = tarW
|
||||
|
||||
stdin, err := cmd.StdinPipe()
|
||||
tb, err := archive.NewTarballer(relSrc, options)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "error getting options pipe for tar process")
|
||||
return nil, errors.Wrap(err, "error processing tar file")
|
||||
}
|
||||
|
||||
started := make(chan error)
|
||||
go func() {
|
||||
// reexec.Command() sets cmd.SysProcAttr.Pdeathsig on Linux,
|
||||
// which causes the started process to be signaled when the
|
||||
// creating OS thread dies. Ensure that the subprocess is not
|
||||
// prematurely signaled. See https://go.dev/issue/27505 for more
|
||||
// information.
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
if err := cmd.Start(); err != nil {
|
||||
started <- err
|
||||
return
|
||||
}
|
||||
close(started)
|
||||
err := cmd.Wait()
|
||||
err = errors.Wrapf(err, "error processing tar file: %s", errBuff)
|
||||
tarW.CloseWithError(err)
|
||||
}()
|
||||
if err := <-started; err != nil {
|
||||
return nil, errors.Wrap(err, "tar error on re-exec cmd")
|
||||
err = goInChroot(root, tb.Do)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not chroot")
|
||||
}
|
||||
|
||||
if err := json.NewEncoder(stdin).Encode(options); err != nil {
|
||||
stdin.Close()
|
||||
return nil, errors.Wrap(err, "tar json encode to pipe failed")
|
||||
}
|
||||
stdin.Close()
|
||||
|
||||
return tarR, nil
|
||||
return tb.Reader(), nil
|
||||
}
|
||||
|
||||
// resolvePathInChroot maps path, which is expected to live underneath root,
// to the name under which the same location is reachable once root has become
// the root directory. The result always starts with '/'.
//
//   - resolvePathInChroot("/a/b", "/a/b/c/d") -> "/c/d"
//   - resolvePathInChroot("/a/b", "/a/b") -> "/"
//
// An empty root is rejected, and any error from filepath.Rel is returned
// unchanged.
//
// The implementation is buggy, and some bugs may be load-bearing.
// Here be dragons: do not "fix" edge cases without auditing every caller.
func resolvePathInChroot(root, path string) (string, error) {
	if root == "" {
		return "", errors.New("root path must not be empty")
	}

	inside, err := filepath.Rel(root, path)
	switch {
	case err != nil:
		return "", err
	case inside == ".":
		// path names the chroot directory itself.
		return "/", nil
	case inside[0] == '/':
		// Already rooted; hand it back untouched.
		return inside, nil
	}
	return "/" + inside, nil
}
|
||||
|
|
|
@ -7,11 +7,6 @@ import (
|
|||
"github.com/docker/docker/pkg/longpath"
|
||||
)
|
||||
|
||||
// chroot is not supported by Windows, so this implementation is a no-op:
// path is ignored and nil is always returned.
|
||||
func chroot(path string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func invokeUnpack(decompressedArchive io.ReadCloser,
|
||||
dest string,
|
||||
options *archive.TarOptions, root string) error {
|
||||
|
|
|
@ -1,113 +1,34 @@
|
|||
package chrootarchive // import "github.com/docker/docker/pkg/chrootarchive"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/containerd/containerd/pkg/userns"
|
||||
"github.com/docker/docker/internal/mounttree"
|
||||
"github.com/docker/docker/internal/unshare"
|
||||
"github.com/moby/sys/mount"
|
||||
"github.com/moby/sys/mountinfo"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// chroot on linux uses pivot_root instead of chroot
|
||||
// pivot_root takes a new root and an old root.
|
||||
// Old root must be a sub-dir of new root, it is where the current rootfs will reside after the call to pivot_root.
|
||||
// New root is where the new rootfs is set to.
|
||||
// Old root is removed after the call to pivot_root so it is no longer available under the new root.
|
||||
// This is similar to how libcontainer sets up a container's rootfs
|
||||
func chroot(path string) (err error) {
|
||||
// if the engine is running in a user namespace we need to use actual chroot
|
||||
if userns.RunningInUserNS() {
|
||||
return realChroot(path)
|
||||
}
|
||||
if err := unix.Unshare(unix.CLONE_NEWNS); err != nil {
|
||||
return fmt.Errorf("Error creating mount namespace before pivot: %v", err)
|
||||
}
|
||||
|
||||
// Make everything in new ns slave.
|
||||
// Don't use `private` here as this could race where the mountns gets a
|
||||
// reference to a mount and an unmount from the host does not propagate,
|
||||
// which could potentially cause transient errors for other operations,
|
||||
// even though this should be relatively small window here `slave` should
|
||||
// not cause any problems.
|
||||
if err := mount.MakeRSlave("/"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if mounted, _ := mountinfo.Mounted(path); !mounted {
|
||||
if err := mount.Mount(path, path, "bind", "rbind,rw"); err != nil {
|
||||
return realChroot(path)
|
||||
}
|
||||
}
|
||||
|
||||
// setup oldRoot for pivot_root
|
||||
pivotDir, err := os.MkdirTemp(path, ".pivot_root")
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error setting up pivot dir: %v", err)
|
||||
}
|
||||
|
||||
var mounted bool
|
||||
defer func() {
|
||||
if mounted {
|
||||
// make sure pivotDir is not mounted before we try to remove it
|
||||
if errCleanup := unix.Unmount(pivotDir, unix.MNT_DETACH); errCleanup != nil {
|
||||
if err == nil {
|
||||
err = errCleanup
|
||||
}
|
||||
return
|
||||
// goInChroot starts fn in a goroutine where the root directory, current working
|
||||
// directory and umask are unshared from other goroutines and the root directory
|
||||
// has been changed to path. These changes are only visible to the goroutine in
|
||||
// which fn is executed. Any other goroutines, including ones started from fn,
|
||||
// will see the same root directory and file system attributes as the rest of
|
||||
// the process.
|
||||
func goInChroot(path string, fn func()) error {
|
||||
return unshare.Go(
|
||||
unix.CLONE_FS|unix.CLONE_NEWNS,
|
||||
func() error {
|
||||
// Make everything in new ns slave.
|
||||
// Don't use `private` here as this could race where the mountns gets a
|
||||
// reference to a mount and an unmount from the host does not propagate,
|
||||
// which could potentially cause transient errors for other operations,
|
||||
// even though this should be relatively small window here `slave` should
|
||||
// not cause any problems.
|
||||
if err := mount.MakeRSlave("/"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
errCleanup := os.Remove(pivotDir)
|
||||
// pivotDir doesn't exist if pivot_root failed and chroot+chdir was successful
|
||||
// because we already cleaned it up on failed pivot_root
|
||||
if errCleanup != nil && !os.IsNotExist(errCleanup) {
|
||||
errCleanup = fmt.Errorf("Error cleaning up after pivot: %v", errCleanup)
|
||||
if err == nil {
|
||||
err = errCleanup
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
if err := unix.PivotRoot(path, pivotDir); err != nil {
|
||||
// If pivot fails, fall back to the normal chroot after cleaning up temp dir
|
||||
if err := os.Remove(pivotDir); err != nil {
|
||||
return fmt.Errorf("Error cleaning up after failed pivot: %v", err)
|
||||
}
|
||||
return realChroot(path)
|
||||
}
|
||||
mounted = true
|
||||
|
||||
// This is the new path for where the old root (prior to the pivot) has been moved to
|
||||
// This dir contains the rootfs of the caller, which we need to remove so it is not visible during extraction
|
||||
pivotDir = filepath.Join("/", filepath.Base(pivotDir))
|
||||
|
||||
if err := unix.Chdir("/"); err != nil {
|
||||
return fmt.Errorf("Error changing to new root: %v", err)
|
||||
}
|
||||
|
||||
// Make the pivotDir (where the old root lives) private so it can be unmounted without propagating to the host
|
||||
if err := unix.Mount("", pivotDir, "", unix.MS_PRIVATE|unix.MS_REC, ""); err != nil {
|
||||
return fmt.Errorf("Error making old root private after pivot: %v", err)
|
||||
}
|
||||
|
||||
// Now unmount the old root so it's no longer visible from the new root
|
||||
if err := unix.Unmount(pivotDir, unix.MNT_DETACH); err != nil {
|
||||
return fmt.Errorf("Error while unmounting old root after pivot: %v", err)
|
||||
}
|
||||
mounted = false
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func realChroot(path string) error {
|
||||
if err := unix.Chroot(path); err != nil {
|
||||
return fmt.Errorf("Error after fallback to chroot: %v", err)
|
||||
}
|
||||
if err := unix.Chdir("/"); err != nil {
|
||||
return fmt.Errorf("Error changing to new root after chroot: %v", err)
|
||||
}
|
||||
return nil
|
||||
return mounttree.SwitchRoot(path)
|
||||
},
|
||||
fn,
|
||||
)
|
||||
}
|
||||
|
|
|
@ -1,17 +0,0 @@
|
|||
//go:build !windows && !linux
|
||||
// +build !windows,!linux
|
||||
|
||||
package chrootarchive // import "github.com/docker/docker/pkg/chrootarchive"
|
||||
|
||||
import "golang.org/x/sys/unix"
|
||||
|
||||
// chroot calls unix.Chroot(path) and, if that succeeds, unix.Chdir("/") so
// that the working directory lies inside the new root. The first error
// encountered is returned as-is, without wrapping.
func chroot(path string) error {
|
||||
if err := unix.Chroot(path); err != nil {
|
||||
return err
|
||||
}
|
||||
return unix.Chdir("/")
|
||||
}
|
||||
|
||||
// realChroot simply delegates to chroot: in this build (neither Windows nor
// Linux) the two functions behave identically.
func realChroot(path string) error {
|
||||
return chroot(path)
|
||||
}
|
|
@ -4,78 +4,14 @@
|
|||
package chrootarchive // import "github.com/docker/docker/pkg/chrootarchive"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
|
||||
"github.com/containerd/containerd/pkg/userns"
|
||||
"github.com/docker/docker/pkg/archive"
|
||||
"github.com/docker/docker/pkg/reexec"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// applyLayerResponse is the JSON message that applyLayer encodes to stdout
// once a layer has been unpacked.
type applyLayerResponse struct {
|
||||
// LayerSize carries the size value returned by archive.UnpackLayer.
LayerSize int64 `json:"layerSize"`
|
||||
}
|
||||
|
||||
// applyLayer is the entry-point for docker-applylayer on re-exec. This is not
|
||||
// used on Windows as it does not support chroot, hence no point sandboxing
|
||||
// through chroot and reexec.
//
// Inputs: command-line argument 0 is the directory to chroot into, the OPT
// environment variable holds the JSON-encoded *archive.TarOptions, and the
// layer is read from stdin. On success an applyLayerResponse is written to
// stdout as JSON and the process exits with status 0; every failure goes
// through fatal. The function never returns to its caller.
|
||||
func applyLayer() {
|
||||
|
||||
var (
|
||||
tmpDir string
|
||||
err error
|
||||
options *archive.TarOptions
|
||||
)
|
||||
runtime.LockOSThread()
|
||||
flag.Parse()
|
||||
|
||||
// The user-namespace probe is taken before chroot.
// NOTE(review): presumably because the check needs files from the original
// root - confirm against userns.RunningInUserNS.
inUserns := userns.RunningInUserNS()
|
||||
if err := chroot(flag.Arg(0)); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
// We need to be able to set any perms
|
||||
oldmask := unix.Umask(0)
|
||||
// NOTE(review): every exit path below ends in os.Exit (directly or through
// fatal), and os.Exit does not run deferred calls, so this restore is
// effectively never executed.
defer unix.Umask(oldmask)
|
||||
|
||||
if err := json.Unmarshal([]byte(os.Getenv("OPT")), &options); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
if inUserns {
|
||||
options.InUserNS = true
|
||||
}
|
||||
|
||||
// Created after the chroot, so "/" here is the new root.
if tmpDir, err = os.MkdirTemp("/", "temp-docker-extract"); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
// Point TMPDIR at the scratch directory for the duration of the unpack; the
// directory is removed again whether or not UnpackLayer succeeded. Errors
// from Setenv and RemoveAll are ignored.
os.Setenv("TMPDIR", tmpDir)
|
||||
size, err := archive.UnpackLayer("/", os.Stdin, options)
|
||||
os.RemoveAll(tmpDir)
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
// Report the unpacked size on stdout as JSON.
encoder := json.NewEncoder(os.Stdout)
|
||||
if err := encoder.Encode(applyLayerResponse{size}); err != nil {
|
||||
fatal(fmt.Errorf("unable to encode layerSize JSON: %s", err))
|
||||
}
|
||||
|
||||
// Drain whatever is left on stdin before exiting.
if _, err := flush(os.Stdin); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
// applyLayerHandler parses a diff in the standard layer format from `layer`, and
|
||||
// applies it to the directory `dest`. Returns the size in bytes of the
|
||||
// contents of the layer.
|
||||
|
@ -92,42 +28,30 @@ func applyLayerHandler(dest string, layer io.Reader, options *archive.TarOptions
|
|||
}
|
||||
if options == nil {
|
||||
options = &archive.TarOptions{}
|
||||
if userns.RunningInUserNS() {
|
||||
options.InUserNS = true
|
||||
}
|
||||
}
|
||||
if userns.RunningInUserNS() {
|
||||
options.InUserNS = true
|
||||
}
|
||||
if options.ExcludePatterns == nil {
|
||||
options.ExcludePatterns = []string{}
|
||||
}
|
||||
|
||||
data, err := json.Marshal(options)
|
||||
type result struct {
|
||||
layerSize int64
|
||||
err error
|
||||
}
|
||||
|
||||
done := make(chan result)
|
||||
err = goInChroot(dest, func() {
|
||||
// We need to be able to set any perms
|
||||
_ = unix.Umask(0)
|
||||
|
||||
size, err := archive.UnpackLayer("/", layer, options)
|
||||
done <- result{layerSize: size, err: err}
|
||||
})
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("ApplyLayer json encode: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
cmd := reexec.Command("docker-applyLayer", dest)
|
||||
cmd.Stdin = layer
|
||||
cmd.Env = append(cmd.Env, fmt.Sprintf("OPT=%s", data))
|
||||
|
||||
outBuf, errBuf := new(bytes.Buffer), new(bytes.Buffer)
|
||||
cmd.Stdout, cmd.Stderr = outBuf, errBuf
|
||||
|
||||
// reexec.Command() sets cmd.SysProcAttr.Pdeathsig on Linux, which
|
||||
// causes the started process to be signaled when the creating OS thread
|
||||
// dies. Ensure that the reexec is not prematurely signaled. See
|
||||
// https://go.dev/issue/27505 for more information.
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
if err = cmd.Run(); err != nil {
|
||||
return 0, fmt.Errorf("ApplyLayer %s stdout: %s stderr: %s", err, outBuf, errBuf)
|
||||
}
|
||||
|
||||
// Stdout should be a valid JSON struct representing an applyLayerResponse.
|
||||
response := applyLayerResponse{}
|
||||
decoder := json.NewDecoder(outBuf)
|
||||
if err = decoder.Decode(&response); err != nil {
|
||||
return 0, fmt.Errorf("unable to decode ApplyLayer JSON response: %s", err)
|
||||
}
|
||||
|
||||
return response.LayerSize, nil
|
||||
res := <-done
|
||||
return res.layerSize, res.err
|
||||
}
|
||||
|
|
|
@ -3,7 +3,6 @@ package chrootarchive // import "github.com/docker/docker/pkg/chrootarchive"
|
|||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/docker/docker/pkg/archive"
|
||||
|
@ -29,13 +28,7 @@ func applyLayerHandler(dest string, layer io.Reader, options *archive.TarOptions
|
|||
layer = decompressed
|
||||
}
|
||||
|
||||
tmpDir, err := os.MkdirTemp(os.Getenv("temp"), "temp-docker-extract")
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("ApplyLayer failed to create temp-docker-extract under %s. %s", dest, err)
|
||||
}
|
||||
|
||||
s, err := archive.UnpackLayer(dest, layer, nil)
|
||||
os.RemoveAll(tmpDir)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("ApplyLayer %s failed UnpackLayer to %s: %s", layer, dest, err)
|
||||
}
|
||||
|
|
|
@ -1,29 +0,0 @@
|
|||
//go:build !windows
|
||||
// +build !windows
|
||||
|
||||
package chrootarchive // import "github.com/docker/docker/pkg/chrootarchive"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/docker/docker/pkg/reexec"
|
||||
)
|
||||
|
||||
// init registers this package's re-exec entry points with the reexec package
// under the names "docker-applyLayer", "docker-untar" and "docker-tar".
func init() {
|
||||
reexec.Register("docker-applyLayer", applyLayer)
|
||||
reexec.Register("docker-untar", untar)
|
||||
reexec.Register("docker-tar", tar)
|
||||
}
|
||||
|
||||
// fatal writes err to standard error and terminates the process with exit
// status 1. It does not return.
func fatal(err error) {
|
||||
fmt.Fprint(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// flush consumes all the bytes from the reader, discarding the data that is
|
||||
// read. It returns the byte count and error reported by io.Copy unchanged.
|
||||
func flush(r io.Reader) (bytes int64, err error) {
|
||||
return io.Copy(io.Discard, r)
|
||||
}
|
Loading…
Add table
Reference in a new issue