pkg/chrootarchive: stop reexec'ing before chroot
Unshare the thread's file system attributes and, if applicable, mount namespace so that the chroot operation does not affect the rest of the process. Signed-off-by: Cory Snider <csnider@mirantis.com>
This commit is contained in:
parent
f2f884a92f
commit
5de229644f
5 changed files with 359 additions and 480 deletions
|
@ -821,10 +821,29 @@ func Tar(path string, compression Compression) (io.ReadCloser, error) {
|
|||
// TarWithOptions creates an archive from the directory at `path`, only including files whose relative
|
||||
// paths are included in `options.IncludeFiles` (if non-nil) or not in `options.ExcludePatterns`.
|
||||
func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) {
|
||||
// Fix the source path to work with long path names. This is a no-op
|
||||
// on platforms other than Windows.
|
||||
srcPath = fixVolumePathPrefix(srcPath)
|
||||
tb, err := NewTarballer(srcPath, options)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
go tb.Do()
|
||||
return tb.Reader(), nil
|
||||
}
|
||||
|
||||
// Tarballer is a lower-level interface to TarWithOptions which gives the caller
|
||||
// control over which goroutine the archiving operation executes on.
//
// TarWithOptions shows the intended sequence: construct with NewTarballer, run
// Do on the chosen goroutine, and read the archive from Reader.
|
||||
type Tarballer struct {
|
||||
// srcPath is the path to archive, after fixVolumePathPrefix has been applied.
// Do replaces it with the parent directory when it names a non-directory.
srcPath string
|
||||
// options are the caller's TarOptions. Do may overwrite options.IncludeFiles.
options *TarOptions
|
||||
// pm matches paths against the patterns in options.ExcludePatterns.
pm *patternmatcher.PatternMatcher
|
||||
// pipeReader is what Reader hands back to the caller.
// NOTE(review): presumably the read end of the pipe whose write end is
// pipeWriter; the construction is not visible in this view, so confirm.
pipeReader *io.PipeReader
|
||||
// pipeWriter is closed by Do's deferred cleanup once archiving has finished.
pipeWriter *io.PipeWriter
|
||||
// compressWriter is the writer given to the tar appender; Do closes it when done.
compressWriter io.WriteCloser
|
||||
// whiteoutConverter is installed on the tar appender as its WhiteoutConverter.
whiteoutConverter tarWhiteoutConverter
|
||||
}
|
||||
|
||||
// NewTarballer constructs a new tarballer. The arguments are the same as for
|
||||
// TarWithOptions.
|
||||
func NewTarballer(srcPath string, options *TarOptions) (*Tarballer, error) {
|
||||
pm, err := patternmatcher.New(options.ExcludePatterns)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -842,183 +861,201 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error)
|
|||
return nil, err
|
||||
}
|
||||
|
||||
go func() {
|
||||
ta := newTarAppender(
|
||||
options.IDMap,
|
||||
compressWriter,
|
||||
options.ChownOpts,
|
||||
)
|
||||
ta.WhiteoutConverter = whiteoutConverter
|
||||
return &Tarballer{
|
||||
// Fix the source path to work with long path names. This is a no-op
|
||||
// on platforms other than Windows.
|
||||
srcPath: fixVolumePathPrefix(srcPath),
|
||||
options: options,
|
||||
pm: pm,
|
||||
pipeReader: pipeReader,
|
||||
pipeWriter: pipeWriter,
|
||||
compressWriter: compressWriter,
|
||||
whiteoutConverter: whiteoutConverter,
|
||||
}, nil
|
||||
}
|
||||
|
||||
defer func() {
|
||||
// Make sure to check the error on Close.
|
||||
if err := ta.TarWriter.Close(); err != nil {
|
||||
logrus.Errorf("Can't close tar writer: %s", err)
|
||||
}
|
||||
if err := compressWriter.Close(); err != nil {
|
||||
logrus.Errorf("Can't close compress writer: %s", err)
|
||||
}
|
||||
if err := pipeWriter.Close(); err != nil {
|
||||
logrus.Errorf("Can't close pipe writer: %s", err)
|
||||
}
|
||||
}()
|
||||
// Reader returns the reader for the created archive.
//
// It only hands back the Tarballer's stored pipe reader; it does not start the
// archiving operation itself (that is Do's job).
|
||||
func (t *Tarballer) Reader() io.ReadCloser {
|
||||
return t.pipeReader
|
||||
}
|
||||
|
||||
// this buffer is needed for the duration of this piped stream
|
||||
defer pools.BufioWriter32KPool.Put(ta.Buffer)
|
||||
// Do performs the archiving operation in the background. The resulting archive
|
||||
// can be read from t.Reader(). Do should only be called once on each Tarballer
|
||||
// instance.
|
||||
func (t *Tarballer) Do() {
|
||||
ta := newTarAppender(
|
||||
t.options.IDMap,
|
||||
t.compressWriter,
|
||||
t.options.ChownOpts,
|
||||
)
|
||||
ta.WhiteoutConverter = t.whiteoutConverter
|
||||
|
||||
// In general we log errors here but ignore them because
|
||||
// during e.g. a diff operation the container can continue
|
||||
// mutating the filesystem and we can see transient errors
|
||||
// from this
|
||||
|
||||
stat, err := os.Lstat(srcPath)
|
||||
if err != nil {
|
||||
return
|
||||
defer func() {
|
||||
// Make sure to check the error on Close.
|
||||
if err := ta.TarWriter.Close(); err != nil {
|
||||
logrus.Errorf("Can't close tar writer: %s", err)
|
||||
}
|
||||
|
||||
if !stat.IsDir() {
|
||||
// We can't later join a non-dir with any includes because the
|
||||
// 'walk' will error if "file/." is stat-ed and "file" is not a
|
||||
// directory. So, we must split the source path and use the
|
||||
// basename as the include.
|
||||
if len(options.IncludeFiles) > 0 {
|
||||
logrus.Warn("Tar: Can't archive a file with includes")
|
||||
}
|
||||
|
||||
dir, base := SplitPathDirEntry(srcPath)
|
||||
srcPath = dir
|
||||
options.IncludeFiles = []string{base}
|
||||
if err := t.compressWriter.Close(); err != nil {
|
||||
logrus.Errorf("Can't close compress writer: %s", err)
|
||||
}
|
||||
|
||||
if len(options.IncludeFiles) == 0 {
|
||||
options.IncludeFiles = []string{"."}
|
||||
}
|
||||
|
||||
seen := make(map[string]bool)
|
||||
|
||||
for _, include := range options.IncludeFiles {
|
||||
rebaseName := options.RebaseNames[include]
|
||||
|
||||
var (
|
||||
parentMatchInfo []patternmatcher.MatchInfo
|
||||
parentDirs []string
|
||||
)
|
||||
|
||||
walkRoot := getWalkRoot(srcPath, include)
|
||||
filepath.WalkDir(walkRoot, func(filePath string, f os.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
logrus.Errorf("Tar: Can't stat file %s to tar: %s", srcPath, err)
|
||||
return nil
|
||||
}
|
||||
|
||||
relFilePath, err := filepath.Rel(srcPath, filePath)
|
||||
if err != nil || (!options.IncludeSourceDir && relFilePath == "." && f.IsDir()) {
|
||||
// Error getting relative path OR we are looking
|
||||
// at the source directory path. Skip in both situations.
|
||||
return nil
|
||||
}
|
||||
|
||||
if options.IncludeSourceDir && include == "." && relFilePath != "." {
|
||||
relFilePath = strings.Join([]string{".", relFilePath}, string(filepath.Separator))
|
||||
}
|
||||
|
||||
skip := false
|
||||
|
||||
// If "include" is an exact match for the current file
|
||||
// then even if there's an "excludePatterns" pattern that
|
||||
// matches it, don't skip it. IOW, assume an explicit 'include'
|
||||
// is asking for that file no matter what - which is true
|
||||
// for some files, like .dockerignore and Dockerfile (sometimes)
|
||||
if include != relFilePath {
|
||||
for len(parentDirs) != 0 {
|
||||
lastParentDir := parentDirs[len(parentDirs)-1]
|
||||
if strings.HasPrefix(relFilePath, lastParentDir+string(os.PathSeparator)) {
|
||||
break
|
||||
}
|
||||
parentDirs = parentDirs[:len(parentDirs)-1]
|
||||
parentMatchInfo = parentMatchInfo[:len(parentMatchInfo)-1]
|
||||
}
|
||||
|
||||
var matchInfo patternmatcher.MatchInfo
|
||||
if len(parentMatchInfo) != 0 {
|
||||
skip, matchInfo, err = pm.MatchesUsingParentResults(relFilePath, parentMatchInfo[len(parentMatchInfo)-1])
|
||||
} else {
|
||||
skip, matchInfo, err = pm.MatchesUsingParentResults(relFilePath, patternmatcher.MatchInfo{})
|
||||
}
|
||||
if err != nil {
|
||||
logrus.Errorf("Error matching %s: %v", relFilePath, err)
|
||||
return err
|
||||
}
|
||||
|
||||
if f.IsDir() {
|
||||
parentDirs = append(parentDirs, relFilePath)
|
||||
parentMatchInfo = append(parentMatchInfo, matchInfo)
|
||||
}
|
||||
}
|
||||
|
||||
if skip {
|
||||
// If we want to skip this file and it's a directory
|
||||
// then we should first check to see if there's an
|
||||
// excludes pattern (e.g. !dir/file) that starts with this
|
||||
// dir. If so then we can't skip this dir.
|
||||
|
||||
// It's not a dir, so we can just return/skip.
|
||||
if !f.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// No exceptions (!...) in patterns so just skip dir
|
||||
if !pm.Exclusions() {
|
||||
return filepath.SkipDir
|
||||
}
|
||||
|
||||
dirSlash := relFilePath + string(filepath.Separator)
|
||||
|
||||
for _, pat := range pm.Patterns() {
|
||||
if !pat.Exclusion() {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(pat.String()+string(filepath.Separator), dirSlash) {
|
||||
// found a match - so can't skip this dir
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// No matching exclusion dir so just skip dir
|
||||
return filepath.SkipDir
|
||||
}
|
||||
|
||||
if seen[relFilePath] {
|
||||
return nil
|
||||
}
|
||||
seen[relFilePath] = true
|
||||
|
||||
// Rename the base resource.
|
||||
if rebaseName != "" {
|
||||
var replacement string
|
||||
if rebaseName != string(filepath.Separator) {
|
||||
// Special case the root directory to replace with an
|
||||
// empty string instead so that we don't end up with
|
||||
// double slashes in the paths.
|
||||
replacement = rebaseName
|
||||
}
|
||||
|
||||
relFilePath = strings.Replace(relFilePath, include, replacement, 1)
|
||||
}
|
||||
|
||||
if err := ta.addTarFile(filePath, relFilePath); err != nil {
|
||||
logrus.Errorf("Can't add file %s to tar: %s", filePath, err)
|
||||
// if pipe is broken, stop writing tar stream to it
|
||||
if err == io.ErrClosedPipe {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err := t.pipeWriter.Close(); err != nil {
|
||||
logrus.Errorf("Can't close pipe writer: %s", err)
|
||||
}
|
||||
}()
|
||||
|
||||
return pipeReader, nil
|
||||
// this buffer is needed for the duration of this piped stream
|
||||
defer pools.BufioWriter32KPool.Put(ta.Buffer)
|
||||
|
||||
// In general we log errors here but ignore them because
|
||||
// during e.g. a diff operation the container can continue
|
||||
// mutating the filesystem and we can see transient errors
|
||||
// from this
|
||||
|
||||
stat, err := os.Lstat(t.srcPath)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if !stat.IsDir() {
|
||||
// We can't later join a non-dir with any includes because the
|
||||
// 'walk' will error if "file/." is stat-ed and "file" is not a
|
||||
// directory. So, we must split the source path and use the
|
||||
// basename as the include.
|
||||
if len(t.options.IncludeFiles) > 0 {
|
||||
logrus.Warn("Tar: Can't archive a file with includes")
|
||||
}
|
||||
|
||||
dir, base := SplitPathDirEntry(t.srcPath)
|
||||
t.srcPath = dir
|
||||
t.options.IncludeFiles = []string{base}
|
||||
}
|
||||
|
||||
if len(t.options.IncludeFiles) == 0 {
|
||||
t.options.IncludeFiles = []string{"."}
|
||||
}
|
||||
|
||||
seen := make(map[string]bool)
|
||||
|
||||
for _, include := range t.options.IncludeFiles {
|
||||
rebaseName := t.options.RebaseNames[include]
|
||||
|
||||
var (
|
||||
parentMatchInfo []patternmatcher.MatchInfo
|
||||
parentDirs []string
|
||||
)
|
||||
|
||||
walkRoot := getWalkRoot(t.srcPath, include)
|
||||
filepath.WalkDir(walkRoot, func(filePath string, f os.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
logrus.Errorf("Tar: Can't stat file %s to tar: %s", t.srcPath, err)
|
||||
return nil
|
||||
}
|
||||
|
||||
relFilePath, err := filepath.Rel(t.srcPath, filePath)
|
||||
if err != nil || (!t.options.IncludeSourceDir && relFilePath == "." && f.IsDir()) {
|
||||
// Error getting relative path OR we are looking
|
||||
// at the source directory path. Skip in both situations.
|
||||
return nil
|
||||
}
|
||||
|
||||
if t.options.IncludeSourceDir && include == "." && relFilePath != "." {
|
||||
relFilePath = strings.Join([]string{".", relFilePath}, string(filepath.Separator))
|
||||
}
|
||||
|
||||
skip := false
|
||||
|
||||
// If "include" is an exact match for the current file
|
||||
// then even if there's an "excludePatterns" pattern that
|
||||
// matches it, don't skip it. IOW, assume an explicit 'include'
|
||||
// is asking for that file no matter what - which is true
|
||||
// for some files, like .dockerignore and Dockerfile (sometimes)
|
||||
if include != relFilePath {
|
||||
for len(parentDirs) != 0 {
|
||||
lastParentDir := parentDirs[len(parentDirs)-1]
|
||||
if strings.HasPrefix(relFilePath, lastParentDir+string(os.PathSeparator)) {
|
||||
break
|
||||
}
|
||||
parentDirs = parentDirs[:len(parentDirs)-1]
|
||||
parentMatchInfo = parentMatchInfo[:len(parentMatchInfo)-1]
|
||||
}
|
||||
|
||||
var matchInfo patternmatcher.MatchInfo
|
||||
if len(parentMatchInfo) != 0 {
|
||||
skip, matchInfo, err = t.pm.MatchesUsingParentResults(relFilePath, parentMatchInfo[len(parentMatchInfo)-1])
|
||||
} else {
|
||||
skip, matchInfo, err = t.pm.MatchesUsingParentResults(relFilePath, patternmatcher.MatchInfo{})
|
||||
}
|
||||
if err != nil {
|
||||
logrus.Errorf("Error matching %s: %v", relFilePath, err)
|
||||
return err
|
||||
}
|
||||
|
||||
if f.IsDir() {
|
||||
parentDirs = append(parentDirs, relFilePath)
|
||||
parentMatchInfo = append(parentMatchInfo, matchInfo)
|
||||
}
|
||||
}
|
||||
|
||||
if skip {
|
||||
// If we want to skip this file and it's a directory
|
||||
// then we should first check to see if there's an
|
||||
// excludes pattern (e.g. !dir/file) that starts with this
|
||||
// dir. If so then we can't skip this dir.
|
||||
|
||||
// It's not a dir, so we can just return/skip.
|
||||
if !f.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// No exceptions (!...) in patterns so just skip dir
|
||||
if !t.pm.Exclusions() {
|
||||
return filepath.SkipDir
|
||||
}
|
||||
|
||||
dirSlash := relFilePath + string(filepath.Separator)
|
||||
|
||||
for _, pat := range t.pm.Patterns() {
|
||||
if !pat.Exclusion() {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(pat.String()+string(filepath.Separator), dirSlash) {
|
||||
// found a match - so can't skip this dir
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// No matching exclusion dir so just skip dir
|
||||
return filepath.SkipDir
|
||||
}
|
||||
|
||||
if seen[relFilePath] {
|
||||
return nil
|
||||
}
|
||||
seen[relFilePath] = true
|
||||
|
||||
// Rename the base resource.
|
||||
if rebaseName != "" {
|
||||
var replacement string
|
||||
if rebaseName != string(filepath.Separator) {
|
||||
// Special case the root directory to replace with an
|
||||
// empty string instead so that we don't end up with
|
||||
// double slashes in the paths.
|
||||
replacement = rebaseName
|
||||
}
|
||||
|
||||
relFilePath = strings.Replace(relFilePath, include, replacement, 1)
|
||||
}
|
||||
|
||||
if err := ta.addTarFile(filePath, relFilePath); err != nil {
|
||||
logrus.Errorf("Can't add file %s to tar: %s", filePath, err)
|
||||
// if pipe is broken, stop writing tar stream to it
|
||||
if err == io.ErrClosedPipe {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Unpack unpacks the decompressedArchive to dest with options.
|
||||
|
|
|
@ -4,223 +4,71 @@
|
|||
package chrootarchive // import "github.com/docker/docker/pkg/chrootarchive"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"github.com/docker/docker/pkg/archive"
|
||||
"github.com/docker/docker/pkg/reexec"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// untar is the entry-point for docker-untar on re-exec. This is not used on
|
||||
// Windows as it does not support chroot, hence no point sandboxing through
|
||||
// chroot and reexec.
|
||||
func untar() {
|
||||
runtime.LockOSThread()
|
||||
flag.Parse()
|
||||
|
||||
var options archive.TarOptions
|
||||
|
||||
// read the options from the pipe "ExtraFiles"
|
||||
if err := json.NewDecoder(os.NewFile(3, "options")).Decode(&options); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
dst := flag.Arg(0)
|
||||
var root string
|
||||
if len(flag.Args()) > 1 {
|
||||
root = flag.Arg(1)
|
||||
}
|
||||
|
||||
if root == "" {
|
||||
root = dst
|
||||
}
|
||||
|
||||
if err := chroot(root); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
if err := archive.Unpack(os.Stdin, dst, &options); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
// fully consume stdin in case it is zero padded
|
||||
if _, err := flush(os.Stdin); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
func invokeUnpack(decompressedArchive io.Reader, dest string, options *archive.TarOptions, root string) error {
|
||||
if root == "" {
|
||||
return errors.New("must specify a root to chroot to")
|
||||
}
|
||||
|
||||
// We can't pass a potentially large exclude list directly via cmd line
|
||||
// because we easily overrun the kernel's max argument/environment size
|
||||
// when the full image list is passed (e.g. when this is used by
|
||||
// `docker load`). We will marshal the options via a pipe to the
|
||||
// child
|
||||
r, w, err := os.Pipe()
|
||||
relDest, err := resolvePathInChroot(root, dest)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Untar pipe failure: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
if root != "" {
|
||||
relDest, err := filepath.Rel(root, dest)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if relDest == "." {
|
||||
relDest = "/"
|
||||
}
|
||||
if relDest[0] != '/' {
|
||||
relDest = "/" + relDest
|
||||
}
|
||||
dest = relDest
|
||||
}
|
||||
|
||||
cmd := reexec.Command("docker-untar", dest, root)
|
||||
cmd.Stdin = decompressedArchive
|
||||
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, r)
|
||||
output := bytes.NewBuffer(nil)
|
||||
cmd.Stdout = output
|
||||
cmd.Stderr = output
|
||||
|
||||
// reexec.Command() sets cmd.SysProcAttr.Pdeathsig on Linux, which
|
||||
// causes the started process to be signaled when the creating OS thread
|
||||
// dies. Ensure that the reexec is not prematurely signaled. See
|
||||
// https://go.dev/issue/27505 for more information.
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
if err := cmd.Start(); err != nil {
|
||||
w.Close()
|
||||
return fmt.Errorf("Untar error on re-exec cmd: %v", err)
|
||||
}
|
||||
|
||||
// write the options to the pipe for the untar exec to read
|
||||
if err := json.NewEncoder(w).Encode(options); err != nil {
|
||||
w.Close()
|
||||
return fmt.Errorf("Untar json encode to pipe failed: %v", err)
|
||||
}
|
||||
w.Close()
|
||||
|
||||
if err := cmd.Wait(); err != nil {
|
||||
// when `xz -d -c -q | docker-untar ...` failed on docker-untar side,
|
||||
// we need to exhaust `xz`'s output, otherwise the `xz` side will be
|
||||
// pending on write pipe forever
|
||||
io.Copy(io.Discard, decompressedArchive)
|
||||
|
||||
return fmt.Errorf("Error processing tar file(%v): %s", err, output)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func tar() {
|
||||
runtime.LockOSThread()
|
||||
flag.Parse()
|
||||
|
||||
src := flag.Arg(0)
|
||||
var root string
|
||||
if len(flag.Args()) > 1 {
|
||||
root = flag.Arg(1)
|
||||
}
|
||||
|
||||
if root == "" {
|
||||
root = src
|
||||
}
|
||||
|
||||
if err := realChroot(root); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
var options archive.TarOptions
|
||||
if err := json.NewDecoder(os.Stdin).Decode(&options); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
rdr, err := archive.TarWithOptions(src, &options)
|
||||
done := make(chan error)
|
||||
err = Go(root, func() { done <- archive.Unpack(decompressedArchive, relDest, options) })
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
return err
|
||||
}
|
||||
defer rdr.Close()
|
||||
|
||||
if _, err := io.Copy(os.Stdout, rdr); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
os.Exit(0)
|
||||
return <-done
|
||||
}
|
||||
|
||||
func invokePack(srcPath string, options *archive.TarOptions, root string) (io.ReadCloser, error) {
|
||||
if root == "" {
|
||||
return nil, errors.New("root path must not be empty")
|
||||
}
|
||||
|
||||
relSrc, err := filepath.Rel(root, srcPath)
|
||||
relSrc, err := resolvePathInChroot(root, srcPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if relSrc == "." {
|
||||
relSrc = "/"
|
||||
}
|
||||
if relSrc[0] != '/' {
|
||||
relSrc = "/" + relSrc
|
||||
}
|
||||
|
||||
// make sure we didn't trim a trailing slash with the call to `Rel`
|
||||
// make sure we didn't trim a trailing slash with the call to `resolvePathInChroot`
|
||||
if strings.HasSuffix(srcPath, "/") && !strings.HasSuffix(relSrc, "/") {
|
||||
relSrc += "/"
|
||||
}
|
||||
|
||||
cmd := reexec.Command("docker-tar", relSrc, root)
|
||||
|
||||
errBuff := bytes.NewBuffer(nil)
|
||||
cmd.Stderr = errBuff
|
||||
|
||||
tarR, tarW := io.Pipe()
|
||||
cmd.Stdout = tarW
|
||||
|
||||
stdin, err := cmd.StdinPipe()
|
||||
tb, err := archive.NewTarballer(relSrc, options)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "error getting options pipe for tar process")
|
||||
return nil, errors.Wrap(err, "error processing tar file")
|
||||
}
|
||||
|
||||
started := make(chan error)
|
||||
go func() {
|
||||
// reexec.Command() sets cmd.SysProcAttr.Pdeathsig on Linux,
|
||||
// which causes the started process to be signaled when the
|
||||
// creating OS thread dies. Ensure that the subprocess is not
|
||||
// prematurely signaled. See https://go.dev/issue/27505 for more
|
||||
// information.
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
if err := cmd.Start(); err != nil {
|
||||
started <- err
|
||||
return
|
||||
}
|
||||
close(started)
|
||||
err := cmd.Wait()
|
||||
err = errors.Wrapf(err, "error processing tar file: %s", errBuff)
|
||||
tarW.CloseWithError(err)
|
||||
}()
|
||||
if err := <-started; err != nil {
|
||||
return nil, errors.Wrap(err, "tar error on re-exec cmd")
|
||||
err = Go(root, tb.Do)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not chroot")
|
||||
}
|
||||
|
||||
if err := json.NewEncoder(stdin).Encode(options); err != nil {
|
||||
stdin.Close()
|
||||
return nil, errors.Wrap(err, "tar json encode to pipe failed")
|
||||
}
|
||||
stdin.Close()
|
||||
|
||||
return tarR, nil
|
||||
return tb.Reader(), nil
|
||||
}
|
||||
|
||||
// resolvePathInChroot returns the equivalent to path inside a chroot rooted at root.
|
||||
// The returned path always begins with '/'.
|
||||
//
|
||||
// - resolvePathInChroot("/a/b", "/a/b/c/d") -> "/c/d"
|
||||
// - resolvePathInChroot("/a/b", "/a/b") -> "/"
|
||||
//
|
||||
// The implementation is buggy, and some bugs may be load-bearing.
|
||||
// Here be dragons.
//
// An error is returned if root is empty or if filepath.Rel cannot express path
// relative to root.
//
// NOTE(review): nothing here rejects a path that lies outside root. For such
// input filepath.Rel yields a "../"-prefixed result, which is then merely
// prefixed with '/'. Confirm that callers never pass one before relying on the
// result being confined to the chroot.
|
||||
func resolvePathInChroot(root, path string) (string, error) {
|
||||
if root == "" {
|
||||
return "", errors.New("root path must not be empty")
|
||||
}
|
||||
rel, err := filepath.Rel(root, path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// path is the chroot root itself.
if rel == "." {
|
||||
rel = "/"
|
||||
}
|
||||
// Anchor the relative result at the chroot's root directory.
if rel[0] != '/' {
|
||||
rel = "/" + rel
|
||||
}
|
||||
return rel, nil
|
||||
}
|
||||
|
|
|
@ -4,71 +4,14 @@
|
|||
package chrootarchive // import "github.com/docker/docker/pkg/chrootarchive"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
|
||||
"github.com/containerd/containerd/pkg/userns"
|
||||
"github.com/docker/docker/pkg/archive"
|
||||
"github.com/docker/docker/pkg/reexec"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type applyLayerResponse struct {
|
||||
LayerSize int64 `json:"layerSize"`
|
||||
}
|
||||
|
||||
// applyLayer is the entry-point for docker-applylayer on re-exec. This is not
|
||||
// used on Windows as it does not support chroot, hence no point sandboxing
|
||||
// through chroot and reexec.
|
||||
func applyLayer() {
|
||||
|
||||
var (
|
||||
err error
|
||||
options *archive.TarOptions
|
||||
)
|
||||
runtime.LockOSThread()
|
||||
flag.Parse()
|
||||
|
||||
inUserns := userns.RunningInUserNS()
|
||||
if err := chroot(flag.Arg(0)); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
// We need to be able to set any perms
|
||||
oldmask := unix.Umask(0)
|
||||
defer unix.Umask(oldmask)
|
||||
|
||||
if err := json.Unmarshal([]byte(os.Getenv("OPT")), &options); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
if inUserns {
|
||||
options.InUserNS = true
|
||||
}
|
||||
|
||||
size, err := archive.UnpackLayer("/", os.Stdin, options)
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
encoder := json.NewEncoder(os.Stdout)
|
||||
if err := encoder.Encode(applyLayerResponse{size}); err != nil {
|
||||
fatal(fmt.Errorf("unable to encode layerSize JSON: %s", err))
|
||||
}
|
||||
|
||||
if _, err := flush(os.Stdin); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
// applyLayerHandler parses a diff in the standard layer format from `layer`, and
|
||||
// applies it to the directory `dest`. Returns the size in bytes of the
|
||||
// contents of the layer.
|
||||
|
@ -85,42 +28,30 @@ func applyLayerHandler(dest string, layer io.Reader, options *archive.TarOptions
|
|||
}
|
||||
if options == nil {
|
||||
options = &archive.TarOptions{}
|
||||
if userns.RunningInUserNS() {
|
||||
options.InUserNS = true
|
||||
}
|
||||
}
|
||||
if userns.RunningInUserNS() {
|
||||
options.InUserNS = true
|
||||
}
|
||||
if options.ExcludePatterns == nil {
|
||||
options.ExcludePatterns = []string{}
|
||||
}
|
||||
|
||||
data, err := json.Marshal(options)
|
||||
type result struct {
|
||||
layerSize int64
|
||||
err error
|
||||
}
|
||||
|
||||
done := make(chan result)
|
||||
err = Go(dest, func() {
|
||||
// We need to be able to set any perms
|
||||
_ = unix.Umask(0)
|
||||
|
||||
size, err := archive.UnpackLayer("/", layer, options)
|
||||
done <- result{layerSize: size, err: err}
|
||||
})
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("ApplyLayer json encode: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
cmd := reexec.Command("docker-applyLayer", dest)
|
||||
cmd.Stdin = layer
|
||||
cmd.Env = append(cmd.Env, fmt.Sprintf("OPT=%s", data))
|
||||
|
||||
outBuf, errBuf := new(bytes.Buffer), new(bytes.Buffer)
|
||||
cmd.Stdout, cmd.Stderr = outBuf, errBuf
|
||||
|
||||
// reexec.Command() sets cmd.SysProcAttr.Pdeathsig on Linux, which
|
||||
// causes the started process to be signaled when the creating OS thread
|
||||
// dies. Ensure that the reexec is not prematurely signaled. See
|
||||
// https://go.dev/issue/27505 for more information.
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
if err = cmd.Run(); err != nil {
|
||||
return 0, fmt.Errorf("ApplyLayer %s stdout: %s stderr: %s", err, outBuf, errBuf)
|
||||
}
|
||||
|
||||
// Stdout should be a valid JSON struct representing an applyLayerResponse.
|
||||
response := applyLayerResponse{}
|
||||
decoder := json.NewDecoder(outBuf)
|
||||
if err = decoder.Decode(&response); err != nil {
|
||||
return 0, fmt.Errorf("unable to decode ApplyLayer JSON response: %s", err)
|
||||
}
|
||||
|
||||
return response.LayerSize, nil
|
||||
res := <-done
|
||||
return res.layerSize, res.err
|
||||
}
|
||||
|
|
92
pkg/chrootarchive/go_linux.go
Normal file
92
pkg/chrootarchive/go_linux.go
Normal file
|
@ -0,0 +1,92 @@
|
|||
//go:build go1.10
|
||||
// +build go1.10
|
||||
|
||||
package chrootarchive // import "github.com/docker/docker/pkg/chrootarchive"
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// init calls runtime.LockOSThread during package initialization so that the
// main goroutine stays wired to the process's startup thread. The rationale is
// spelled out in the body.
func init() {
|
||||
// The startup thread of a process is special in a few different ways.
|
||||
// Most pertinent to the discussion at hand, any per-thread kernel state
|
||||
// reflected in the /proc/[pid]/ directory for a process is taken from
|
||||
// the state of the startup thread. Same goes for /proc/self/; it shows
|
||||
// the state of the current process' startup thread, no matter which
|
||||
// thread the files are being opened from. For most programs this is a
|
||||
// distinction without a difference as the kernel state, such as the
|
||||
// mount namespace and current working directory, is shared among (and
|
||||
// kept synchronized across) all threads of a process. But things start
|
||||
// to break down once threads start unsharing and modifying parts of
|
||||
// their kernel state.
|
||||
//
|
||||
// The Go runtime schedules goroutines to execute on the startup thread,
|
||||
// same as any other. How this could be problematic is best illustrated
|
||||
// with a concrete example. Consider what happens if a goroutine spawned
|
||||
// from Go() gets scheduled onto the startup thread. The thread's mount
|
||||
// namespace will be unshared and modified. The contents of the
|
||||
// /proc/[pid]/mountinfo file will then describe the mount tree of the
|
||||
// unshared namespace, not the namespace of any other thread. It will
|
||||
// remain this way until the process exits. (The startup thread is
|
||||
// special in another way: exiting it puts the process into a
|
||||
// "non-waitable zombie" state. To avoid this fate, the Go runtime parks
|
||||
// the thread instead of exiting if a goroutine returns while locked to
|
||||
// the startup thread. More information can be found in the Go runtime
|
||||
// sources: `go doc -u -src runtime.mexit`.)
|
||||
//
// The github.com/moby/sys/mountinfo package reads from
|
||||
// /proc/self/mountinfo, so will read the mount tree for the wrong
|
||||
// namespace if the startup thread has had its mount namespace unshared!
|
||||
// The /proc/thread-self/ magic symlink, introduced in Linux 3.17, is
|
||||
// one potential solution to this problem, but every package which opens
|
||||
// files in /proc/self/ would need to be updated, and fallbacks to
|
||||
// /proc/self/task/{{syscall.Gettid()}}/ would be required to support
|
||||
// older kernels. Overlooking any reference to /proc/self/ would
|
||||
// manifest as stochastically-reproducible bugs, so this is far from an
|
||||
// ideal solution.
|
||||
//
|
||||
// Reading from /proc/self/ would not be a problem if we can prevent the
|
||||
// per-thread state of the startup thread from being modified
|
||||
// nondeterministically in the first place. We can accomplish this
|
||||
// simply by locking the main() function to the startup thread! Doing so
|
||||
// excludes any other goroutine from being scheduled on the thread.
|
||||
runtime.LockOSThread()
|
||||
}
|
||||
|
||||
// Go starts fn in a goroutine where the root directory, current working
|
||||
// directory and umask are unshared from other goroutines and the root directory
|
||||
// has been changed to path. These changes are only visible to the goroutine in
|
||||
// which fn is executed. Any other goroutines, including ones started from fn,
|
||||
// will see the same root directory and file system attributes as the rest of
|
||||
// the process.
//
// Go blocks only until the new goroutine has finished unsharing and chrooting.
// If either step fails, that error is returned and fn is never called.
// Otherwise Go returns nil as soon as the chroot is in place, without waiting
// for fn to return; callers that need fn's result must arrange their own
// signalling from inside fn.
|
||||
func Go(path string, fn func()) error {
|
||||
// Unbuffered: carries the setup error, or is closed to signal success.
started := make(chan error)
|
||||
go func() {
|
||||
// Prepare to manipulate per-thread kernel state. Wire the
|
||||
// goroutine to the OS thread so execution of other goroutines
|
||||
// will not be scheduled on it. It is very important not to
|
||||
// unwire the goroutine from the thread so that the thread exits
|
||||
// with this goroutine and is not returned to the goroutine
|
||||
// thread pool.
|
||||
runtime.LockOSThread()
|
||||
|
||||
// Under Linux, threads are implemented as processes which share
|
||||
// a virtual memory space. Therefore in a multithreaded process
|
||||
// unshare(2) disassociates parts of the calling thread's
|
||||
// context from the thread it was clone(2)'d from.
|
||||
if err := unix.Unshare(unix.CLONE_FS); err != nil {
|
||||
started <- err
|
||||
return
|
||||
}
|
||||
|
||||
if err := chroot(path); err != nil {
|
||||
started <- err
|
||||
return
|
||||
}
|
||||
|
||||
// Setup succeeded: closing the channel unblocks the caller with a nil
// error before fn starts running.
close(started)
|
||||
fn()
|
||||
}()
|
||||
return <-started
|
||||
}
|
|
@ -1,29 +0,0 @@
|
|||
//go:build !windows
|
||||
// +build !windows
|
||||
|
||||
package chrootarchive // import "github.com/docker/docker/pkg/chrootarchive"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/docker/docker/pkg/reexec"
|
||||
)
|
||||
|
||||
func init() {
|
||||
reexec.Register("docker-applyLayer", applyLayer)
|
||||
reexec.Register("docker-untar", untar)
|
||||
reexec.Register("docker-tar", tar)
|
||||
}
|
||||
|
||||
func fatal(err error) {
|
||||
fmt.Fprint(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// flush consumes all the bytes from the reader, discarding the data. It returns
|
||||
// the number of bytes copied and any error reported by io.Copy.
|
||||
func flush(r io.Reader) (bytes int64, err error) {
|
||||
return io.Copy(io.Discard, r)
|
||||
}
|
Loading…
Reference in a new issue