moby/pkg/archive/copy.go

package archive // import "github.com/docker/docker/pkg/archive"

import (
	"archive/tar"
	"errors"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"strings"

	"github.com/docker/docker/pkg/system"
	"github.com/sirupsen/logrus"
)

// Errors used or returned by this file.
var (
	// ErrNotDirectory is returned by CopyInfoDestinationPath when the
	// destination does not exist and its parent exists but is not a directory.
	ErrNotDirectory      = errors.New("not a directory")
	// ErrDirNotExists is returned by PrepareArchiveCopy when the destination
	// does not exist, is asserted to be a directory (trailing separator or
	// `.` segment), and the source content is not a directory.
	ErrDirNotExists      = errors.New("no such directory")
	// ErrCannotCopyDir is returned by PrepareArchiveCopy when the source is a
	// directory but the destination already exists as a non-directory.
	ErrCannotCopyDir     = errors.New("cannot copy directory")
	// ErrInvalidCopySource is not referenced in this file; presumably it is
	// returned by callers that validate the copy source archive — TODO confirm.
	ErrInvalidCopySource = errors.New("invalid copy source content")
)

// PreserveTrailingDotOrSeparator re-attaches to cleanedPath the trailing `/.`
// or `/` that originalPath carried before it was run through the cleaning
// helpers of the path or filepath stdlib packages (which strip such
// suffixes). Both paths are first rewritten so that every "/" becomes sep.
// Nothing is appended when the cleaned path already ends in a `.` segment or
// in the separator, respectively.
//
// NOTE(review): the `.` detection is delegated to specifiesCurrentDir, which
// takes no separator argument; presumably it only agrees with sep when sep is
// the host separator — confirm before passing a foreign separator.
func PreserveTrailingDotOrSeparator(cleanedPath string, originalPath string, sep byte) string {
	sepStr := string(sep)

	// Bring both paths into the requested separator semantics.
	result := strings.Replace(cleanedPath, "/", sepStr, -1)
	original := strings.Replace(originalPath, "/", sepStr, -1)

	// Restore a trailing "." segment that cleaning removed.
	if specifiesCurrentDir(original) && !specifiesCurrentDir(result) {
		if hasTrailingPathSeparator(result, sep) {
			// A cleaned path only ends in a separator when it is the root,
			// so the dot can be appended directly.
			result += "."
		} else {
			result += sepStr + "."
		}
	}

	// Restore a trailing separator that cleaning removed.
	if hasTrailingPathSeparator(original, sep) && !hasTrailingPathSeparator(result, sep) {
		result += sepStr
	}

	return result
}

// assertsDirectory returns whether the given path is
// asserted to be a directory, i.e., the path ends with
// a trailing '/' or `/.`, assuming a path separator of `/`.
func assertsDirectory(path string, sep byte) bool {
	return hasTrailingPathSeparator(path, sep) || specifiesCurrentDir(path)
}

// hasTrailingPathSeparator reports whether the last byte of path is the
// separator sep. An empty path never has a trailing separator.
func hasTrailingPathSeparator(path string, sep byte) bool {
	last := len(path) - 1
	if last < 0 {
		return false
	}
	return path[last] == sep
}

// specifiesCurrentDir reports whether the given path names a "current
// directory", i.e. whether filepath.Base yields `.` as its last segment.
func specifiesCurrentDir(path string) bool {
	lastSegment := filepath.Base(path)
	return lastSegment == "."
}

// SplitPathDirEntry cleans the given path and returns its directory name and
// its basename. When the original path specified the current directory (its
// last segment is "."), that "." is kept and reported as the basename.
func SplitPathDirEntry(path string) (dir, base string) {
	cleaned := filepath.Clean(filepath.FromSlash(path))

	// filepath.Clean drops a trailing "." segment; re-append it so that it
	// is reported as the entry being addressed.
	if specifiesCurrentDir(path) {
		cleaned = cleaned + string(os.PathSeparator) + "."
	}

	dir = filepath.Dir(cleaned)
	base = filepath.Base(cleaned)
	return dir, base
}

// TarResource produces a Tar archive of the resource described by the given
// CopyInfo. An error is returned if the source path does not exist, or if it
// is asserted to be a directory but exists as another type of file.
//
// It is a convenience wrapper: TarWithOptions requires a directory as the
// source path, whereas TarResource accepts either a directory or a file path
// and delegates to TarResourceRebase to set up the Tar options, passing along
// the CopyInfo's Path and RebaseName.
func TarResource(sourceInfo CopyInfo) (content io.ReadCloser, err error) {
	path, rebase := sourceInfo.Path, sourceInfo.RebaseName
	return TarResourceRebase(path, rebase)
}

// TarResourceRebase is like TarResource but renames the first path element of
// items in the resulting tar archive to match the given rebaseName if not "".
func TarResourceRebase(sourcePath, rebaseName string) (content io.ReadCloser, err error) {
	sourcePath = normalizePath(sourcePath)

	// Lstat fails when the source does not exist, and also when the path is
	// asserted to be a directory but is not one, so both cases are rejected
	// here.
	if _, statErr := os.Lstat(sourcePath); statErr != nil {
		return nil, statErr
	}

	// Split the source into the directory to archive from and the single
	// entry inside that directory which is being archived.
	dir, entry := SplitPathDirEntry(sourcePath)

	logrus.Debugf("copying %q from %q", entry, dir)
	return TarWithOptions(dir, TarResourceRebaseOpts(entry, rebaseName))
}

// TarResourceRebaseOpts does not perform the Tar itself; it only builds the
// TarOptions to hand to TarWithOptions: an uncompressed archive that includes
// only sourceBase (and the source directory) and maps sourceBase to
// rebaseName in RebaseNames.
func TarResourceRebaseOpts(sourceBase string, rebaseName string) *TarOptions {
	rebase := make(map[string]string, 1)
	rebase[sourceBase] = rebaseName

	opts := &TarOptions{
		Compression:      Uncompressed,
		IncludeFiles:     []string{sourceBase},
		IncludeSourceDir: true,
		RebaseNames:      rebase,
	}
	return opts
}

// CopyInfo holds basic info about the source
// or destination path of a copy operation.
type CopyInfo struct {
	// Path is the path of the resource. The constructors in this file store
	// the path after symlink resolution here.
	Path       string
	// Exists reports whether something was found at Path. It is left false
	// for a destination that does not exist yet.
	Exists     bool
	// IsDir reports whether the resource found at Path is a directory.
	IsDir      bool
	// RebaseName, when non-empty, is the name that the top-level entry of
	// the archive should carry; PrepareArchiveCopy uses it in place of the
	// basename of Path as the name to be replaced.
	RebaseName string
}

// CopyInfoSourcePath stats the given path and builds the CopyInfo describing
// it as the source of an archive copy operation. The given path should be an
// absolute local path. All symlinks that appear before the last path
// separator ("/" on Unix) are evaluated; the final element is only followed
// when followLink is set. Because it is a copy source, the path must exist,
// otherwise the stat error is returned.
func CopyInfoSourcePath(path string, followLink bool) (CopyInfo, error) {
	// Normalize first, then resolve symlinks; with followLink the link
	// target is used instead of the link itself.
	resolvedPath, rebaseName, err := ResolveHostSourcePath(normalizePath(path), followLink)
	if err != nil {
		return CopyInfo{}, err
	}

	fi, err := os.Lstat(resolvedPath)
	if err != nil {
		return CopyInfo{}, err
	}

	var info CopyInfo
	info.Path = resolvedPath
	info.Exists = true
	info.IsDir = fi.IsDir()
	info.RebaseName = rebaseName
	return info, nil
}

// CopyInfoDestinationPath stats the given path and builds the CopyInfo
// describing it as the destination of an archive copy operation. The given
// path should be an absolute local path.
//
// If the path is a symlink it is followed (repeatedly) so that the copy
// targets what the link points at rather than the link itself. The final
// target does not have to exist, but in that case its parent must exist and
// be a directory; otherwise an error (ErrNotDirectory for a non-directory
// parent) is returned.
func CopyInfoDestinationPath(path string) (info CopyInfo, err error) {
	// filepath.EvalSymlinks uses 255, but 10 already seems like a lot.
	// The check below is `hops > maxSymlinkIter`, so up to
	// maxSymlinkIter+1 links are followed before giving up.
	const maxSymlinkIter = 10

	path = normalizePath(path)
	originalPath := path

	stat, err := os.Lstat(path)

	// Follow the chain for as long as the current path is a symlink. This is
	// notably different from CopyInfoSourcePath, which only evaluates
	// symlinks before the last path separator. A broken symlink as the last
	// element is fine, since the copy operation should create the target.
	for hops := 0; err == nil && stat.Mode()&os.ModeSymlink != 0; hops++ {
		if hops > maxSymlinkIter {
			return CopyInfo{}, errors.New("too many symlinks in " + originalPath)
		}

		target, readErr := os.Readlink(path)
		if readErr != nil {
			return CopyInfo{}, readErr
		}

		if !system.IsAbs(target) {
			// Relative targets are relative to the link's directory.
			linkDir, _ := SplitPathDirEntry(path)
			target = filepath.Join(linkDir, target)
		}

		path = target
		stat, err = os.Lstat(path)
	}

	switch {
	case err == nil:
		// The path exists (possibly after resolving symlinks).
		return CopyInfo{
			Path:   path,
			Exists: true,
			IsDir:  stat.IsDir(),
		}, nil
	case !os.IsNotExist(err):
		return CopyInfo{}, err
	}

	// The destination itself does not exist, which is acceptable as long as
	// its parent directory does.
	dstParent, _ := SplitPathDirEntry(path)

	parentStat, statErr := os.Stat(dstParent)
	if statErr != nil {
		return CopyInfo{}, statErr
	}
	if !parentStat.IsDir() {
		return CopyInfo{}, ErrNotDirectory
	}

	return CopyInfo{Path: path}, nil
}

// PrepareArchiveCopy prepares the given srcContent archive, which should
// contain the archived resource described by srcInfo, for extraction to the
// destination described by dstInfo. It returns the directory the archive
// should be extracted to together with the (possibly rewritten) archive.
//
// Outcomes:
//   - destination exists as a directory: content is passed through untouched
//     and is to be extracted into the destination itself;
//   - destination exists as a non-directory and the source is a directory:
//     ErrCannotCopyDir;
//   - destination does not exist, is asserted to be a directory, and the
//     source is not a directory: ErrDirNotExists;
//   - every other case: the archive is to be extracted into the destination's
//     parent, with its entries renamed so that the source basename (or
//     srcInfo.RebaseName when set) becomes the destination basename.
func PrepareArchiveCopy(srcContent io.Reader, srcInfo, dstInfo CopyInfo) (dstDir string, content io.ReadCloser, err error) {
	// Ensure in platform semantics
	srcInfo.Path = normalizePath(srcInfo.Path)
	dstInfo.Path = normalizePath(dstInfo.Path)

	if dstInfo.Exists {
		if dstInfo.IsDir {
			// Existing directory: extract as-is into it.
			return dstInfo.Path, ioutil.NopCloser(srcContent), nil
		}
		if srcInfo.IsDir {
			// A directory cannot be copied onto an existing file.
			return "", nil, ErrCannotCopyDir
		}
	} else if !srcInfo.IsDir && assertsDirectory(dstInfo.Path, os.PathSeparator) {
		// A directory cannot be created from a file source.
		return "", nil, ErrDirNotExists
	}

	// All remaining cases extract into the destination's parent and rename
	// the top-level entry to the destination's basename.
	oldBase := srcInfo.RebaseName
	if oldBase == "" {
		_, oldBase = SplitPathDirEntry(srcInfo.Path)
	}
	dstDir, newBase := SplitPathDirEntry(dstInfo.Path)

	return dstDir, RebaseArchiveEntries(srcContent, oldBase, newBase), nil
}

// RebaseArchiveEntries rewrites the given srcContent archive, replacing the
// first occurrence of oldBase with newBase in every entry name (and in the
// link name of hard-link entries). With archives whose entries all start with
// oldBase this renames the top-level element.
//
// The rewrite runs in a background goroutine that streams into the returned
// reader. Any error hit while reading the source or writing the rebased
// archive — including a failure to finalize the archive — is delivered to the
// reader instead of a clean EOF.
func RebaseArchiveEntries(srcContent io.Reader, oldBase, newBase string) io.ReadCloser {
	if oldBase == string(os.PathSeparator) {
		// If oldBase specifies the root directory, use an empty string as
		// oldBase instead so that newBase doesn't replace the path separator
		// that all paths will start with.
		oldBase = ""
	}

	rebased, w := io.Pipe()

	go func() {
		srcTar := tar.NewReader(srcContent)
		rebasedTar := tar.NewWriter(w)

		for {
			hdr, err := srcTar.Next()
			if err == io.EOF {
				// Signals end of archive. Closing the tar writer flushes the
				// trailer; hand its result to the pipe so that a failed
				// finalization is not reported to the reader as a clean EOF.
				// CloseWithError(nil) behaves like a plain Close.
				w.CloseWithError(rebasedTar.Close())
				return
			}
			if err != nil {
				w.CloseWithError(err)
				return
			}

			// srcContent tar stream, as served by TarWithOptions(), is
			// definitely in PAX format, but tar.Next() mistakenly guesses it
			// as USTAR, which creates a problem: if the newBase is >100
			// characters long, WriteHeader() returns an error like
			// "archive/tar: cannot encode header: Format specifies USTAR; and USTAR cannot encode Name=...".
			//
			// To fix, set the format to PAX here. See docker/for-linux issue #484.
			hdr.Format = tar.FormatPAX
			hdr.Name = strings.Replace(hdr.Name, oldBase, newBase, 1)
			if hdr.Typeflag == tar.TypeLink {
				// Hard links reference another entry of the same archive, so
				// their target has to be rebased as well.
				hdr.Linkname = strings.Replace(hdr.Linkname, oldBase, newBase, 1)
			}

			if err = rebasedTar.WriteHeader(hdr); err != nil {
				w.CloseWithError(err)
				return
			}

			// Ignoring GoSec G110. See https://github.com/securego/gosec/pull/433
			// and https://cure53.de/pentest-report_opa.pdf, which recommends to
			// replace io.Copy with io.CopyN. The latter allows to specify the
			// maximum number of bytes that should be read. By properly defining
			// the limit, it can be assured that a GZip compression bomb cannot
			// easily cause a Denial-of-Service.
			// After reviewing with @tonistiigi and @cpuguy83, this should not
			// affect us, because here we do not read into memory, hence should
			// not be vulnerable to this code consuming memory.
			//nolint:gosec // G110: Potential DoS vulnerability via decompression bomb (gosec)
			if _, err = io.Copy(rebasedTar, srcTar); err != nil {
				w.CloseWithError(err)
				return
			}
		}
	}()

	return rebased
}

// TODO @gupta-ak. These might have to be changed in the future to be
// continuity driver aware as well to support LCOW.

// CopyResource performs an archive copy from the given source path to the
// given destination path. The source path MUST exist and the destination
// path's parent directory must exist.
func CopyResource(srcPath, dstPath string, followLink bool) error {
	var (
		srcInfo CopyInfo
		err     error
	)

	// Ensure in platform semantics
	srcPath = normalizePath(srcPath)
	dstPath = normalizePath(dstPath)

	// Clean the source and destination paths.
	srcPath = PreserveTrailingDotOrSeparator(filepath.Clean(srcPath), srcPath, os.PathSeparator)
	dstPath = PreserveTrailingDotOrSeparator(filepath.Clean(dstPath), dstPath, os.PathSeparator)

	if srcInfo, err = CopyInfoSourcePath(srcPath, followLink); err != nil {
		return err
	}

	content, err := TarResource(srcInfo)
	if err != nil {
		return err
	}
	defer content.Close()

	return CopyTo(content, srcInfo, dstPath)
}

// CopyTo extracts the given archive content, whose entries are described by
// srcInfo, to dstPath. The archive is rebased as needed by PrepareArchiveCopy
// and then untarred with NoLchown and NoOverwriteDirNonDir set.
func CopyTo(content io.Reader, srcInfo CopyInfo, dstPath string) error {
	// The destination itself need not exist; CopyInfoDestinationPath makes
	// sure that at least its parent directory does.
	dstInfo, err := CopyInfoDestinationPath(normalizePath(dstPath))
	if err != nil {
		return err
	}

	extractDir, rebased, err := PrepareArchiveCopy(content, srcInfo, dstInfo)
	if err != nil {
		return err
	}
	defer rebased.Close()

	return Untar(rebased, extractDir, &TarOptions{
		NoLchown:             true,
		NoOverwriteDirNonDir: true,
	})
}

// ResolveHostSourcePath decides real path need to be copied with parameters such as
// whether to follow symbol link or not, if followLink is true, resolvedPath will return
// link target of any symbol link file, else it will only resolve symlink of directory
// but return symbol link file itself without resolving.
func ResolveHostSourcePath(path string, followLink bool) (resolvedPath, rebaseName string, err error) {
	if followLink {
		resolvedPath, err = filepath.EvalSymlinks(path)
		if err != nil {
			return
		}

		resolvedPath, rebaseName = GetRebaseName(path, resolvedPath)
	} else {
		dirPath, basePath := filepath.Split(path)

		// if not follow symbol link, then resolve symbol link of parent dir
		var resolvedDirPath string
		resolvedDirPath, err = filepath.EvalSymlinks(dirPath)
		if err != nil {
			return
		}
		// resolvedDirPath will have been cleaned (no trailing path separators) so
		// we can manually join it with the base path element.
		resolvedPath = resolvedDirPath + string(filepath.Separator) + basePath
		if hasTrailingPathSeparator(path, os.PathSeparator) &&
			filepath.Base(path) != filepath.Base(resolvedPath) {
			rebaseName = filepath.Base(path)
		}
	}
	return resolvedPath, rebaseName, nil
}

// GetRebaseName compares path with its symlink-resolved form resolvedPath.
// It returns resolvedPath with the trailing "." segment and/or trailing
// separator of path re-applied, plus the name the archive entry should be
// rebased to: the basename of path when it differs from the basename of the
// completed resolved path, or "" when no rebase is needed.
func GetRebaseName(path, resolvedPath string) (string, string) {
	sep := string(filepath.Separator)

	// resolvedPath will have been cleaned (no trailing path separators and
	// dot), so those can be appended manually where path had them.
	if specifiesCurrentDir(path) && !specifiesCurrentDir(resolvedPath) {
		resolvedPath += sep + "."
	}
	if hasTrailingPathSeparator(path, os.PathSeparator) && !hasTrailingPathSeparator(resolvedPath, os.PathSeparator) {
		resolvedPath += sep
	}

	requested := filepath.Base(path)
	if requested == filepath.Base(resolvedPath) {
		return resolvedPath, ""
	}

	// Symlink evaluation changed the last path component, so the name in
	// the archive being copied has to be rebased to the originally
	// requested name.
	return resolvedPath, requested
}