copy.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486
  1. package archive // import "github.com/docker/docker/pkg/archive"
  2. import (
  3. "archive/tar"
  4. "context"
  5. "errors"
  6. "io"
  7. "os"
  8. "path/filepath"
  9. "strings"
  10. "github.com/containerd/log"
  11. "github.com/docker/docker/pkg/system"
  12. )
// Errors used or returned by this file.
var (
	// ErrNotDirectory is returned by CopyInfoDestinationPath when the parent
	// of a non-existent destination path exists but is not a directory.
	ErrNotDirectory = errors.New("not a directory")
	// ErrDirNotExists is returned by PrepareArchiveCopy when the destination
	// does not exist, is asserted to be a directory (trailing separator or
	// `.` segment), and the source is not a directory.
	ErrDirNotExists = errors.New("no such directory")
	// ErrCannotCopyDir is returned by PrepareArchiveCopy when the source is
	// a directory but the destination already exists as a non-directory.
	ErrCannotCopyDir = errors.New("cannot copy directory")
	// ErrInvalidCopySource is not returned by any code visible in this file.
	// NOTE(review): presumably used by callers elsewhere in the package.
	ErrInvalidCopySource = errors.New("invalid copy source content")
)
  20. // PreserveTrailingDotOrSeparator returns the given cleaned path (after
  21. // processing using any utility functions from the path or filepath stdlib
  22. // packages) and appends a trailing `/.` or `/` if its corresponding original
  23. // path (from before being processed by utility functions from the path or
  24. // filepath stdlib packages) ends with a trailing `/.` or `/`. If the cleaned
  25. // path already ends in a `.` path segment, then another is not added. If the
  26. // clean path already ends in a path separator, then another is not added.
  27. func PreserveTrailingDotOrSeparator(cleanedPath string, originalPath string) string {
  28. // Ensure paths are in platform semantics
  29. cleanedPath = normalizePath(cleanedPath)
  30. originalPath = normalizePath(originalPath)
  31. if !specifiesCurrentDir(cleanedPath) && specifiesCurrentDir(originalPath) {
  32. if !hasTrailingPathSeparator(cleanedPath) {
  33. // Add a separator if it doesn't already end with one (a cleaned
  34. // path would only end in a separator if it is the root).
  35. cleanedPath += string(filepath.Separator)
  36. }
  37. cleanedPath += "."
  38. }
  39. if !hasTrailingPathSeparator(cleanedPath) && hasTrailingPathSeparator(originalPath) {
  40. cleanedPath += string(filepath.Separator)
  41. }
  42. return cleanedPath
  43. }
  44. // assertsDirectory returns whether the given path is
  45. // asserted to be a directory, i.e., the path ends with
  46. // a trailing '/' or `/.`, assuming a path separator of `/`.
  47. func assertsDirectory(path string) bool {
  48. return hasTrailingPathSeparator(path) || specifiesCurrentDir(path)
  49. }
  50. // hasTrailingPathSeparator returns whether the given
  51. // path ends with the system's path separator character.
  52. func hasTrailingPathSeparator(path string) bool {
  53. return len(path) > 0 && path[len(path)-1] == filepath.Separator
  54. }
  55. // specifiesCurrentDir returns whether the given path specifies
  56. // a "current directory", i.e., the last path segment is `.`.
  57. func specifiesCurrentDir(path string) bool {
  58. return filepath.Base(path) == "."
  59. }
  60. // SplitPathDirEntry splits the given path between its directory name and its
  61. // basename by first cleaning the path but preserves a trailing "." if the
  62. // original path specified the current directory.
  63. func SplitPathDirEntry(path string) (dir, base string) {
  64. cleanedPath := filepath.Clean(filepath.FromSlash(path))
  65. if specifiesCurrentDir(path) {
  66. cleanedPath += string(os.PathSeparator) + "."
  67. }
  68. return filepath.Dir(cleanedPath), filepath.Base(cleanedPath)
  69. }
  70. // TarResource archives the resource described by the given CopyInfo to a Tar
  71. // archive. A non-nil error is returned if sourcePath does not exist or is
  72. // asserted to be a directory but exists as another type of file.
  73. //
  74. // This function acts as a convenient wrapper around TarWithOptions, which
  75. // requires a directory as the source path. TarResource accepts either a
  76. // directory or a file path and correctly sets the Tar options.
  77. func TarResource(sourceInfo CopyInfo) (content io.ReadCloser, err error) {
  78. return TarResourceRebase(sourceInfo.Path, sourceInfo.RebaseName)
  79. }
  80. // TarResourceRebase is like TarResource but renames the first path element of
  81. // items in the resulting tar archive to match the given rebaseName if not "".
  82. func TarResourceRebase(sourcePath, rebaseName string) (content io.ReadCloser, err error) {
  83. sourcePath = normalizePath(sourcePath)
  84. if _, err = os.Lstat(sourcePath); err != nil {
  85. // Catches the case where the source does not exist or is not a
  86. // directory if asserted to be a directory, as this also causes an
  87. // error.
  88. return
  89. }
  90. // Separate the source path between its directory and
  91. // the entry in that directory which we are archiving.
  92. sourceDir, sourceBase := SplitPathDirEntry(sourcePath)
  93. opts := TarResourceRebaseOpts(sourceBase, rebaseName)
  94. log.G(context.TODO()).Debugf("copying %q from %q", sourceBase, sourceDir)
  95. return TarWithOptions(sourceDir, opts)
  96. }
  97. // TarResourceRebaseOpts does not preform the Tar, but instead just creates the rebase
  98. // parameters to be sent to TarWithOptions (the TarOptions struct)
  99. func TarResourceRebaseOpts(sourceBase string, rebaseName string) *TarOptions {
  100. filter := []string{sourceBase}
  101. return &TarOptions{
  102. Compression: Uncompressed,
  103. IncludeFiles: filter,
  104. IncludeSourceDir: true,
  105. RebaseNames: map[string]string{
  106. sourceBase: rebaseName,
  107. },
  108. }
  109. }
// CopyInfo holds basic info about the source
// or destination path of a copy operation.
type CopyInfo struct {
	// Path is the path of the resource. The constructors in this file store
	// the path after symlink resolution.
	Path string
	// Exists is set when stat'ing Path succeeded. It is left false for a
	// destination that does not exist yet.
	Exists bool
	// IsDir records whether the stat'ed resource is a directory.
	IsDir bool
	// RebaseName, when non-empty, is used by PrepareArchiveCopy in place of
	// the base name of Path as the name to replace in archive entries, and
	// by TarResource as the name to rebase archived items to.
	RebaseName string
}
  118. // CopyInfoSourcePath stats the given path to create a CopyInfo
  119. // struct representing that resource for the source of an archive copy
  120. // operation. The given path should be an absolute local path. A source path
  121. // has all symlinks evaluated that appear before the last path separator ("/"
  122. // on Unix). As it is to be a copy source, the path must exist.
  123. func CopyInfoSourcePath(path string, followLink bool) (CopyInfo, error) {
  124. // normalize the file path and then evaluate the symbol link
  125. // we will use the target file instead of the symbol link if
  126. // followLink is set
  127. path = normalizePath(path)
  128. resolvedPath, rebaseName, err := ResolveHostSourcePath(path, followLink)
  129. if err != nil {
  130. return CopyInfo{}, err
  131. }
  132. stat, err := os.Lstat(resolvedPath)
  133. if err != nil {
  134. return CopyInfo{}, err
  135. }
  136. return CopyInfo{
  137. Path: resolvedPath,
  138. Exists: true,
  139. IsDir: stat.IsDir(),
  140. RebaseName: rebaseName,
  141. }, nil
  142. }
  143. // CopyInfoDestinationPath stats the given path to create a CopyInfo
  144. // struct representing that resource for the destination of an archive copy
  145. // operation. The given path should be an absolute local path.
  146. func CopyInfoDestinationPath(path string) (info CopyInfo, err error) {
  147. maxSymlinkIter := 10 // filepath.EvalSymlinks uses 255, but 10 already seems like a lot.
  148. path = normalizePath(path)
  149. originalPath := path
  150. stat, err := os.Lstat(path)
  151. if err == nil && stat.Mode()&os.ModeSymlink == 0 {
  152. // The path exists and is not a symlink.
  153. return CopyInfo{
  154. Path: path,
  155. Exists: true,
  156. IsDir: stat.IsDir(),
  157. }, nil
  158. }
  159. // While the path is a symlink.
  160. for n := 0; err == nil && stat.Mode()&os.ModeSymlink != 0; n++ {
  161. if n > maxSymlinkIter {
  162. // Don't follow symlinks more than this arbitrary number of times.
  163. return CopyInfo{}, errors.New("too many symlinks in " + originalPath)
  164. }
  165. // The path is a symbolic link. We need to evaluate it so that the
  166. // destination of the copy operation is the link target and not the
  167. // link itself. This is notably different than CopyInfoSourcePath which
  168. // only evaluates symlinks before the last appearing path separator.
  169. // Also note that it is okay if the last path element is a broken
  170. // symlink as the copy operation should create the target.
  171. var linkTarget string
  172. linkTarget, err = os.Readlink(path)
  173. if err != nil {
  174. return CopyInfo{}, err
  175. }
  176. if !system.IsAbs(linkTarget) {
  177. // Join with the parent directory.
  178. dstParent, _ := SplitPathDirEntry(path)
  179. linkTarget = filepath.Join(dstParent, linkTarget)
  180. }
  181. path = linkTarget
  182. stat, err = os.Lstat(path)
  183. }
  184. if err != nil {
  185. // It's okay if the destination path doesn't exist. We can still
  186. // continue the copy operation if the parent directory exists.
  187. if !os.IsNotExist(err) {
  188. return CopyInfo{}, err
  189. }
  190. // Ensure destination parent dir exists.
  191. dstParent, _ := SplitPathDirEntry(path)
  192. parentDirStat, err := os.Stat(dstParent)
  193. if err != nil {
  194. return CopyInfo{}, err
  195. }
  196. if !parentDirStat.IsDir() {
  197. return CopyInfo{}, ErrNotDirectory
  198. }
  199. return CopyInfo{Path: path}, nil
  200. }
  201. // The path exists after resolving symlinks.
  202. return CopyInfo{
  203. Path: path,
  204. Exists: true,
  205. IsDir: stat.IsDir(),
  206. }, nil
  207. }
  208. // PrepareArchiveCopy prepares the given srcContent archive, which should
  209. // contain the archived resource described by srcInfo, to the destination
  210. // described by dstInfo. Returns the possibly modified content archive along
  211. // with the path to the destination directory which it should be extracted to.
  212. func PrepareArchiveCopy(srcContent io.Reader, srcInfo, dstInfo CopyInfo) (dstDir string, content io.ReadCloser, err error) {
  213. // Ensure in platform semantics
  214. srcInfo.Path = normalizePath(srcInfo.Path)
  215. dstInfo.Path = normalizePath(dstInfo.Path)
  216. // Separate the destination path between its directory and base
  217. // components in case the source archive contents need to be rebased.
  218. dstDir, dstBase := SplitPathDirEntry(dstInfo.Path)
  219. _, srcBase := SplitPathDirEntry(srcInfo.Path)
  220. switch {
  221. case dstInfo.Exists && dstInfo.IsDir:
  222. // The destination exists as a directory. No alteration
  223. // to srcContent is needed as its contents can be
  224. // simply extracted to the destination directory.
  225. return dstInfo.Path, io.NopCloser(srcContent), nil
  226. case dstInfo.Exists && srcInfo.IsDir:
  227. // The destination exists as some type of file and the source
  228. // content is a directory. This is an error condition since
  229. // you cannot copy a directory to an existing file location.
  230. return "", nil, ErrCannotCopyDir
  231. case dstInfo.Exists:
  232. // The destination exists as some type of file and the source content
  233. // is also a file. The source content entry will have to be renamed to
  234. // have a basename which matches the destination path's basename.
  235. if len(srcInfo.RebaseName) != 0 {
  236. srcBase = srcInfo.RebaseName
  237. }
  238. return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
  239. case srcInfo.IsDir:
  240. // The destination does not exist and the source content is an archive
  241. // of a directory. The archive should be extracted to the parent of
  242. // the destination path instead, and when it is, the directory that is
  243. // created as a result should take the name of the destination path.
  244. // The source content entries will have to be renamed to have a
  245. // basename which matches the destination path's basename.
  246. if len(srcInfo.RebaseName) != 0 {
  247. srcBase = srcInfo.RebaseName
  248. }
  249. return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
  250. case assertsDirectory(dstInfo.Path):
  251. // The destination does not exist and is asserted to be created as a
  252. // directory, but the source content is not a directory. This is an
  253. // error condition since you cannot create a directory from a file
  254. // source.
  255. return "", nil, ErrDirNotExists
  256. default:
  257. // The last remaining case is when the destination does not exist, is
  258. // not asserted to be a directory, and the source content is not an
  259. // archive of a directory. It this case, the destination file will need
  260. // to be created when the archive is extracted and the source content
  261. // entry will have to be renamed to have a basename which matches the
  262. // destination path's basename.
  263. if len(srcInfo.RebaseName) != 0 {
  264. srcBase = srcInfo.RebaseName
  265. }
  266. return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
  267. }
  268. }
  269. // RebaseArchiveEntries rewrites the given srcContent archive replacing
  270. // an occurrence of oldBase with newBase at the beginning of entry names.
  271. func RebaseArchiveEntries(srcContent io.Reader, oldBase, newBase string) io.ReadCloser {
  272. if oldBase == string(os.PathSeparator) {
  273. // If oldBase specifies the root directory, use an empty string as
  274. // oldBase instead so that newBase doesn't replace the path separator
  275. // that all paths will start with.
  276. oldBase = ""
  277. }
  278. rebased, w := io.Pipe()
  279. go func() {
  280. srcTar := tar.NewReader(srcContent)
  281. rebasedTar := tar.NewWriter(w)
  282. for {
  283. hdr, err := srcTar.Next()
  284. if err == io.EOF {
  285. // Signals end of archive.
  286. rebasedTar.Close()
  287. w.Close()
  288. return
  289. }
  290. if err != nil {
  291. w.CloseWithError(err)
  292. return
  293. }
  294. // srcContent tar stream, as served by TarWithOptions(), is
  295. // definitely in PAX format, but tar.Next() mistakenly guesses it
  296. // as USTAR, which creates a problem: if the newBase is >100
  297. // characters long, WriteHeader() returns an error like
  298. // "archive/tar: cannot encode header: Format specifies USTAR; and USTAR cannot encode Name=...".
  299. //
  300. // To fix, set the format to PAX here. See docker/for-linux issue #484.
  301. hdr.Format = tar.FormatPAX
  302. hdr.Name = strings.Replace(hdr.Name, oldBase, newBase, 1)
  303. if hdr.Typeflag == tar.TypeLink {
  304. hdr.Linkname = strings.Replace(hdr.Linkname, oldBase, newBase, 1)
  305. }
  306. if err = rebasedTar.WriteHeader(hdr); err != nil {
  307. w.CloseWithError(err)
  308. return
  309. }
  310. // Ignoring GoSec G110. See https://github.com/securego/gosec/pull/433
  311. // and https://cure53.de/pentest-report_opa.pdf, which recommends to
  312. // replace io.Copy with io.CopyN7. The latter allows to specify the
  313. // maximum number of bytes that should be read. By properly defining
  314. // the limit, it can be assured that a GZip compression bomb cannot
  315. // easily cause a Denial-of-Service.
  316. // After reviewing with @tonistiigi and @cpuguy83, this should not
  317. // affect us, because here we do not read into memory, hence should
  318. // not be vulnerable to this code consuming memory.
  319. //nolint:gosec // G110: Potential DoS vulnerability via decompression bomb (gosec)
  320. if _, err = io.Copy(rebasedTar, srcTar); err != nil {
  321. w.CloseWithError(err)
  322. return
  323. }
  324. }
  325. }()
  326. return rebased
  327. }
  328. // CopyResource performs an archive copy from the given source path to the
  329. // given destination path. The source path MUST exist and the destination
  330. // path's parent directory must exist.
  331. func CopyResource(srcPath, dstPath string, followLink bool) error {
  332. var (
  333. srcInfo CopyInfo
  334. err error
  335. )
  336. // Ensure in platform semantics
  337. srcPath = normalizePath(srcPath)
  338. dstPath = normalizePath(dstPath)
  339. // Clean the source and destination paths.
  340. srcPath = PreserveTrailingDotOrSeparator(filepath.Clean(srcPath), srcPath)
  341. dstPath = PreserveTrailingDotOrSeparator(filepath.Clean(dstPath), dstPath)
  342. if srcInfo, err = CopyInfoSourcePath(srcPath, followLink); err != nil {
  343. return err
  344. }
  345. content, err := TarResource(srcInfo)
  346. if err != nil {
  347. return err
  348. }
  349. defer content.Close()
  350. return CopyTo(content, srcInfo, dstPath)
  351. }
  352. // CopyTo handles extracting the given content whose
  353. // entries should be sourced from srcInfo to dstPath.
  354. func CopyTo(content io.Reader, srcInfo CopyInfo, dstPath string) error {
  355. // The destination path need not exist, but CopyInfoDestinationPath will
  356. // ensure that at least the parent directory exists.
  357. dstInfo, err := CopyInfoDestinationPath(normalizePath(dstPath))
  358. if err != nil {
  359. return err
  360. }
  361. dstDir, copyArchive, err := PrepareArchiveCopy(content, srcInfo, dstInfo)
  362. if err != nil {
  363. return err
  364. }
  365. defer copyArchive.Close()
  366. options := &TarOptions{
  367. NoLchown: true,
  368. NoOverwriteDirNonDir: true,
  369. }
  370. return Untar(copyArchive, dstDir, options)
  371. }
  372. // ResolveHostSourcePath decides real path need to be copied with parameters such as
  373. // whether to follow symbol link or not, if followLink is true, resolvedPath will return
  374. // link target of any symbol link file, else it will only resolve symlink of directory
  375. // but return symbol link file itself without resolving.
  376. func ResolveHostSourcePath(path string, followLink bool) (resolvedPath, rebaseName string, err error) {
  377. if followLink {
  378. resolvedPath, err = filepath.EvalSymlinks(path)
  379. if err != nil {
  380. return
  381. }
  382. resolvedPath, rebaseName = GetRebaseName(path, resolvedPath)
  383. } else {
  384. dirPath, basePath := filepath.Split(path)
  385. // if not follow symbol link, then resolve symbol link of parent dir
  386. var resolvedDirPath string
  387. resolvedDirPath, err = filepath.EvalSymlinks(dirPath)
  388. if err != nil {
  389. return
  390. }
  391. // resolvedDirPath will have been cleaned (no trailing path separators) so
  392. // we can manually join it with the base path element.
  393. resolvedPath = resolvedDirPath + string(filepath.Separator) + basePath
  394. if hasTrailingPathSeparator(path) &&
  395. filepath.Base(path) != filepath.Base(resolvedPath) {
  396. rebaseName = filepath.Base(path)
  397. }
  398. }
  399. return resolvedPath, rebaseName, nil
  400. }
  401. // GetRebaseName normalizes and compares path and resolvedPath,
  402. // return completed resolved path and rebased file name
  403. func GetRebaseName(path, resolvedPath string) (string, string) {
  404. // linkTarget will have been cleaned (no trailing path separators and dot) so
  405. // we can manually join it with them
  406. var rebaseName string
  407. if specifiesCurrentDir(path) &&
  408. !specifiesCurrentDir(resolvedPath) {
  409. resolvedPath += string(filepath.Separator) + "."
  410. }
  411. if hasTrailingPathSeparator(path) &&
  412. !hasTrailingPathSeparator(resolvedPath) {
  413. resolvedPath += string(filepath.Separator)
  414. }
  415. if filepath.Base(path) != filepath.Base(resolvedPath) {
  416. // In the case where the path had a trailing separator and a symlink
  417. // evaluation has changed the last path component, we will need to
  418. // rebase the name in the archive that is being copied to match the
  419. // originally requested name.
  420. rebaseName = filepath.Base(path)
  421. }
  422. return resolvedPath, rebaseName
  423. }