copy.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486
  1. package archive // import "github.com/docker/docker/pkg/archive"
  2. import (
  3. "archive/tar"
  4. "errors"
  5. "io"
  6. "os"
  7. "path/filepath"
  8. "strings"
  9. "github.com/docker/docker/pkg/system"
  10. "github.com/sirupsen/logrus"
  11. )
  12. // Errors used or returned by this file.
  13. var (
  14. ErrNotDirectory = errors.New("not a directory")
  15. ErrDirNotExists = errors.New("no such directory")
  16. ErrCannotCopyDir = errors.New("cannot copy directory")
  17. ErrInvalidCopySource = errors.New("invalid copy source content")
  18. )
  19. // PreserveTrailingDotOrSeparator returns the given cleaned path (after
  20. // processing using any utility functions from the path or filepath stdlib
  21. // packages) and appends a trailing `/.` or `/` if its corresponding original
  22. // path (from before being processed by utility functions from the path or
  23. // filepath stdlib packages) ends with a trailing `/.` or `/`. If the cleaned
  24. // path already ends in a `.` path segment, then another is not added. If the
  25. // clean path already ends in the separator, then another is not added.
  26. func PreserveTrailingDotOrSeparator(cleanedPath string, originalPath string, sep byte) string {
  27. // Ensure paths are in platform semantics
  28. cleanedPath = strings.ReplaceAll(cleanedPath, "/", string(sep))
  29. originalPath = strings.ReplaceAll(originalPath, "/", string(sep))
  30. if !specifiesCurrentDir(cleanedPath) && specifiesCurrentDir(originalPath) {
  31. if !hasTrailingPathSeparator(cleanedPath, sep) {
  32. // Add a separator if it doesn't already end with one (a cleaned
  33. // path would only end in a separator if it is the root).
  34. cleanedPath += string(sep)
  35. }
  36. cleanedPath += "."
  37. }
  38. if !hasTrailingPathSeparator(cleanedPath, sep) && hasTrailingPathSeparator(originalPath, sep) {
  39. cleanedPath += string(sep)
  40. }
  41. return cleanedPath
  42. }
  43. // assertsDirectory returns whether the given path is
  44. // asserted to be a directory, i.e., the path ends with
  45. // a trailing '/' or `/.`, assuming a path separator of `/`.
  46. func assertsDirectory(path string, sep byte) bool {
  47. return hasTrailingPathSeparator(path, sep) || specifiesCurrentDir(path)
  48. }
  49. // hasTrailingPathSeparator returns whether the given
  50. // path ends with the system's path separator character.
  51. func hasTrailingPathSeparator(path string, sep byte) bool {
  52. return len(path) > 0 && path[len(path)-1] == sep
  53. }
  54. // specifiesCurrentDir returns whether the given path specifies
  55. // a "current directory", i.e., the last path segment is `.`.
  56. func specifiesCurrentDir(path string) bool {
  57. return filepath.Base(path) == "."
  58. }
  59. // SplitPathDirEntry splits the given path between its directory name and its
  60. // basename by first cleaning the path but preserves a trailing "." if the
  61. // original path specified the current directory.
  62. func SplitPathDirEntry(path string) (dir, base string) {
  63. cleanedPath := filepath.Clean(filepath.FromSlash(path))
  64. if specifiesCurrentDir(path) {
  65. cleanedPath += string(os.PathSeparator) + "."
  66. }
  67. return filepath.Dir(cleanedPath), filepath.Base(cleanedPath)
  68. }
  69. // TarResource archives the resource described by the given CopyInfo to a Tar
  70. // archive. A non-nil error is returned if sourcePath does not exist or is
  71. // asserted to be a directory but exists as another type of file.
  72. //
  73. // This function acts as a convenient wrapper around TarWithOptions, which
  74. // requires a directory as the source path. TarResource accepts either a
  75. // directory or a file path and correctly sets the Tar options.
  76. func TarResource(sourceInfo CopyInfo) (content io.ReadCloser, err error) {
  77. return TarResourceRebase(sourceInfo.Path, sourceInfo.RebaseName)
  78. }
  79. // TarResourceRebase is like TarResource but renames the first path element of
  80. // items in the resulting tar archive to match the given rebaseName if not "".
  81. func TarResourceRebase(sourcePath, rebaseName string) (content io.ReadCloser, err error) {
  82. sourcePath = normalizePath(sourcePath)
  83. if _, err = os.Lstat(sourcePath); err != nil {
  84. // Catches the case where the source does not exist or is not a
  85. // directory if asserted to be a directory, as this also causes an
  86. // error.
  87. return
  88. }
  89. // Separate the source path between its directory and
  90. // the entry in that directory which we are archiving.
  91. sourceDir, sourceBase := SplitPathDirEntry(sourcePath)
  92. opts := TarResourceRebaseOpts(sourceBase, rebaseName)
  93. logrus.Debugf("copying %q from %q", sourceBase, sourceDir)
  94. return TarWithOptions(sourceDir, opts)
  95. }
  96. // TarResourceRebaseOpts does not preform the Tar, but instead just creates the rebase
  97. // parameters to be sent to TarWithOptions (the TarOptions struct)
  98. func TarResourceRebaseOpts(sourceBase string, rebaseName string) *TarOptions {
  99. filter := []string{sourceBase}
  100. return &TarOptions{
  101. Compression: Uncompressed,
  102. IncludeFiles: filter,
  103. IncludeSourceDir: true,
  104. RebaseNames: map[string]string{
  105. sourceBase: rebaseName,
  106. },
  107. }
  108. }
  109. // CopyInfo holds basic info about the source
  110. // or destination path of a copy operation.
  111. type CopyInfo struct {
  112. Path string
  113. Exists bool
  114. IsDir bool
  115. RebaseName string
  116. }
  117. // CopyInfoSourcePath stats the given path to create a CopyInfo
  118. // struct representing that resource for the source of an archive copy
  119. // operation. The given path should be an absolute local path. A source path
  120. // has all symlinks evaluated that appear before the last path separator ("/"
  121. // on Unix). As it is to be a copy source, the path must exist.
  122. func CopyInfoSourcePath(path string, followLink bool) (CopyInfo, error) {
  123. // normalize the file path and then evaluate the symbol link
  124. // we will use the target file instead of the symbol link if
  125. // followLink is set
  126. path = normalizePath(path)
  127. resolvedPath, rebaseName, err := ResolveHostSourcePath(path, followLink)
  128. if err != nil {
  129. return CopyInfo{}, err
  130. }
  131. stat, err := os.Lstat(resolvedPath)
  132. if err != nil {
  133. return CopyInfo{}, err
  134. }
  135. return CopyInfo{
  136. Path: resolvedPath,
  137. Exists: true,
  138. IsDir: stat.IsDir(),
  139. RebaseName: rebaseName,
  140. }, nil
  141. }
  142. // CopyInfoDestinationPath stats the given path to create a CopyInfo
  143. // struct representing that resource for the destination of an archive copy
  144. // operation. The given path should be an absolute local path.
  145. func CopyInfoDestinationPath(path string) (info CopyInfo, err error) {
  146. maxSymlinkIter := 10 // filepath.EvalSymlinks uses 255, but 10 already seems like a lot.
  147. path = normalizePath(path)
  148. originalPath := path
  149. stat, err := os.Lstat(path)
  150. if err == nil && stat.Mode()&os.ModeSymlink == 0 {
  151. // The path exists and is not a symlink.
  152. return CopyInfo{
  153. Path: path,
  154. Exists: true,
  155. IsDir: stat.IsDir(),
  156. }, nil
  157. }
  158. // While the path is a symlink.
  159. for n := 0; err == nil && stat.Mode()&os.ModeSymlink != 0; n++ {
  160. if n > maxSymlinkIter {
  161. // Don't follow symlinks more than this arbitrary number of times.
  162. return CopyInfo{}, errors.New("too many symlinks in " + originalPath)
  163. }
  164. // The path is a symbolic link. We need to evaluate it so that the
  165. // destination of the copy operation is the link target and not the
  166. // link itself. This is notably different than CopyInfoSourcePath which
  167. // only evaluates symlinks before the last appearing path separator.
  168. // Also note that it is okay if the last path element is a broken
  169. // symlink as the copy operation should create the target.
  170. var linkTarget string
  171. linkTarget, err = os.Readlink(path)
  172. if err != nil {
  173. return CopyInfo{}, err
  174. }
  175. if !system.IsAbs(linkTarget) {
  176. // Join with the parent directory.
  177. dstParent, _ := SplitPathDirEntry(path)
  178. linkTarget = filepath.Join(dstParent, linkTarget)
  179. }
  180. path = linkTarget
  181. stat, err = os.Lstat(path)
  182. }
  183. if err != nil {
  184. // It's okay if the destination path doesn't exist. We can still
  185. // continue the copy operation if the parent directory exists.
  186. if !os.IsNotExist(err) {
  187. return CopyInfo{}, err
  188. }
  189. // Ensure destination parent dir exists.
  190. dstParent, _ := SplitPathDirEntry(path)
  191. parentDirStat, err := os.Stat(dstParent)
  192. if err != nil {
  193. return CopyInfo{}, err
  194. }
  195. if !parentDirStat.IsDir() {
  196. return CopyInfo{}, ErrNotDirectory
  197. }
  198. return CopyInfo{Path: path}, nil
  199. }
  200. // The path exists after resolving symlinks.
  201. return CopyInfo{
  202. Path: path,
  203. Exists: true,
  204. IsDir: stat.IsDir(),
  205. }, nil
  206. }
  207. // PrepareArchiveCopy prepares the given srcContent archive, which should
  208. // contain the archived resource described by srcInfo, to the destination
  209. // described by dstInfo. Returns the possibly modified content archive along
  210. // with the path to the destination directory which it should be extracted to.
  211. func PrepareArchiveCopy(srcContent io.Reader, srcInfo, dstInfo CopyInfo) (dstDir string, content io.ReadCloser, err error) {
  212. // Ensure in platform semantics
  213. srcInfo.Path = normalizePath(srcInfo.Path)
  214. dstInfo.Path = normalizePath(dstInfo.Path)
  215. // Separate the destination path between its directory and base
  216. // components in case the source archive contents need to be rebased.
  217. dstDir, dstBase := SplitPathDirEntry(dstInfo.Path)
  218. _, srcBase := SplitPathDirEntry(srcInfo.Path)
  219. switch {
  220. case dstInfo.Exists && dstInfo.IsDir:
  221. // The destination exists as a directory. No alteration
  222. // to srcContent is needed as its contents can be
  223. // simply extracted to the destination directory.
  224. return dstInfo.Path, io.NopCloser(srcContent), nil
  225. case dstInfo.Exists && srcInfo.IsDir:
  226. // The destination exists as some type of file and the source
  227. // content is a directory. This is an error condition since
  228. // you cannot copy a directory to an existing file location.
  229. return "", nil, ErrCannotCopyDir
  230. case dstInfo.Exists:
  231. // The destination exists as some type of file and the source content
  232. // is also a file. The source content entry will have to be renamed to
  233. // have a basename which matches the destination path's basename.
  234. if len(srcInfo.RebaseName) != 0 {
  235. srcBase = srcInfo.RebaseName
  236. }
  237. return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
  238. case srcInfo.IsDir:
  239. // The destination does not exist and the source content is an archive
  240. // of a directory. The archive should be extracted to the parent of
  241. // the destination path instead, and when it is, the directory that is
  242. // created as a result should take the name of the destination path.
  243. // The source content entries will have to be renamed to have a
  244. // basename which matches the destination path's basename.
  245. if len(srcInfo.RebaseName) != 0 {
  246. srcBase = srcInfo.RebaseName
  247. }
  248. return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
  249. case assertsDirectory(dstInfo.Path, os.PathSeparator):
  250. // The destination does not exist and is asserted to be created as a
  251. // directory, but the source content is not a directory. This is an
  252. // error condition since you cannot create a directory from a file
  253. // source.
  254. return "", nil, ErrDirNotExists
  255. default:
  256. // The last remaining case is when the destination does not exist, is
  257. // not asserted to be a directory, and the source content is not an
  258. // archive of a directory. It this case, the destination file will need
  259. // to be created when the archive is extracted and the source content
  260. // entry will have to be renamed to have a basename which matches the
  261. // destination path's basename.
  262. if len(srcInfo.RebaseName) != 0 {
  263. srcBase = srcInfo.RebaseName
  264. }
  265. return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
  266. }
  267. }
  268. // RebaseArchiveEntries rewrites the given srcContent archive replacing
  269. // an occurrence of oldBase with newBase at the beginning of entry names.
  270. func RebaseArchiveEntries(srcContent io.Reader, oldBase, newBase string) io.ReadCloser {
  271. if oldBase == string(os.PathSeparator) {
  272. // If oldBase specifies the root directory, use an empty string as
  273. // oldBase instead so that newBase doesn't replace the path separator
  274. // that all paths will start with.
  275. oldBase = ""
  276. }
  277. rebased, w := io.Pipe()
  278. go func() {
  279. srcTar := tar.NewReader(srcContent)
  280. rebasedTar := tar.NewWriter(w)
  281. for {
  282. hdr, err := srcTar.Next()
  283. if err == io.EOF {
  284. // Signals end of archive.
  285. rebasedTar.Close()
  286. w.Close()
  287. return
  288. }
  289. if err != nil {
  290. w.CloseWithError(err)
  291. return
  292. }
  293. // srcContent tar stream, as served by TarWithOptions(), is
  294. // definitely in PAX format, but tar.Next() mistakenly guesses it
  295. // as USTAR, which creates a problem: if the newBase is >100
  296. // characters long, WriteHeader() returns an error like
  297. // "archive/tar: cannot encode header: Format specifies USTAR; and USTAR cannot encode Name=...".
  298. //
  299. // To fix, set the format to PAX here. See docker/for-linux issue #484.
  300. hdr.Format = tar.FormatPAX
  301. hdr.Name = strings.Replace(hdr.Name, oldBase, newBase, 1)
  302. if hdr.Typeflag == tar.TypeLink {
  303. hdr.Linkname = strings.Replace(hdr.Linkname, oldBase, newBase, 1)
  304. }
  305. if err = rebasedTar.WriteHeader(hdr); err != nil {
  306. w.CloseWithError(err)
  307. return
  308. }
  309. // Ignoring GoSec G110. See https://github.com/securego/gosec/pull/433
  310. // and https://cure53.de/pentest-report_opa.pdf, which recommends to
  311. // replace io.Copy with io.CopyN7. The latter allows to specify the
  312. // maximum number of bytes that should be read. By properly defining
  313. // the limit, it can be assured that a GZip compression bomb cannot
  314. // easily cause a Denial-of-Service.
  315. // After reviewing with @tonistiigi and @cpuguy83, this should not
  316. // affect us, because here we do not read into memory, hence should
  317. // not be vulnerable to this code consuming memory.
  318. //nolint:gosec // G110: Potential DoS vulnerability via decompression bomb (gosec)
  319. if _, err = io.Copy(rebasedTar, srcTar); err != nil {
  320. w.CloseWithError(err)
  321. return
  322. }
  323. }
  324. }()
  325. return rebased
  326. }
  327. // CopyResource performs an archive copy from the given source path to the
  328. // given destination path. The source path MUST exist and the destination
  329. // path's parent directory must exist.
  330. func CopyResource(srcPath, dstPath string, followLink bool) error {
  331. var (
  332. srcInfo CopyInfo
  333. err error
  334. )
  335. // Ensure in platform semantics
  336. srcPath = normalizePath(srcPath)
  337. dstPath = normalizePath(dstPath)
  338. // Clean the source and destination paths.
  339. srcPath = PreserveTrailingDotOrSeparator(filepath.Clean(srcPath), srcPath, os.PathSeparator)
  340. dstPath = PreserveTrailingDotOrSeparator(filepath.Clean(dstPath), dstPath, os.PathSeparator)
  341. if srcInfo, err = CopyInfoSourcePath(srcPath, followLink); err != nil {
  342. return err
  343. }
  344. content, err := TarResource(srcInfo)
  345. if err != nil {
  346. return err
  347. }
  348. defer content.Close()
  349. return CopyTo(content, srcInfo, dstPath)
  350. }
  351. // CopyTo handles extracting the given content whose
  352. // entries should be sourced from srcInfo to dstPath.
  353. func CopyTo(content io.Reader, srcInfo CopyInfo, dstPath string) error {
  354. // The destination path need not exist, but CopyInfoDestinationPath will
  355. // ensure that at least the parent directory exists.
  356. dstInfo, err := CopyInfoDestinationPath(normalizePath(dstPath))
  357. if err != nil {
  358. return err
  359. }
  360. dstDir, copyArchive, err := PrepareArchiveCopy(content, srcInfo, dstInfo)
  361. if err != nil {
  362. return err
  363. }
  364. defer copyArchive.Close()
  365. options := &TarOptions{
  366. NoLchown: true,
  367. NoOverwriteDirNonDir: true,
  368. }
  369. return Untar(copyArchive, dstDir, options)
  370. }
  371. // ResolveHostSourcePath decides real path need to be copied with parameters such as
  372. // whether to follow symbol link or not, if followLink is true, resolvedPath will return
  373. // link target of any symbol link file, else it will only resolve symlink of directory
  374. // but return symbol link file itself without resolving.
  375. func ResolveHostSourcePath(path string, followLink bool) (resolvedPath, rebaseName string, err error) {
  376. if followLink {
  377. resolvedPath, err = filepath.EvalSymlinks(path)
  378. if err != nil {
  379. return
  380. }
  381. resolvedPath, rebaseName = GetRebaseName(path, resolvedPath)
  382. } else {
  383. dirPath, basePath := filepath.Split(path)
  384. // if not follow symbol link, then resolve symbol link of parent dir
  385. var resolvedDirPath string
  386. resolvedDirPath, err = filepath.EvalSymlinks(dirPath)
  387. if err != nil {
  388. return
  389. }
  390. // resolvedDirPath will have been cleaned (no trailing path separators) so
  391. // we can manually join it with the base path element.
  392. resolvedPath = resolvedDirPath + string(filepath.Separator) + basePath
  393. if hasTrailingPathSeparator(path, os.PathSeparator) &&
  394. filepath.Base(path) != filepath.Base(resolvedPath) {
  395. rebaseName = filepath.Base(path)
  396. }
  397. }
  398. return resolvedPath, rebaseName, nil
  399. }
  400. // GetRebaseName normalizes and compares path and resolvedPath,
  401. // return completed resolved path and rebased file name
  402. func GetRebaseName(path, resolvedPath string) (string, string) {
  403. // linkTarget will have been cleaned (no trailing path separators and dot) so
  404. // we can manually join it with them
  405. var rebaseName string
  406. if specifiesCurrentDir(path) &&
  407. !specifiesCurrentDir(resolvedPath) {
  408. resolvedPath += string(filepath.Separator) + "."
  409. }
  410. if hasTrailingPathSeparator(path, os.PathSeparator) &&
  411. !hasTrailingPathSeparator(resolvedPath, os.PathSeparator) {
  412. resolvedPath += string(filepath.Separator)
  413. }
  414. if filepath.Base(path) != filepath.Base(resolvedPath) {
  415. // In the case where the path had a trailing separator and a symlink
  416. // evaluation has changed the last path component, we will need to
  417. // rebase the name in the archive that is being copied to match the
  418. // originally requested name.
  419. rebaseName = filepath.Base(path)
  420. }
  421. return resolvedPath, rebaseName
  422. }