copy.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571
  1. package dockerfile // import "github.com/docker/docker/builder/dockerfile"
  2. import (
  3. "archive/tar"
  4. "fmt"
  5. "io"
  6. "mime"
  7. "net/http"
  8. "net/url"
  9. "os"
  10. "path/filepath"
  11. "runtime"
  12. "sort"
  13. "strings"
  14. "time"
  15. "github.com/docker/docker/builder"
  16. "github.com/docker/docker/builder/remotecontext"
  17. "github.com/docker/docker/pkg/archive"
  18. "github.com/docker/docker/pkg/containerfs"
  19. "github.com/docker/docker/pkg/idtools"
  20. "github.com/docker/docker/pkg/ioutils"
  21. "github.com/docker/docker/pkg/progress"
  22. "github.com/docker/docker/pkg/streamformatter"
  23. "github.com/docker/docker/pkg/system"
  24. "github.com/docker/docker/pkg/urlutil"
  25. "github.com/moby/buildkit/frontend/dockerfile/instructions"
  26. specs "github.com/opencontainers/image-spec/specs-go/v1"
  27. "github.com/pkg/errors"
  28. )
// unnamedFilename is the placeholder file name given to a downloaded source
// when no file name could be determined for it.
const unnamedFilename = "__unnamed__"
// pathCache is the key/value store the copier uses to remember hashes it has
// already computed. In this file the keys are an image ID concatenated with a
// source path and the values are hash strings.
type pathCache interface {
	// Load returns the value stored for key and whether it was present.
	Load(key interface{}) (value interface{}, ok bool)
	// Store records value under key.
	Store(key, value interface{})
}
// copyInfo is a data object which stores the metadata about each source file in
// a copyInstruction
type copyInfo struct {
	// root is the filesystem that path is resolved against.
	root containerfs.ContainerFS
	// path is the location of the file or directory relative to root.
	path string
	// hash is the content hash used for build-cache lookups.
	hash string
	// noDecompress, when true, prevents the source from being extracted as
	// an archive even if decompression is otherwise enabled.
	noDecompress bool
}
  42. func (c copyInfo) fullPath() (string, error) {
  43. return c.root.ResolveScopedPath(c.path, true)
  44. }
  45. func newCopyInfoFromSource(source builder.Source, path string, hash string) copyInfo {
  46. return copyInfo{root: source.Root(), path: path, hash: hash}
  47. }
// newCopyInfos returns its variadic arguments as a slice; it is a small
// convenience for building a []copyInfo from individual values.
func newCopyInfos(copyInfos ...copyInfo) []copyInfo {
	return copyInfos
}
// copyInstruction is a fully parsed COPY or ADD command that is passed to
// Builder.performCopy to copy files into the image filesystem
type copyInstruction struct {
	// cmdName is the name of the command being executed; it is used in
	// error messages.
	cmdName string
	// infos holds one entry per resolved source file or directory.
	infos []copyInfo
	// dest is the destination path, converted to the daemon OS separator.
	dest string
	// The fields below are not populated by createCopyInstruction in this
	// file; presumably the caller sets them before performing the copy —
	// TODO confirm against the dispatchers.
	chownStr                string
	allowLocalDecompression bool
	preserveOwnership       bool
}
// copier reads a raw COPY or ADD command, fetches remote sources using a downloader,
// and creates a copyInstruction
type copier struct {
	// imageSource, when non-nil, is the image whose filesystem is used as
	// the copy source instead of the build context.
	imageSource *imageMount
	// source is the context that local source paths are resolved against.
	// It is replaced by a lazy source over the image's RW layer the first
	// time a path is resolved while imageSource is set.
	source builder.Source
	// pathCache remembers hashes computed for paths inside an image.
	pathCache pathCache
	// download fetches URL sources.
	download sourceDownloader
	// platform is the target platform; its OS is always non-empty after
	// copierFromDispatchRequest.
	platform *specs.Platform
	// for cleanup. TODO: having copier.cleanup() is error prone and hard to
	// follow. Code calling performCopy should manage the lifecycle of its params.
	// Copier should take override source as input, not imageMount.
	activeLayer builder.RWLayer
	// tmpPaths lists temporary directories that Cleanup removes.
	tmpPaths []string
}
  75. func copierFromDispatchRequest(req dispatchRequest, download sourceDownloader, imageSource *imageMount) copier {
  76. platform := req.builder.platform
  77. if platform == nil {
  78. // May be nil if not explicitly set in API/dockerfile
  79. platform = &specs.Platform{}
  80. }
  81. if platform.OS == "" {
  82. // Default to the dispatch requests operating system if not explicit in API/dockerfile
  83. platform.OS = req.state.operatingSystem
  84. }
  85. if platform.OS == "" {
  86. // This is a failsafe just in case. Shouldn't be hit.
  87. platform.OS = runtime.GOOS
  88. }
  89. return copier{
  90. source: req.source,
  91. pathCache: req.builder.pathCache,
  92. download: download,
  93. imageSource: imageSource,
  94. platform: platform,
  95. }
  96. }
  97. func (o *copier) createCopyInstruction(sourcesAndDest instructions.SourcesAndDest, cmdName string) (copyInstruction, error) {
  98. inst := copyInstruction{
  99. cmdName: cmdName,
  100. dest: filepath.FromSlash(sourcesAndDest.DestPath),
  101. }
  102. infos, err := o.getCopyInfosForSourcePaths(sourcesAndDest.SourcePaths, inst.dest)
  103. if err != nil {
  104. return inst, errors.Wrapf(err, "%s failed", cmdName)
  105. }
  106. if len(infos) > 1 && !strings.HasSuffix(inst.dest, string(os.PathSeparator)) {
  107. return inst, errors.Errorf("When using %s with more than one source file, the destination must be a directory and end with a /", cmdName)
  108. }
  109. inst.infos = infos
  110. return inst, nil
  111. }
  112. // getCopyInfosForSourcePaths iterates over the source files and calculate the info
  113. // needed to copy (e.g. hash value if cached)
  114. // The dest is used in case source is URL (and ends with "/")
  115. func (o *copier) getCopyInfosForSourcePaths(sources []string, dest string) ([]copyInfo, error) {
  116. var infos []copyInfo
  117. for _, orig := range sources {
  118. subinfos, err := o.getCopyInfoForSourcePath(orig, dest)
  119. if err != nil {
  120. return nil, err
  121. }
  122. infos = append(infos, subinfos...)
  123. }
  124. if len(infos) == 0 {
  125. return nil, errors.New("no source files were specified")
  126. }
  127. return infos, nil
  128. }
  129. func (o *copier) getCopyInfoForSourcePath(orig, dest string) ([]copyInfo, error) {
  130. if !urlutil.IsURL(orig) {
  131. return o.calcCopyInfo(orig, true)
  132. }
  133. remote, path, err := o.download(orig)
  134. if err != nil {
  135. return nil, err
  136. }
  137. // If path == "" then we are unable to determine filename from src
  138. // We have to make sure dest is available
  139. if path == "" {
  140. if strings.HasSuffix(dest, "/") {
  141. return nil, errors.Errorf("cannot determine filename for source %s", orig)
  142. }
  143. path = unnamedFilename
  144. }
  145. o.tmpPaths = append(o.tmpPaths, remote.Root().Path())
  146. hash, err := remote.Hash(path)
  147. ci := newCopyInfoFromSource(remote, path, hash)
  148. ci.noDecompress = true // data from http shouldn't be extracted even on ADD
  149. return newCopyInfos(ci), err
  150. }
  151. // Cleanup removes any temporary directories created as part of downloading
  152. // remote files.
  153. func (o *copier) Cleanup() {
  154. for _, path := range o.tmpPaths {
  155. os.RemoveAll(path)
  156. }
  157. o.tmpPaths = []string{}
  158. if o.activeLayer != nil {
  159. o.activeLayer.Release()
  160. o.activeLayer = nil
  161. }
  162. }
  163. // TODO: allowWildcards can probably be removed by refactoring this function further.
  164. func (o *copier) calcCopyInfo(origPath string, allowWildcards bool) ([]copyInfo, error) {
  165. imageSource := o.imageSource
  166. if err := validateCopySourcePath(imageSource, origPath); err != nil {
  167. return nil, err
  168. }
  169. // TODO: do this when creating copier. Requires validateCopySourcePath
  170. // (and other below) to be aware of the difference sources. Why is it only
  171. // done on image Source?
  172. if imageSource != nil && o.activeLayer == nil {
  173. // this needs to be protected against repeated calls as wildcard copy
  174. // will call it multiple times for a single COPY
  175. var err error
  176. rwLayer, err := imageSource.NewRWLayer()
  177. if err != nil {
  178. return nil, err
  179. }
  180. o.activeLayer = rwLayer
  181. o.source, err = remotecontext.NewLazySource(rwLayer.Root())
  182. if err != nil {
  183. return nil, errors.Wrapf(err, "failed to create context for copy from %s", rwLayer.Root().Path())
  184. }
  185. }
  186. if o.source == nil {
  187. return nil, errors.Errorf("missing build context")
  188. }
  189. // Work in daemon-specific OS filepath semantics
  190. origPath = filepath.FromSlash(origPath)
  191. origPath = strings.TrimPrefix(origPath, string(os.PathSeparator))
  192. origPath = strings.TrimPrefix(origPath, "."+string(os.PathSeparator))
  193. // Deal with wildcards
  194. if allowWildcards && containsWildcards(origPath) {
  195. return o.copyWithWildcards(origPath)
  196. }
  197. if imageSource != nil && imageSource.ImageID() != "" {
  198. // return a cached copy if one exists
  199. if h, ok := o.pathCache.Load(imageSource.ImageID() + origPath); ok {
  200. return newCopyInfos(newCopyInfoFromSource(o.source, origPath, h.(string))), nil
  201. }
  202. }
  203. // Deal with the single file case
  204. copyInfo, err := copyInfoForFile(o.source, origPath)
  205. switch {
  206. case imageSource == nil && errors.Is(err, os.ErrNotExist):
  207. return nil, errors.Wrapf(err, "file not found in build context or excluded by .dockerignore")
  208. case err != nil:
  209. return nil, err
  210. case copyInfo.hash != "":
  211. o.storeInPathCache(imageSource, origPath, copyInfo.hash)
  212. return newCopyInfos(copyInfo), err
  213. }
  214. // TODO: remove, handle dirs in Hash()
  215. subfiles, err := walkSource(o.source, origPath)
  216. if err != nil {
  217. return nil, err
  218. }
  219. hash := hashStringSlice("dir", subfiles)
  220. o.storeInPathCache(imageSource, origPath, hash)
  221. return newCopyInfos(newCopyInfoFromSource(o.source, origPath, hash)), nil
  222. }
  223. func (o *copier) storeInPathCache(im *imageMount, path string, hash string) {
  224. if im != nil {
  225. o.pathCache.Store(im.ImageID()+path, hash)
  226. }
  227. }
  228. func (o *copier) copyWithWildcards(origPath string) ([]copyInfo, error) {
  229. root := o.source.Root()
  230. var copyInfos []copyInfo
  231. if err := root.Walk(root.Path(), func(path string, info os.FileInfo, err error) error {
  232. if err != nil {
  233. return err
  234. }
  235. rel, err := remotecontext.Rel(root, path)
  236. if err != nil {
  237. return err
  238. }
  239. if rel == "." {
  240. return nil
  241. }
  242. if match, _ := root.Match(origPath, rel); !match {
  243. return nil
  244. }
  245. // Note we set allowWildcards to false in case the name has
  246. // a * in it
  247. subInfos, err := o.calcCopyInfo(rel, false)
  248. if err != nil {
  249. return err
  250. }
  251. copyInfos = append(copyInfos, subInfos...)
  252. return nil
  253. }); err != nil {
  254. return nil, err
  255. }
  256. return copyInfos, nil
  257. }
  258. func copyInfoForFile(source builder.Source, path string) (copyInfo, error) {
  259. fi, err := remotecontext.StatAt(source, path)
  260. if err != nil {
  261. if errors.Is(err, os.ErrNotExist) {
  262. // return the relative path in the error, which is more user-friendly than the full path to the tmp-dir
  263. return copyInfo{}, errors.WithStack(&os.PathError{Op: "stat", Path: path, Err: os.ErrNotExist})
  264. }
  265. return copyInfo{}, err
  266. }
  267. if fi.IsDir() {
  268. return copyInfo{}, nil
  269. }
  270. hash, err := source.Hash(path)
  271. if err != nil {
  272. return copyInfo{}, err
  273. }
  274. return newCopyInfoFromSource(source, path, "file:"+hash), nil
  275. }
  276. // TODO: dedupe with copyWithWildcards()
  277. func walkSource(source builder.Source, origPath string) ([]string, error) {
  278. fp, err := remotecontext.FullPath(source, origPath)
  279. if err != nil {
  280. return nil, err
  281. }
  282. // Must be a dir
  283. var subfiles []string
  284. err = source.Root().Walk(fp, func(path string, info os.FileInfo, err error) error {
  285. if err != nil {
  286. return err
  287. }
  288. rel, err := remotecontext.Rel(source.Root(), path)
  289. if err != nil {
  290. return err
  291. }
  292. if rel == "." {
  293. return nil
  294. }
  295. hash, err := source.Hash(rel)
  296. if err != nil {
  297. return nil
  298. }
  299. // we already checked handleHash above
  300. subfiles = append(subfiles, hash)
  301. return nil
  302. })
  303. if err != nil {
  304. return nil, err
  305. }
  306. sort.Strings(subfiles)
  307. return subfiles, nil
  308. }
// sourceDownloader fetches the source at the given URL. It returns a source
// containing the download, the file name of the download inside that source
// (empty when it could not be determined), and any error.
type sourceDownloader func(string) (builder.Source, string, error)
  310. func newRemoteSourceDownloader(output, stdout io.Writer) sourceDownloader {
  311. return func(url string) (builder.Source, string, error) {
  312. return downloadSource(output, stdout, url)
  313. }
  314. }
  315. func errOnSourceDownload(_ string) (builder.Source, string, error) {
  316. return nil, "", errors.New("source can't be a URL for COPY")
  317. }
  318. func getFilenameForDownload(path string, resp *http.Response) string {
  319. // Guess filename based on source
  320. if path != "" && !strings.HasSuffix(path, "/") {
  321. if filename := filepath.Base(filepath.FromSlash(path)); filename != "" {
  322. return filename
  323. }
  324. }
  325. // Guess filename based on Content-Disposition
  326. if contentDisposition := resp.Header.Get("Content-Disposition"); contentDisposition != "" {
  327. if _, params, err := mime.ParseMediaType(contentDisposition); err == nil {
  328. if params["filename"] != "" && !strings.HasSuffix(params["filename"], "/") {
  329. if filename := filepath.Base(filepath.FromSlash(params["filename"])); filename != "" {
  330. return filename
  331. }
  332. }
  333. }
  334. }
  335. return ""
  336. }
  337. func downloadSource(output io.Writer, stdout io.Writer, srcURL string) (remote builder.Source, p string, err error) {
  338. u, err := url.Parse(srcURL)
  339. if err != nil {
  340. return
  341. }
  342. resp, err := remotecontext.GetWithStatusError(srcURL)
  343. if err != nil {
  344. return
  345. }
  346. filename := getFilenameForDownload(u.Path, resp)
  347. // Prepare file in a tmp dir
  348. tmpDir, err := ioutils.TempDir("", "docker-remote")
  349. if err != nil {
  350. return
  351. }
  352. defer func() {
  353. if err != nil {
  354. os.RemoveAll(tmpDir)
  355. }
  356. }()
  357. // If filename is empty, the returned filename will be "" but
  358. // the tmp filename will be created as "__unnamed__"
  359. tmpFileName := filename
  360. if filename == "" {
  361. tmpFileName = unnamedFilename
  362. }
  363. tmpFileName = filepath.Join(tmpDir, tmpFileName)
  364. tmpFile, err := os.OpenFile(tmpFileName, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0600)
  365. if err != nil {
  366. return
  367. }
  368. progressOutput := streamformatter.NewJSONProgressOutput(output, true)
  369. progressReader := progress.NewProgressReader(resp.Body, progressOutput, resp.ContentLength, "", "Downloading")
  370. // Download and dump result to tmp file
  371. // TODO: add filehash directly
  372. if _, err = io.Copy(tmpFile, progressReader); err != nil {
  373. tmpFile.Close()
  374. return
  375. }
  376. // TODO: how important is this random blank line to the output?
  377. fmt.Fprintln(stdout)
  378. // Set the mtime to the Last-Modified header value if present
  379. // Otherwise just remove atime and mtime
  380. mTime := time.Time{}
  381. lastMod := resp.Header.Get("Last-Modified")
  382. if lastMod != "" {
  383. // If we can't parse it then just let it default to 'zero'
  384. // otherwise use the parsed time value
  385. if parsedMTime, err := http.ParseTime(lastMod); err == nil {
  386. mTime = parsedMTime
  387. }
  388. }
  389. tmpFile.Close()
  390. if err = system.Chtimes(tmpFileName, mTime, mTime); err != nil {
  391. return
  392. }
  393. lc, err := remotecontext.NewLazySource(containerfs.NewLocalContainerFS(tmpDir))
  394. return lc, filename, err
  395. }
// copyFileOptions carries the settings performCopyForInfo applies to a copy.
type copyFileOptions struct {
	// decompress enables extraction of sources that are archives.
	decompress bool
	// identity, when non-nil, is the owner applied to the copied files.
	identity *idtools.Identity
	// archiver performs the actual copy and extraction operations.
	archiver Archiver
}
// copyEndpoint is one side (source or destination) of a copy operation: a
// path together with the filesystem driver used to access it.
type copyEndpoint struct {
	driver containerfs.Driver
	path   string
}
  405. func performCopyForInfo(dest copyInfo, source copyInfo, options copyFileOptions) error {
  406. srcPath, err := source.fullPath()
  407. if err != nil {
  408. return err
  409. }
  410. destPath, err := dest.fullPath()
  411. if err != nil {
  412. return err
  413. }
  414. archiver := options.archiver
  415. srcEndpoint := &copyEndpoint{driver: source.root, path: srcPath}
  416. destEndpoint := &copyEndpoint{driver: dest.root, path: destPath}
  417. src, err := source.root.Stat(srcPath)
  418. if err != nil {
  419. return errors.Wrapf(err, "source path not found")
  420. }
  421. if src.IsDir() {
  422. return copyDirectory(archiver, srcEndpoint, destEndpoint, options.identity)
  423. }
  424. if options.decompress && isArchivePath(source.root, srcPath) && !source.noDecompress {
  425. return archiver.UntarPath(srcPath, destPath)
  426. }
  427. destExistsAsDir, err := isExistingDirectory(destEndpoint)
  428. if err != nil {
  429. return err
  430. }
  431. // dest.path must be used because destPath has already been cleaned of any
  432. // trailing slash
  433. if endsInSlash(dest.root, dest.path) || destExistsAsDir {
  434. // source.path must be used to get the correct filename when the source
  435. // is a symlink
  436. destPath = dest.root.Join(destPath, source.root.Base(source.path))
  437. destEndpoint = &copyEndpoint{driver: dest.root, path: destPath}
  438. }
  439. return copyFile(archiver, srcEndpoint, destEndpoint, options.identity)
  440. }
  441. func isArchivePath(driver containerfs.ContainerFS, path string) bool {
  442. file, err := driver.Open(path)
  443. if err != nil {
  444. return false
  445. }
  446. defer file.Close()
  447. rdr, err := archive.DecompressStream(file)
  448. if err != nil {
  449. return false
  450. }
  451. r := tar.NewReader(rdr)
  452. _, err = r.Next()
  453. return err == nil
  454. }
  455. func copyDirectory(archiver Archiver, source, dest *copyEndpoint, identity *idtools.Identity) error {
  456. destExists, err := isExistingDirectory(dest)
  457. if err != nil {
  458. return errors.Wrapf(err, "failed to query destination path")
  459. }
  460. if err := archiver.CopyWithTar(source.path, dest.path); err != nil {
  461. return errors.Wrapf(err, "failed to copy directory")
  462. }
  463. if identity != nil {
  464. return fixPermissions(source.path, dest.path, *identity, !destExists)
  465. }
  466. return nil
  467. }
  468. func copyFile(archiver Archiver, source, dest *copyEndpoint, identity *idtools.Identity) error {
  469. if identity == nil {
  470. // Use system.MkdirAll here, which is a custom version of os.MkdirAll
  471. // modified for use on Windows to handle volume GUID paths. These paths
  472. // are of the form \\?\Volume{<GUID>}\<path>. An example would be:
  473. // \\?\Volume{dae8d3ac-b9a1-11e9-88eb-e8554b2ba1db}\bin\busybox.exe
  474. if err := system.MkdirAll(filepath.Dir(dest.path), 0755); err != nil {
  475. return err
  476. }
  477. } else {
  478. if err := idtools.MkdirAllAndChownNew(filepath.Dir(dest.path), 0755, *identity); err != nil {
  479. return errors.Wrapf(err, "failed to create new directory")
  480. }
  481. }
  482. if err := archiver.CopyFileWithTar(source.path, dest.path); err != nil {
  483. return errors.Wrapf(err, "failed to copy file")
  484. }
  485. if identity != nil {
  486. return fixPermissions(source.path, dest.path, *identity, false)
  487. }
  488. return nil
  489. }
  490. func endsInSlash(driver containerfs.Driver, path string) bool {
  491. return strings.HasSuffix(path, string(driver.Separator()))
  492. }
  493. // isExistingDirectory returns true if the path exists and is a directory
  494. func isExistingDirectory(point *copyEndpoint) (bool, error) {
  495. destStat, err := point.driver.Stat(point.path)
  496. switch {
  497. case errors.Is(err, os.ErrNotExist):
  498. return false, nil
  499. case err != nil:
  500. return false, err
  501. }
  502. return destStat.IsDir(), nil
  503. }