httpsource.go 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. package http
  2. import (
  3. "context"
  4. "crypto/sha256"
  5. "encoding/json"
  6. "fmt"
  7. "io"
  8. "mime"
  9. "net/http"
  10. "net/url"
  11. "os"
  12. "path"
  13. "path/filepath"
  14. "strings"
  15. "time"
  16. "github.com/docker/docker/pkg/locker"
  17. "github.com/moby/buildkit/cache"
  18. "github.com/moby/buildkit/cache/metadata"
  19. "github.com/moby/buildkit/snapshot"
  20. "github.com/moby/buildkit/source"
  21. "github.com/moby/buildkit/util/tracing"
  22. digest "github.com/opencontainers/go-digest"
  23. "github.com/pkg/errors"
  24. bolt "go.etcd.io/bbolt"
  25. )
  26. type Opt struct {
  27. CacheAccessor cache.Accessor
  28. MetadataStore *metadata.Store
  29. Transport http.RoundTripper
  30. }
  31. type httpSource struct {
  32. md *metadata.Store
  33. cache cache.Accessor
  34. locker *locker.Locker
  35. client *http.Client
  36. }
  37. func NewSource(opt Opt) (source.Source, error) {
  38. transport := opt.Transport
  39. if transport == nil {
  40. transport = tracing.DefaultTransport
  41. }
  42. hs := &httpSource{
  43. md: opt.MetadataStore,
  44. cache: opt.CacheAccessor,
  45. locker: locker.New(),
  46. client: &http.Client{
  47. Transport: transport,
  48. },
  49. }
  50. return hs, nil
  51. }
  52. func (hs *httpSource) ID() string {
  53. return source.HttpsScheme
  54. }
  55. type httpSourceHandler struct {
  56. *httpSource
  57. src source.HttpIdentifier
  58. refID string
  59. cacheKey digest.Digest
  60. }
  61. func (hs *httpSource) Resolve(ctx context.Context, id source.Identifier) (source.SourceInstance, error) {
  62. httpIdentifier, ok := id.(*source.HttpIdentifier)
  63. if !ok {
  64. return nil, errors.Errorf("invalid http identifier %v", id)
  65. }
  66. return &httpSourceHandler{
  67. src: *httpIdentifier,
  68. httpSource: hs,
  69. }, nil
  70. }
  71. // urlHash is internal hash the etag is stored by that doesn't leak outside
  72. // this package.
  73. func (hs *httpSourceHandler) urlHash() (digest.Digest, error) {
  74. dt, err := json.Marshal(struct {
  75. Filename string
  76. Perm, UID, GID int
  77. }{
  78. Filename: getFileName(hs.src.URL, hs.src.Filename, nil),
  79. Perm: hs.src.Perm,
  80. UID: hs.src.UID,
  81. GID: hs.src.GID,
  82. })
  83. if err != nil {
  84. return "", err
  85. }
  86. return digest.FromBytes(dt), nil
  87. }
  88. func (hs *httpSourceHandler) formatCacheKey(filename string, dgst digest.Digest, lastModTime string) digest.Digest {
  89. dt, err := json.Marshal(struct {
  90. Filename string
  91. Perm, UID, GID int
  92. Checksum digest.Digest
  93. LastModTime string `json:",omitempty"`
  94. }{
  95. Filename: filename,
  96. Perm: hs.src.Perm,
  97. UID: hs.src.UID,
  98. GID: hs.src.GID,
  99. Checksum: dgst,
  100. LastModTime: lastModTime,
  101. })
  102. if err != nil {
  103. return dgst
  104. }
  105. return digest.FromBytes(dt)
  106. }
  107. func (hs *httpSourceHandler) CacheKey(ctx context.Context, index int) (string, bool, error) {
  108. if hs.src.Checksum != "" {
  109. hs.cacheKey = hs.src.Checksum
  110. return hs.formatCacheKey(getFileName(hs.src.URL, hs.src.Filename, nil), hs.src.Checksum, "").String(), true, nil
  111. }
  112. uh, err := hs.urlHash()
  113. if err != nil {
  114. return "", false, nil
  115. }
  116. // look up metadata(previously stored headers) for that URL
  117. sis, err := hs.md.Search(uh.String())
  118. if err != nil {
  119. return "", false, errors.Wrapf(err, "failed to search metadata for %s", uh)
  120. }
  121. req, err := http.NewRequest("GET", hs.src.URL, nil)
  122. if err != nil {
  123. return "", false, err
  124. }
  125. req = req.WithContext(ctx)
  126. m := map[string]*metadata.StorageItem{}
  127. if len(sis) > 0 {
  128. for _, si := range sis {
  129. // if metaDigest := getMetaDigest(si); metaDigest == hs.formatCacheKey("") {
  130. if etag := getETag(si); etag != "" {
  131. if dgst := getChecksum(si); dgst != "" {
  132. m[etag] = si
  133. req.Header.Add("If-None-Match", etag)
  134. }
  135. }
  136. // }
  137. }
  138. }
  139. resp, err := hs.client.Do(req)
  140. if err != nil {
  141. return "", false, err
  142. }
  143. if resp.StatusCode < 200 || resp.StatusCode >= 400 {
  144. return "", false, errors.Errorf("invalid response status %d", resp.StatusCode)
  145. }
  146. if resp.StatusCode == http.StatusNotModified {
  147. respETag := resp.Header.Get("ETag")
  148. si, ok := m[respETag]
  149. if !ok {
  150. return "", false, errors.Errorf("invalid not-modified ETag: %v", respETag)
  151. }
  152. hs.refID = si.ID()
  153. dgst := getChecksum(si)
  154. if dgst == "" {
  155. return "", false, errors.Errorf("invalid metadata change")
  156. }
  157. modTime := getModTime(si)
  158. resp.Body.Close()
  159. return hs.formatCacheKey(getFileName(hs.src.URL, hs.src.Filename, resp), dgst, modTime).String(), true, nil
  160. }
  161. ref, dgst, err := hs.save(ctx, resp)
  162. if err != nil {
  163. return "", false, err
  164. }
  165. ref.Release(context.TODO())
  166. hs.cacheKey = dgst
  167. return hs.formatCacheKey(getFileName(hs.src.URL, hs.src.Filename, resp), dgst, resp.Header.Get("Last-Modified")).String(), true, nil
  168. }
  169. func (hs *httpSourceHandler) save(ctx context.Context, resp *http.Response) (ref cache.ImmutableRef, dgst digest.Digest, retErr error) {
  170. newRef, err := hs.cache.New(ctx, nil, cache.CachePolicyRetain, cache.WithDescription(fmt.Sprintf("http url %s", hs.src.URL)))
  171. if err != nil {
  172. return nil, "", err
  173. }
  174. releaseRef := func() {
  175. newRef.Release(context.TODO())
  176. }
  177. defer func() {
  178. if retErr != nil && newRef != nil {
  179. releaseRef()
  180. }
  181. }()
  182. mount, err := newRef.Mount(ctx, false)
  183. if err != nil {
  184. return nil, "", err
  185. }
  186. lm := snapshot.LocalMounter(mount)
  187. dir, err := lm.Mount()
  188. if err != nil {
  189. return nil, "", err
  190. }
  191. defer func() {
  192. if retErr != nil && lm != nil {
  193. lm.Unmount()
  194. }
  195. }()
  196. perm := 0600
  197. if hs.src.Perm != 0 {
  198. perm = hs.src.Perm
  199. }
  200. fp := filepath.Join(dir, getFileName(hs.src.URL, hs.src.Filename, resp))
  201. f, err := os.OpenFile(fp, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.FileMode(perm))
  202. if err != nil {
  203. return nil, "", err
  204. }
  205. defer func() {
  206. if f != nil {
  207. f.Close()
  208. }
  209. }()
  210. h := sha256.New()
  211. if _, err := io.Copy(io.MultiWriter(f, h), resp.Body); err != nil {
  212. return nil, "", err
  213. }
  214. if err := f.Close(); err != nil {
  215. return nil, "", err
  216. }
  217. f = nil
  218. if hs.src.UID != 0 || hs.src.GID != 0 {
  219. if err := os.Chown(fp, hs.src.UID, hs.src.GID); err != nil {
  220. return nil, "", err
  221. }
  222. }
  223. mTime := time.Unix(0, 0)
  224. lastMod := resp.Header.Get("Last-Modified")
  225. if lastMod != "" {
  226. if parsedMTime, err := http.ParseTime(lastMod); err == nil {
  227. mTime = parsedMTime
  228. }
  229. }
  230. if err := os.Chtimes(fp, mTime, mTime); err != nil {
  231. return nil, "", err
  232. }
  233. lm.Unmount()
  234. lm = nil
  235. ref, err = newRef.Commit(ctx)
  236. if err != nil {
  237. return nil, "", err
  238. }
  239. newRef = nil
  240. hs.refID = ref.ID()
  241. dgst = digest.NewDigest(digest.SHA256, h)
  242. if respETag := resp.Header.Get("ETag"); respETag != "" {
  243. setETag(ref.Metadata(), respETag)
  244. uh, err := hs.urlHash()
  245. if err != nil {
  246. return nil, "", err
  247. }
  248. setChecksum(ref.Metadata(), uh.String(), dgst)
  249. if err := ref.Metadata().Commit(); err != nil {
  250. return nil, "", err
  251. }
  252. }
  253. if modTime := resp.Header.Get("Last-Modified"); modTime != "" {
  254. setModTime(ref.Metadata(), modTime)
  255. }
  256. return ref, dgst, nil
  257. }
  258. func (hs *httpSourceHandler) Snapshot(ctx context.Context) (cache.ImmutableRef, error) {
  259. if hs.refID != "" {
  260. ref, err := hs.cache.Get(ctx, hs.refID)
  261. if err == nil {
  262. return ref, nil
  263. }
  264. }
  265. req, err := http.NewRequest("GET", hs.src.URL, nil)
  266. if err != nil {
  267. return nil, err
  268. }
  269. req = req.WithContext(ctx)
  270. resp, err := hs.client.Do(req)
  271. if err != nil {
  272. return nil, err
  273. }
  274. ref, dgst, err := hs.save(ctx, resp)
  275. if err != nil {
  276. return nil, err
  277. }
  278. if dgst != hs.cacheKey {
  279. ref.Release(context.TODO())
  280. return nil, errors.Errorf("digest mismatch %s: %s", dgst, hs.cacheKey)
  281. }
  282. return ref, nil
  283. }
  284. const keyETag = "etag"
  285. const keyChecksum = "http.checksum"
  286. const keyModTime = "http.modtime"
  287. func setETag(si *metadata.StorageItem, s string) error {
  288. v, err := metadata.NewValue(s)
  289. if err != nil {
  290. return errors.Wrap(err, "failed to create etag value")
  291. }
  292. si.Queue(func(b *bolt.Bucket) error {
  293. return si.SetValue(b, keyETag, v)
  294. })
  295. return nil
  296. }
  297. func getETag(si *metadata.StorageItem) string {
  298. v := si.Get(keyETag)
  299. if v == nil {
  300. return ""
  301. }
  302. var etag string
  303. if err := v.Unmarshal(&etag); err != nil {
  304. return ""
  305. }
  306. return etag
  307. }
  308. func setModTime(si *metadata.StorageItem, s string) error {
  309. v, err := metadata.NewValue(s)
  310. if err != nil {
  311. return errors.Wrap(err, "failed to create modtime value")
  312. }
  313. si.Queue(func(b *bolt.Bucket) error {
  314. return si.SetValue(b, keyModTime, v)
  315. })
  316. return nil
  317. }
  318. func getModTime(si *metadata.StorageItem) string {
  319. v := si.Get(keyModTime)
  320. if v == nil {
  321. return ""
  322. }
  323. var modTime string
  324. if err := v.Unmarshal(&modTime); err != nil {
  325. return ""
  326. }
  327. return modTime
  328. }
  329. func setChecksum(si *metadata.StorageItem, url string, d digest.Digest) error {
  330. v, err := metadata.NewValue(d)
  331. if err != nil {
  332. return errors.Wrap(err, "failed to create checksum value")
  333. }
  334. v.Index = url
  335. si.Queue(func(b *bolt.Bucket) error {
  336. return si.SetValue(b, keyChecksum, v)
  337. })
  338. return nil
  339. }
  340. func getChecksum(si *metadata.StorageItem) digest.Digest {
  341. v := si.Get(keyChecksum)
  342. if v == nil {
  343. return ""
  344. }
  345. var dgstStr string
  346. if err := v.Unmarshal(&dgstStr); err != nil {
  347. return ""
  348. }
  349. dgst, err := digest.Parse(dgstStr)
  350. if err != nil {
  351. return ""
  352. }
  353. return dgst
  354. }
  355. func getFileName(urlStr, manualFilename string, resp *http.Response) string {
  356. if manualFilename != "" {
  357. return manualFilename
  358. }
  359. if resp != nil {
  360. if contentDisposition := resp.Header.Get("Content-Disposition"); contentDisposition != "" {
  361. if _, params, err := mime.ParseMediaType(contentDisposition); err == nil {
  362. if params["filename"] != "" && !strings.HasSuffix(params["filename"], "/") {
  363. if filename := filepath.Base(filepath.FromSlash(params["filename"])); filename != "" {
  364. return filename
  365. }
  366. }
  367. }
  368. }
  369. }
  370. u, err := url.Parse(urlStr)
  371. if err == nil {
  372. if base := path.Base(u.Path); base != "." && base != "/" {
  373. return base
  374. }
  375. }
  376. return "download"
  377. }