#!/usr/bin/env bash
# download-frozen-image-v2.sh
#
# Abort on the first failing command, and treat a pipeline as failed when any
# of its stages fails.
set -eo pipefail
# This script was developed for use in Moby's CI, and as such the use cases and
# usability are (intentionally) limited. You may find this script useful for
# educational purposes, for example, to learn how pulling images works "under
# the hood", and you may be able to use it for other purposes, but it should not
# be considered a "general purpose" tool for pulling images.
#
# The project maintainers accept contributions to this script within its intended
# scope, but may not accept contributions beyond that.
#
# For users who have a similar need but require more flexibility/functionality,
# refer to the discussion on GitHub, which mentions various alternatives that
# are more suitable for other uses: https://github.com/moby/moby/issues/40857
# hello-world latest ef872312fe1b 3 months ago 910 B
# hello-world latest ef872312fe1bbc5e05aae626791a47ee9b032efa8f3bda39cc0be7b56bfe59b9 3 months ago 910 B
# debian latest f6fab3b798be 10 weeks ago 85.1 MB
# debian latest f6fab3b798be3174f45aa1eb731f8182705555f89c9026d8c1ef230cbf8301dd 10 weeks ago 85.1 MB
  19. # check if essential commands are in our PATH
  20. for cmd in curl jq; do
  21. if ! command -v $cmd &> /dev/null; then
  22. echo >&2 "error: \"$cmd\" not found!"
  23. exit 1
  24. fi
  25. done
  26. usage() {
  27. echo "usage: $0 dir image[:tag][@digest] ..."
  28. echo " $0 /tmp/old-hello-world hello-world:latest@sha256:8be990ef2aeb16dbcb9271ddfe2610fa6658d13f6dfb8bc72074cc1ca36966a7"
  29. [ -z "$1" ] || exit "$1"
  30. }
  31. dir="$1" # dir for building tar in
  32. shift || usage 1 >&2
  33. if ! [ $# -gt 0 ] && [ "$dir" ]; then
  34. usage 2 >&2
  35. fi
  36. mkdir -p "$dir"
  37. # hacky workarounds for Bash 3 support (no associative arrays)
  38. images=()
  39. rm -f "$dir"/tags-*.tmp
  40. manifestJsonEntries=()
  41. doNotGenerateManifestJson=
  42. # repositories[busybox]='"latest": "...", "ubuntu-14.04": "..."'
  43. # bash v4 on Windows CI requires CRLF separator... and linux doesn't seem to care either way
  44. newlineIFS=$'\n'
  45. major=$(echo "${BASH_VERSION%%[^0.9]}" | cut -d. -f1)
  46. if [ "$major" -ge 4 ]; then
  47. newlineIFS=$'\r\n'
  48. fi
  49. registryBase='https://registry-1.docker.io'
  50. authBase='https://auth.docker.io'
  51. authService='registry.docker.io'
  52. # https://github.com/moby/moby/issues/33700
  53. fetch_blob() {
  54. local token="$1"
  55. shift
  56. local image="$1"
  57. shift
  58. local digest="$1"
  59. shift
  60. local targetFile="$1"
  61. shift
  62. local curlArgs=("$@")
  63. local curlHeaders
  64. curlHeaders="$(
  65. curl -S "${curlArgs[@]}" \
  66. -H "Authorization: Bearer $token" \
  67. "$registryBase/v2/$image/blobs/$digest" \
  68. -o "$targetFile" \
  69. -D-
  70. )"
  71. curlHeaders="$(echo "$curlHeaders" | tr -d '\r')"
  72. if grep -qE "^HTTP/[0-9].[0-9] 3" <<< "$curlHeaders"; then
  73. rm -f "$targetFile"
  74. local blobRedirect
  75. blobRedirect="$(echo "$curlHeaders" | awk -F ': ' 'tolower($1) == "location" { print $2; exit }')"
  76. if [ -z "$blobRedirect" ]; then
  77. echo >&2 "error: failed fetching '$image' blob '$digest'"
  78. echo "$curlHeaders" | head -1 >&2
  79. return 1
  80. fi
  81. curl -fSL "${curlArgs[@]}" \
  82. "$blobRedirect" \
  83. -o "$targetFile"
  84. fi
  85. }
  86. # handle 'application/vnd.docker.distribution.manifest.v2+json' manifest
  87. handle_single_manifest_v2() {
  88. local manifestJson="$1"
  89. shift
  90. local configDigest
  91. configDigest="$(echo "$manifestJson" | jq --raw-output '.config.digest')"
  92. local imageId="${configDigest#*:}" # strip off "sha256:"
  93. local configFile="$imageId.json"
  94. fetch_blob "$token" "$image" "$configDigest" "$dir/$configFile" -s
  95. local layersFs
  96. layersFs="$(echo "$manifestJson" | jq --raw-output --compact-output '.layers[]')"
  97. local IFS="$newlineIFS"
  98. local layers
  99. mapfile -t layers <<< "$layersFs"
  100. unset IFS
  101. echo "Downloading '$imageIdentifier' (${#layers[@]} layers)..."
  102. local layerId=
  103. local layerFiles=()
  104. for i in "${!layers[@]}"; do
  105. local layerMeta="${layers[$i]}"
  106. local layerMediaType
  107. layerMediaType="$(echo "$layerMeta" | jq --raw-output '.mediaType')"
  108. local layerDigest
  109. layerDigest="$(echo "$layerMeta" | jq --raw-output '.digest')"
  110. # save the previous layer's ID
  111. local parentId="$layerId"
  112. # create a new fake layer ID based on this layer's digest and the previous layer's fake ID
  113. layerId="$(echo "$parentId"$'\n'"$layerDigest" | sha256sum | cut -d' ' -f1)"
  114. # this accounts for the possibility that an image contains the same layer twice (and thus has a duplicate digest value)
  115. mkdir -p "$dir/$layerId"
  116. echo '1.0' > "$dir/$layerId/VERSION"
  117. if [ ! -s "$dir/$layerId/json" ]; then
  118. local parentJson
  119. parentJson="$(printf ', parent: "%s"' "$parentId")"
  120. local addJson
  121. addJson="$(printf '{ id: "%s"%s }' "$layerId" "${parentId:+$parentJson}")"
  122. # this starter JSON is taken directly from Docker's own "docker save" output for unimportant layers
  123. jq "$addJson + ." > "$dir/$layerId/json" <<- 'EOJSON'
  124. {
  125. "created": "0001-01-01T00:00:00Z",
  126. "container_config": {
  127. "Hostname": "",
  128. "Domainname": "",
  129. "User": "",
  130. "AttachStdin": false,
  131. "AttachStdout": false,
  132. "AttachStderr": false,
  133. "Tty": false,
  134. "OpenStdin": false,
  135. "StdinOnce": false,
  136. "Env": null,
  137. "Cmd": null,
  138. "Image": "",
  139. "Volumes": null,
  140. "WorkingDir": "",
  141. "Entrypoint": null,
  142. "OnBuild": null,
  143. "Labels": null
  144. }
  145. }
  146. EOJSON
  147. fi
  148. case "$layerMediaType" in
  149. application/vnd.docker.image.rootfs.diff.tar.gzip)
  150. local layerTar="$layerId/layer.tar"
  151. layerFiles=("${layerFiles[@]}" "$layerTar")
  152. # TODO figure out why "-C -" doesn't work here
  153. # "curl: (33) HTTP server doesn't seem to support byte ranges. Cannot resume."
  154. # "HTTP/1.1 416 Requested Range Not Satisfiable"
  155. if [ -f "$dir/$layerTar" ]; then
  156. # TODO hackpatch for no -C support :'(
  157. echo "skipping existing ${layerId:0:12}"
  158. continue
  159. fi
  160. local token
  161. token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')"
  162. fetch_blob "$token" "$image" "$layerDigest" "$dir/$layerTar" --progress-bar
  163. ;;
  164. *)
  165. echo >&2 "error: unknown layer mediaType ($imageIdentifier, $layerDigest): '$layerMediaType'"
  166. exit 1
  167. ;;
  168. esac
  169. done
  170. # change "$imageId" to be the ID of the last layer we added (needed for old-style "repositories" file which is created later -- specifically for older Docker daemons)
  171. imageId="$layerId"
  172. # munge the top layer image manifest to have the appropriate image configuration for older daemons
  173. local imageOldConfig
  174. imageOldConfig="$(jq --raw-output --compact-output '{ id: .id } + if .parent then { parent: .parent } else {} end' "$dir/$imageId/json")"
  175. jq --raw-output "$imageOldConfig + del(.history, .rootfs)" "$dir/$configFile" > "$dir/$imageId/json"
  176. local manifestJsonEntry
  177. manifestJsonEntry="$(
  178. echo '{}' | jq --raw-output '. + {
  179. Config: "'"$configFile"'",
  180. RepoTags: ["'"${image#library\/}:$tag"'"],
  181. Layers: '"$(echo '[]' | jq --raw-output ".$(for layerFile in "${layerFiles[@]}"; do echo " + [ \"$layerFile\" ]"; done)")"'
  182. }'
  183. )"
  184. manifestJsonEntries=("${manifestJsonEntries[@]}" "$manifestJsonEntry")
  185. }
  186. get_target_arch() {
  187. if [ -n "${TARGETARCH:-}" ]; then
  188. echo "${TARGETARCH}"
  189. return 0
  190. fi
  191. if type go > /dev/null; then
  192. go env GOARCH
  193. return 0
  194. fi
  195. if type dpkg > /dev/null; then
  196. debArch="$(dpkg --print-architecture)"
  197. case "${debArch}" in
  198. armel | armhf)
  199. echo "arm"
  200. return 0
  201. ;;
  202. *64el)
  203. echo "${debArch%el}le"
  204. return 0
  205. ;;
  206. *)
  207. echo "${debArch}"
  208. return 0
  209. ;;
  210. esac
  211. fi
  212. if type uname > /dev/null; then
  213. uArch="$(uname -m)"
  214. case "${uArch}" in
  215. x86_64)
  216. echo amd64
  217. return 0
  218. ;;
  219. arm | armv[0-9]*)
  220. echo arm
  221. return 0
  222. ;;
  223. aarch64)
  224. echo arm64
  225. return 0
  226. ;;
  227. mips*)
  228. echo >&2 "I see you are running on mips but I don't know how to determine endianness yet, so I cannot select a correct arch to fetch."
  229. echo >&2 "Consider installing \"go\" on the system which I can use to determine the correct arch or specify it explicitly by setting TARGETARCH"
  230. exit 1
  231. ;;
  232. *)
  233. echo "${uArch}"
  234. return 0
  235. ;;
  236. esac
  237. fi
  238. # default value
  239. echo >&2 "Unable to determine CPU arch, falling back to amd64. You can specify a target arch by setting TARGETARCH"
  240. echo amd64
  241. }
  242. get_target_variant() {
  243. echo "${TARGETVARIANT:-}"
  244. }
  245. while [ $# -gt 0 ]; do
  246. imageTag="$1"
  247. shift
  248. image="${imageTag%%[:@]*}"
  249. imageTag="${imageTag#*:}"
  250. digest="${imageTag##*@}"
  251. tag="${imageTag%%@*}"
  252. # add prefix library if passed official image
  253. if [[ "$image" != *"/"* ]]; then
  254. image="library/$image"
  255. fi
  256. imageFile="${image//\//_}" # "/" can't be in filenames :)
  257. token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')"
  258. manifestJson="$(
  259. curl -fsSL \
  260. -H "Authorization: Bearer $token" \
  261. -H 'Accept: application/vnd.docker.distribution.manifest.v2+json' \
  262. -H 'Accept: application/vnd.docker.distribution.manifest.list.v2+json' \
  263. -H 'Accept: application/vnd.docker.distribution.manifest.v1+json' \
  264. "$registryBase/v2/$image/manifests/$digest"
  265. )"
  266. if [ "${manifestJson:0:1}" != '{' ]; then
  267. echo >&2 "error: /v2/$image/manifests/$digest returned something unexpected:"
  268. echo >&2 " $manifestJson"
  269. exit 1
  270. fi
  271. imageIdentifier="$image:$tag@$digest"
  272. schemaVersion="$(echo "$manifestJson" | jq --raw-output '.schemaVersion')"
  273. case "$schemaVersion" in
  274. 2)
  275. mediaType="$(echo "$manifestJson" | jq --raw-output '.mediaType')"
  276. case "$mediaType" in
  277. application/vnd.docker.distribution.manifest.v2+json)
  278. handle_single_manifest_v2 "$manifestJson"
  279. ;;
  280. application/vnd.docker.distribution.manifest.list.v2+json)
  281. layersFs="$(echo "$manifestJson" | jq --raw-output --compact-output '.manifests[]')"
  282. IFS="$newlineIFS"
  283. mapfile -t layers <<< "$layersFs"
  284. unset IFS
  285. found=""
  286. targetArch="$(get_target_arch)"
  287. targetVariant="$(get_target_variant)"
  288. # parse first level multi-arch manifest
  289. for i in "${!layers[@]}"; do
  290. layerMeta="${layers[$i]}"
  291. maniArch="$(echo "$layerMeta" | jq --raw-output '.platform.architecture')"
  292. maniVariant="$(echo "$layerMeta" | jq --raw-output '.platform.variant')"
  293. if [[ "$maniArch" = "${targetArch}" ]] && [[ -z "${targetVariant}" || "$maniVariant" = "${targetVariant}" ]]; then
  294. digest="$(echo "$layerMeta" | jq --raw-output '.digest')"
  295. # get second level single manifest
  296. submanifestJson="$(
  297. curl -fsSL \
  298. -H "Authorization: Bearer $token" \
  299. -H 'Accept: application/vnd.docker.distribution.manifest.v2+json' \
  300. -H 'Accept: application/vnd.docker.distribution.manifest.list.v2+json' \
  301. -H 'Accept: application/vnd.docker.distribution.manifest.v1+json' \
  302. "$registryBase/v2/$image/manifests/$digest"
  303. )"
  304. handle_single_manifest_v2 "$submanifestJson"
  305. found="found"
  306. break
  307. fi
  308. done
  309. if [ -z "$found" ]; then
  310. echo >&2 "error: manifest for ${targetArch}${targetVariant:+/${targetVariant}} is not found"
  311. exit 1
  312. fi
  313. ;;
  314. *)
  315. echo >&2 "error: unknown manifest mediaType ($imageIdentifier): '$mediaType'"
  316. exit 1
  317. ;;
  318. esac
  319. ;;
  320. 1)
  321. if [ -z "$doNotGenerateManifestJson" ]; then
  322. echo >&2 "warning: '$imageIdentifier' uses schemaVersion '$schemaVersion'"
  323. echo >&2 " this script cannot (currently) recreate the 'image config' to put in a 'manifest.json' (thus any schemaVersion 2+ images will be imported in the old way, and their 'docker history' will suffer)"
  324. echo >&2
  325. doNotGenerateManifestJson=1
  326. fi
  327. layersFs="$(echo "$manifestJson" | jq --raw-output '.fsLayers | .[] | .blobSum')"
  328. IFS="$newlineIFS"
  329. mapfile -t layers <<< "$layersFs"
  330. unset IFS
  331. history="$(echo "$manifestJson" | jq '.history | [.[] | .v1Compatibility]')"
  332. imageId="$(echo "$history" | jq --raw-output '.[0]' | jq --raw-output '.id')"
  333. echo "Downloading '$imageIdentifier' (${#layers[@]} layers)..."
  334. for i in "${!layers[@]}"; do
  335. imageJson="$(echo "$history" | jq --raw-output ".[${i}]")"
  336. layerId="$(echo "$imageJson" | jq --raw-output '.id')"
  337. imageLayer="${layers[$i]}"
  338. mkdir -p "$dir/$layerId"
  339. echo '1.0' > "$dir/$layerId/VERSION"
  340. echo "$imageJson" > "$dir/$layerId/json"
  341. # TODO figure out why "-C -" doesn't work here
  342. # "curl: (33) HTTP server doesn't seem to support byte ranges. Cannot resume."
  343. # "HTTP/1.1 416 Requested Range Not Satisfiable"
  344. if [ -f "$dir/$layerId/layer.tar" ]; then
  345. # TODO hackpatch for no -C support :'(
  346. echo "skipping existing ${layerId:0:12}"
  347. continue
  348. fi
  349. token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')"
  350. fetch_blob "$token" "$image" "$imageLayer" "$dir/$layerId/layer.tar" --progress-bar
  351. done
  352. ;;
  353. *)
  354. echo >&2 "error: unknown manifest schemaVersion ($imageIdentifier): '$schemaVersion'"
  355. exit 1
  356. ;;
  357. esac
  358. echo
  359. if [ -s "$dir/tags-$imageFile.tmp" ]; then
  360. echo -n ', ' >> "$dir/tags-$imageFile.tmp"
  361. else
  362. images=("${images[@]}" "$image")
  363. fi
  364. echo -n '"'"$tag"'": "'"$imageId"'"' >> "$dir/tags-$imageFile.tmp"
  365. done
  366. echo -n '{' > "$dir/repositories"
  367. firstImage=1
  368. for image in "${images[@]}"; do
  369. imageFile="${image//\//_}" # "/" can't be in filenames :)
  370. image="${image#library\/}"
  371. [ "$firstImage" ] || echo -n ',' >> "$dir/repositories"
  372. firstImage=
  373. echo -n $'\n\t' >> "$dir/repositories"
  374. echo -n '"'"$image"'": { '"$(cat "$dir/tags-$imageFile.tmp")"' }' >> "$dir/repositories"
  375. done
  376. echo -n $'\n}\n' >> "$dir/repositories"
  377. rm -f "$dir"/tags-*.tmp
  378. if [ -z "$doNotGenerateManifestJson" ] && [ "${#manifestJsonEntries[@]}" -gt 0 ]; then
  379. echo '[]' | jq --raw-output ".$(for entry in "${manifestJsonEntries[@]}"; do echo " + [ $entry ]"; done)" > "$dir/manifest.json"
  380. else
  381. rm -f "$dir/manifest.json"
  382. fi
  383. echo "Download of images into '$dir' complete."
  384. echo "Use something like the following to load the result into a Docker daemon:"
  385. echo " tar -cC '$dir' . | docker load"