123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444 |
#!/usr/bin/env bash
# Abort on the first failing command, including a failure in any stage of a pipeline.
set -o errexit -o pipefail
- # This script was developed for use in Moby's CI, and as such the use cases and
- # usability are (intentionally) limited. You may find this script useful for
- # educational purposes, for example, to learn how pulling images works "under
- # the hood", and you may be able to use it for other purposes, but it should not
- # be considered a "general purpose" tool for pulling images.
- #
- # The project maintainers accept contributions to this script within its intended
- # scope, but may not accept contributions beyond that.
- #
- # For users who have a similar need but require more flexibility/functionality,
- # refer to the discussion on GitHub, which mentions various alternatives that
- # are more suitable for other uses: https://github.com/moby/moby/issues/40857
- # hello-world latest ef872312fe1b 3 months ago 910 B
- # hello-world latest ef872312fe1bbc5e05aae626791a47ee9b032efa8f3bda39cc0be7b56bfe59b9 3 months ago 910 B
- # debian latest f6fab3b798be 10 weeks ago 85.1 MB
- # debian latest f6fab3b798be3174f45aa1eb731f8182705555f89c9026d8c1ef230cbf8301dd 10 weeks ago 85.1 MB
# make sure every external tool this script relies on can be found in PATH
for cmd in curl jq; do
	command -v "$cmd" &> /dev/null && continue
	echo >&2 "error: \"$cmd\" not found!"
	exit 1
done
# Print the command-line synopsis plus one example invocation to stdout.
#
# Arguments:
#   $1 - optional exit status; when non-empty the script exits with it,
#        otherwise the function simply returns
usage() {
	local exitStatus="$1"
	printf '%s\n' \
		"usage: $0 dir image[:tag][@digest] ..." \
		" $0 /tmp/old-hello-world hello-world:latest@sha256:8be990ef2aeb16dbcb9271ddfe2610fa6658d13f6dfb8bc72074cc1ca36966a7"
	if [ -n "$exitStatus" ]; then
		exit "$exitStatus"
	fi
}
# first argument: directory the image tarball contents are assembled in
dir="$1"
# no arguments at all -> print usage and exit 1
shift || usage 1 >&2
# a directory but no image references -> print usage and exit 2
if [ $# -eq 0 ] && [ -n "$dir" ]; then
	usage 2 >&2
fi
mkdir -p "$dir"
# hacky workarounds for Bash 3 support (no associative arrays): the tags of
# each image are accumulated in "$dir/tags-<image>.tmp" files instead of
#   repositories[busybox]='"latest": "...", "ubuntu-14.04": "..."'
rm -f "$dir"/tags-*.tmp # drop leftovers of an earlier run
declare -a images=()
declare -a manifestJsonEntries=()
doNotGenerateManifestJson=''
# bash v4 on Windows CI requires CRLF separator... and linux doesn't seem to care either way
newlineIFS=$'\n'
# BASH_VERSINFO[0] is the interpreter's major version number; no need to
# pick it out of the BASH_VERSION string with external tools
major="${BASH_VERSINFO[0]}"
if [ "$major" -ge 4 ]; then
	newlineIFS=$'\r\n'
fi
# registry API endpoint the manifests and blobs are fetched from
registryBase="https://registry-1.docker.io"
# token server and the service name requested from it
authBase="https://auth.docker.io"
authService="registry.docker.io"
# https://github.com/moby/moby/issues/33700
#
# Download a single blob of an image from the registry into a local file.
#
# Arguments:
#   $1   - bearer token sent to the registry
#   $2   - image repository (e.g. "library/debian")
#   $3   - digest of the blob
#   $4   - file the blob is written to
#   $5.. - extra arguments passed to every curl invocation
# Globals:
#   registryBase (read)
# Returns:
#   non-zero when the registry answers with a 4xx/5xx status, when a redirect
#   carries no Location header, or when the redirected download fails
fetch_blob() {
	local token="$1"
	local image="$2"
	local digest="$3"
	local targetFile="$4"
	local curlArgs=("${@:5}")

	# First request: redirects are NOT followed automatically; the response
	# headers are captured (-D-) so a redirect can be followed by hand below,
	# in a second request that does not carry the Authorization header.
	local curlHeaders
	curlHeaders="$(
		curl -S "${curlArgs[@]}" \
			-H "Authorization: Bearer $token" \
			"$registryBase/v2/$image/blobs/$digest" \
			-o "$targetFile" \
			-D-
	)"
	local carriageReturn=$'\r'
	curlHeaders="${curlHeaders//$carriageReturn/}"

	# Status code of the last response in the header dump. Matching on the
	# second field works for both "HTTP/1.1 307 Temporary Redirect" and the
	# HTTP/2 form "HTTP/2 307", which has no minor version.
	local httpStatus
	httpStatus="$(awk '/^HTTP\/[0-9]/ { status = $2 } END { print status }' <<< "$curlHeaders")"

	case "$httpStatus" in
		3*)
			# whatever was written so far is the redirect response body, not the blob
			rm -f "$targetFile"

			local blobRedirect
			blobRedirect="$(echo "$curlHeaders" | awk -F ': ' 'tolower($1) == "location" { print $2; exit }')"
			if [ -z "$blobRedirect" ]; then
				echo >&2 "error: failed fetching '$image' blob '$digest'"
				echo "$curlHeaders" | head -1 >&2
				return 1
			fi

			curl -fSL "${curlArgs[@]}" \
				"$blobRedirect" \
				-o "$targetFile"
			;;
		4* | 5*)
			# do not leave the registry's error document behind as if it were the blob
			rm -f "$targetFile"
			echo >&2 "error: failed fetching '$image' blob '$digest'"
			echo "$curlHeaders" | head -1 >&2
			return 1
			;;
	esac
}
# handle 'application/vnd.docker.distribution.manifest.v2+json' manifest
#
# Downloads the image config and every layer listed in the manifest into
# "$dir" (one directory per layer, named after a fake layer ID) and appends an
# entry describing the image to "manifestJsonEntries".
#
# Arguments:
#   $1 - the manifest JSON document
# Globals read:
#   dir, image, tag, token, imageIdentifier, newlineIFS, authBase, authService
# Globals written:
#   imageId             - fake ID of the top-most layer, for the caller's "repositories" entry
#   manifestJsonEntries - one entry appended
handle_single_manifest_v2() {
	local manifestJson="$1"
	shift

	local configDigest
	configDigest="$(echo "$manifestJson" | jq --raw-output '.config.digest')"
	local configFile="${configDigest#*:}.json" # strip off "sha256:"
	fetch_blob "$token" "$image" "$configDigest" "$dir/$configFile" -s

	local layersFs
	layersFs="$(echo "$manifestJson" | jq --raw-output --compact-output '.layers[]')"
	local IFS="$newlineIFS"
	local layers
	mapfile -t layers <<< "$layersFs"
	unset IFS

	echo "Downloading '$imageIdentifier' (${#layers[@]} layers)..."

	# keep the loop index out of the caller's scope (the caller iterates with its own "i")
	local i
	local layerId=
	local layerFiles=()
	for i in "${!layers[@]}"; do
		local layerMeta="${layers[$i]}"

		local layerMediaType
		layerMediaType="$(echo "$layerMeta" | jq --raw-output '.mediaType')"
		local layerDigest
		layerDigest="$(echo "$layerMeta" | jq --raw-output '.digest')"

		# save the previous layer's ID
		local parentId="$layerId"
		# create a new fake layer ID based on this layer's digest and the previous layer's fake ID
		layerId="$(echo "$parentId"$'\n'"$layerDigest" | sha256sum | cut -d' ' -f1)"
		# this accounts for the possibility that an image contains the same layer twice (and thus has a duplicate digest value)

		mkdir -p "$dir/$layerId"
		echo '1.0' > "$dir/$layerId/VERSION"

		if [ ! -s "$dir/$layerId/json" ]; then
			local parentJson
			parentJson="$(printf ', parent: "%s"' "$parentId")"
			local addJson
			addJson="$(printf '{ id: "%s"%s }' "$layerId" "${parentId:+$parentJson}")"
			# this starter JSON is taken directly from Docker's own "docker save" output for unimportant layers
			local starterJson='{
				"created": "0001-01-01T00:00:00Z",
				"container_config": {
					"Hostname": "",
					"Domainname": "",
					"User": "",
					"AttachStdin": false,
					"AttachStdout": false,
					"AttachStderr": false,
					"Tty": false,
					"OpenStdin": false,
					"StdinOnce": false,
					"Env": null,
					"Cmd": null,
					"Image": "",
					"Volumes": null,
					"WorkingDir": "",
					"Entrypoint": null,
					"OnBuild": null,
					"Labels": null
				}
			}'
			# prepend "id" (and "parent", when there is one) to the starter document
			jq "$addJson + ." <<< "$starterJson" > "$dir/$layerId/json"
		fi

		case "$layerMediaType" in
			application/vnd.docker.image.rootfs.diff.tar.gzip)
				local layerTar="$layerId/layer.tar"
				layerFiles+=("$layerTar")
				# TODO figure out why "-C -" doesn't work here
				# "curl: (33) HTTP server doesn't seem to support byte ranges. Cannot resume."
				# "HTTP/1.1 416 Requested Range Not Satisfiable"
				if [ -f "$dir/$layerTar" ]; then
					# TODO hackpatch for no -C support :'(
					echo "skipping existing ${layerId:0:12}"
					continue
				fi
				# a fresh token is requested for every layer download
				local token
				token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')"
				fetch_blob "$token" "$image" "$layerDigest" "$dir/$layerTar" --progress-bar
				;;

			*)
				echo >&2 "error: unknown layer mediaType ($imageIdentifier, $layerDigest): '$layerMediaType'"
				exit 1
				;;
		esac
	done

	# set "$imageId" to the ID of the last layer we added (needed for old-style "repositories" file which is created later -- specifically for older Docker daemons)
	# NOTE: deliberately not "local" -- the caller reads it after this function returns
	imageId="$layerId"

	# munge the top layer image manifest to have the appropriate image configuration for older daemons
	local imageOldConfig
	imageOldConfig="$(jq --raw-output --compact-output '{ id: .id } + if .parent then { parent: .parent } else {} end' "$dir/$imageId/json")"
	jq --raw-output "$imageOldConfig + del(.history, .rootfs)" "$dir/$configFile" > "$dir/$imageId/json"

	local manifestJsonEntry
	manifestJsonEntry="$(
		echo '{}' | jq --raw-output '. + {
			Config: "'"$configFile"'",
			RepoTags: ["'"${image#library\/}:$tag"'"],
			Layers: '"$(echo '[]' | jq --raw-output ".$(for layerFile in "${layerFiles[@]}"; do echo " + [ \"$layerFile\" ]"; done)")"'
		}'
	)"
	manifestJsonEntries+=("$manifestJsonEntry")
}
# Print the CPU architecture to select from a multi-arch manifest list.
#
# The first source that is available wins:
#   1. the TARGETARCH environment variable (printed as-is)
#   2. "go env GOARCH"
#   3. "dpkg --print-architecture", mapped to the Go-style name
#   4. "uname -m", mapped to the Go-style name
#   5. "amd64" as a last resort (with a warning on stderr)
#
# Outputs: the architecture name on stdout
# Exits:   with status 1 on mips when only "uname" is available (endianness unknown)
get_target_arch() {
	if [ -n "${TARGETARCH:-}" ]; then
		echo "${TARGETARCH}"
		return 0
	fi

	# "command -v" stays silent when the tool is missing
	if command -v go > /dev/null 2>&1; then
		go env GOARCH
		return 0
	fi

	if command -v dpkg > /dev/null 2>&1; then
		local debArch
		debArch="$(dpkg --print-architecture)"
		case "${debArch}" in
			armel | armhf)
				echo "arm"
				;;
			*64el)
				# e.g. "ppc64el" -> "ppc64le"
				echo "${debArch%el}le"
				;;
			i386)
				# manifest lists name 32-bit x86 "386"
				echo "386"
				;;
			*)
				echo "${debArch}"
				;;
		esac
		return 0
	fi

	if command -v uname > /dev/null 2>&1; then
		local uArch
		uArch="$(uname -m)"
		case "${uArch}" in
			x86_64)
				echo amd64
				;;
			i[3-6]86)
				echo 386
				;;
			arm | armv[0-9]*)
				echo arm
				;;
			aarch64)
				echo arm64
				;;
			mips*)
				echo >&2 "I see you are running on mips but I don't know how to determine endianness yet, so I cannot select a correct arch to fetch."
				echo >&2 "Consider installing \"go\" on the system which I can use to determine the correct arch or specify it explicitly by setting TARGETARCH"
				exit 1
				;;
			*)
				echo "${uArch}"
				;;
		esac
		return 0
	fi

	# default value
	echo >&2 "Unable to determine CPU arch, falling back to amd64. You can specify a target arch by setting TARGETARCH"
	echo amd64
}
# Print the platform variant requested through the TARGETVARIANT environment
# variable; prints an empty line when it is unset or empty.
get_target_variant() {
	local variant="${TARGETVARIANT:-}"
	echo "$variant"
}
# Split an "image[:tag][@digest]" argument into the globals "$image", "$tag"
# and "$digest". A missing tag defaults to "latest"; "$digest" holds the
# reference the manifest is requested by (the digest when one is given,
# otherwise the tag).
parse_image_reference() {
	local reference="$1"

	image="${reference%%[:@]*}"
	local rest="${reference#"$image"}"

	digest=
	if [[ "$rest" == *@* ]]; then
		digest="${rest#*@}"
		rest="${rest%%@*}"
	fi

	tag="${rest#:}"
	tag="${tag:-latest}"
	digest="${digest:-$tag}"
}

# Download every image named on the command line into "$dir" and record its
# tag in "$dir/tags-<image>.tmp".
while [ $# -gt 0 ]; do
	parse_image_reference "$1"
	shift

	# add prefix library if passed official image
	if [[ "$image" != *"/"* ]]; then
		image="library/$image"
	fi

	imageFile="${image//\//_}" # "/" can't be in filenames :)

	token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')"

	manifestJson="$(
		curl -fsSL \
			-H "Authorization: Bearer $token" \
			-H 'Accept: application/vnd.docker.distribution.manifest.v2+json' \
			-H 'Accept: application/vnd.docker.distribution.manifest.list.v2+json' \
			-H 'Accept: application/vnd.docker.distribution.manifest.v1+json' \
			"$registryBase/v2/$image/manifests/$digest"
	)"
	if [ "${manifestJson:0:1}" != '{' ]; then
		echo >&2 "error: /v2/$image/manifests/$digest returned something unexpected:"
		echo >&2 " $manifestJson"
		exit 1
	fi

	imageIdentifier="$image:$tag@$digest"

	schemaVersion="$(echo "$manifestJson" | jq --raw-output '.schemaVersion')"
	case "$schemaVersion" in
		2)
			mediaType="$(echo "$manifestJson" | jq --raw-output '.mediaType')"

			case "$mediaType" in
				application/vnd.docker.distribution.manifest.v2+json)
					handle_single_manifest_v2 "$manifestJson"
					;;
				application/vnd.docker.distribution.manifest.list.v2+json)
					layersFs="$(echo "$manifestJson" | jq --raw-output --compact-output '.manifests[]')"
					IFS="$newlineIFS"
					mapfile -t layers <<< "$layersFs"
					unset IFS

					found=""
					targetArch="$(get_target_arch)"
					targetVariant="$(get_target_variant)"
					# parse first level multi-arch manifest
					for i in "${!layers[@]}"; do
						layerMeta="${layers[$i]}"
						maniArch="$(echo "$layerMeta" | jq --raw-output '.platform.architecture')"
						maniVariant="$(echo "$layerMeta" | jq --raw-output '.platform.variant')"
						# the variant is only compared when one was requested
						if [[ "$maniArch" = "${targetArch}" ]] && [[ -z "${targetVariant}" || "$maniVariant" = "${targetVariant}" ]]; then
							digest="$(echo "$layerMeta" | jq --raw-output '.digest')"
							# get second level single manifest
							submanifestJson="$(
								curl -fsSL \
									-H "Authorization: Bearer $token" \
									-H 'Accept: application/vnd.docker.distribution.manifest.v2+json' \
									-H 'Accept: application/vnd.docker.distribution.manifest.list.v2+json' \
									-H 'Accept: application/vnd.docker.distribution.manifest.v1+json' \
									"$registryBase/v2/$image/manifests/$digest"
							)"
							handle_single_manifest_v2 "$submanifestJson"
							found="found"
							break
						fi
					done
					if [ -z "$found" ]; then
						echo >&2 "error: manifest for ${targetArch}${targetVariant:+/${targetVariant}} is not found"
						exit 1
					fi
					;;
				*)
					echo >&2 "error: unknown manifest mediaType ($imageIdentifier): '$mediaType'"
					exit 1
					;;
			esac
			;;

		1)
			if [ -z "$doNotGenerateManifestJson" ]; then
				echo >&2 "warning: '$imageIdentifier' uses schemaVersion '$schemaVersion'"
				echo >&2 " this script cannot (currently) recreate the 'image config' to put in a 'manifest.json' (thus any schemaVersion 2+ images will be imported in the old way, and their 'docker history' will suffer)"
				echo >&2
				doNotGenerateManifestJson=1
			fi

			layersFs="$(echo "$manifestJson" | jq --raw-output '.fsLayers | .[] | .blobSum')"
			IFS="$newlineIFS"
			mapfile -t layers <<< "$layersFs"
			unset IFS

			history="$(echo "$manifestJson" | jq '.history | [.[] | .v1Compatibility]')"
			imageId="$(echo "$history" | jq --raw-output '.[0]' | jq --raw-output '.id')"

			echo "Downloading '$imageIdentifier' (${#layers[@]} layers)..."
			for i in "${!layers[@]}"; do
				imageJson="$(echo "$history" | jq --raw-output ".[${i}]")"
				layerId="$(echo "$imageJson" | jq --raw-output '.id')"
				imageLayer="${layers[$i]}"

				mkdir -p "$dir/$layerId"
				echo '1.0' > "$dir/$layerId/VERSION"

				echo "$imageJson" > "$dir/$layerId/json"

				# TODO figure out why "-C -" doesn't work here
				# "curl: (33) HTTP server doesn't seem to support byte ranges. Cannot resume."
				# "HTTP/1.1 416 Requested Range Not Satisfiable"
				if [ -f "$dir/$layerId/layer.tar" ]; then
					# TODO hackpatch for no -C support :'(
					echo "skipping existing ${layerId:0:12}"
					continue
				fi
				token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')"
				fetch_blob "$token" "$image" "$imageLayer" "$dir/$layerId/layer.tar" --progress-bar
			done
			;;

		*)
			echo >&2 "error: unknown manifest schemaVersion ($imageIdentifier): '$schemaVersion'"
			exit 1
			;;
	esac

	echo

	# the first tag of an image registers the image; later tags are appended after a separator
	if [ -s "$dir/tags-$imageFile.tmp" ]; then
		echo -n ', ' >> "$dir/tags-$imageFile.tmp"
	else
		images+=("$image")
	fi
	echo -n '"'"$tag"'": "'"$imageId"'"' >> "$dir/tags-$imageFile.tmp"
done
# Assemble the old-style "repositories" file: one JSON member per image whose
# value is the tag list collected in "$dir/tags-<image>.tmp".
{
	printf '{'
	firstImage=1
	for image in "${images[@]}"; do
		imageFile="${image//\//_}" # "/" can't be in filenames :)
		image="${image#library\/}"

		# members after the first one are separated by a comma
		if [ -z "$firstImage" ]; then
			printf ','
		fi
		firstImage=

		printf '\n\t'
		printf '"%s": { %s }' "$image" "$(cat "$dir/tags-$imageFile.tmp")"
	done
	printf '\n}\n'
} > "$dir/repositories"

rm -f "$dir"/tags-*.tmp
# Write "manifest.json" from the collected entries; when generation was
# switched off or nothing was collected, make sure no such file is left in "$dir".
if [ -n "$doNotGenerateManifestJson" ] || [ "${#manifestJsonEntries[@]}" -eq 0 ]; then
	rm -f "$dir/manifest.json"
else
	# builds the jq filter ". + [ entry1 ] + [ entry2 ] ..." and applies it to an empty array
	jq --raw-output ".$(for entry in "${manifestJsonEntries[@]}"; do echo " + [ $entry ]"; done)" <<< '[]' > "$dir/manifest.json"
fi
# tell the user where the result is and how to load it
printf '%s\n' \
	"Download of images into '$dir' complete." \
	"Use something like the following to load the result into a Docker daemon:" \
	" tar -cC '$dir' . | docker load"
|