Bladeren bron

Make image (layer) downloads faster by using pigz

The Golang built-in gzip library is serialized, and fairly slow
at decompressing. It also only decompresses on demand, versus
pipelining decompression.

This change switches to using the external pigz command (unpigz)
for gzip decompression, as opposed to using the built-in
Go library. pigz is not vendored, but will be used
if the unpigz binary is found in PATH at startup.

This also switches to using context, rather than a manually
managed channel, to handle cancellation and synchronization.
There is a little bit of weirdness around having to call
cancel manually in the error cases.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Sargun Dhillon 7 jaren geleden
bovenliggende
commit
fd35494a25
10 gewijzigde bestanden met toevoegingen van 109 en 21 verwijderingen
  1. 1 0
      Dockerfile
  2. 1 0
      Dockerfile.aarch64
  3. 1 0
      Dockerfile.armhf
  4. 1 0
      Dockerfile.e2e
  5. 1 0
      Dockerfile.ppc64le
  6. 1 0
      Dockerfile.s390x
  7. 1 0
      Dockerfile.simple
  8. 52 14
      pkg/archive/archive.go
  9. 43 4
      pkg/archive/archive_test.go
  10. 7 3
      pkg/ioutils/readers.go

+ 1 - 0
Dockerfile

@@ -62,6 +62,7 @@ RUN apt-get update && apt-get install -y \
 	libudev-dev \
 	libudev-dev \
 	mercurial \
 	mercurial \
 	net-tools \
 	net-tools \
+	pigz \
 	pkg-config \
 	pkg-config \
 	protobuf-compiler \
 	protobuf-compiler \
 	protobuf-c-compiler \
 	protobuf-c-compiler \

+ 1 - 0
Dockerfile.aarch64

@@ -52,6 +52,7 @@ RUN apt-get update && apt-get install -y \
 	libudev-dev \
 	libudev-dev \
 	mercurial \
 	mercurial \
 	net-tools \
 	net-tools \
+	pigz \
 	pkg-config \
 	pkg-config \
 	protobuf-compiler \
 	protobuf-compiler \
 	protobuf-c-compiler \
 	protobuf-c-compiler \

+ 1 - 0
Dockerfile.armhf

@@ -45,6 +45,7 @@ RUN apt-get update && apt-get install -y \
 	libtool \
 	libtool \
 	libudev-dev \
 	libudev-dev \
 	mercurial \
 	mercurial \
+	pigz \
 	pkg-config \
 	pkg-config \
 	python-backports.ssl-match-hostname \
 	python-backports.ssl-match-hostname \
 	python-dev \
 	python-dev \

+ 1 - 0
Dockerfile.e2e

@@ -47,6 +47,7 @@ RUN apk add --update \
     g++ \
     g++ \
     git \
     git \
     iptables \
     iptables \
+    pigz \
     tar \
     tar \
     xz \
     xz \
     && rm -rf /var/cache/apk/*
     && rm -rf /var/cache/apk/*

+ 1 - 0
Dockerfile.ppc64le

@@ -46,6 +46,7 @@ RUN apt-get update && apt-get install -y \
 	libtool \
 	libtool \
 	libudev-dev \
 	libudev-dev \
 	mercurial \
 	mercurial \
+	pigz \
 	pkg-config \
 	pkg-config \
 	python-backports.ssl-match-hostname \
 	python-backports.ssl-match-hostname \
 	python-dev \
 	python-dev \

+ 1 - 0
Dockerfile.s390x

@@ -42,6 +42,7 @@ RUN apt-get update && apt-get install -y \
 	libtool \
 	libtool \
 	libudev-dev \
 	libudev-dev \
 	mercurial \
 	mercurial \
+	pigz \
 	pkg-config \
 	pkg-config \
 	python-backports.ssl-match-hostname \
 	python-backports.ssl-match-hostname \
 	python-dev \
 	python-dev \

+ 1 - 0
Dockerfile.simple

@@ -28,6 +28,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 		e2fsprogs \
 		e2fsprogs \
 		iptables \
 		iptables \
 		pkg-config \
 		pkg-config \
+		pigz \
 		procps \
 		procps \
 		xfsprogs \
 		xfsprogs \
 		xz-utils \
 		xz-utils \

+ 52 - 14
pkg/archive/archive.go

@@ -6,6 +6,7 @@ import (
 	"bytes"
 	"bytes"
 	"compress/bzip2"
 	"compress/bzip2"
 	"compress/gzip"
 	"compress/gzip"
+	"context"
 	"fmt"
 	"fmt"
 	"io"
 	"io"
 	"io/ioutil"
 	"io/ioutil"
@@ -13,6 +14,7 @@ import (
 	"os/exec"
 	"os/exec"
 	"path/filepath"
 	"path/filepath"
 	"runtime"
 	"runtime"
+	"strconv"
 	"strings"
 	"strings"
 	"syscall"
 	"syscall"
 
 
@@ -24,6 +26,17 @@ import (
 	"github.com/sirupsen/logrus"
 	"github.com/sirupsen/logrus"
 )
 )
 
 
+var unpigzPath string
+
+func init() {
+	if path, err := exec.LookPath("unpigz"); err != nil {
+		logrus.Debug("unpigz binary not found in PATH, falling back to go gzip library")
+	} else {
+		logrus.Debugf("Using unpigz binary found at path %s", path)
+		unpigzPath = path
+	}
+}
+
 type (
 type (
 	// Compression is the state represents if compressed or not.
 	// Compression is the state represents if compressed or not.
 	Compression int
 	Compression int
@@ -136,10 +149,34 @@ func DetectCompression(source []byte) Compression {
 	return Uncompressed
 	return Uncompressed
 }
 }
 
 
-func xzDecompress(archive io.Reader) (io.ReadCloser, <-chan struct{}, error) {
+func xzDecompress(ctx context.Context, archive io.Reader) (io.ReadCloser, error) {
 	args := []string{"xz", "-d", "-c", "-q"}
 	args := []string{"xz", "-d", "-c", "-q"}
 
 
-	return cmdStream(exec.Command(args[0], args[1:]...), archive)
+	return cmdStream(exec.CommandContext(ctx, args[0], args[1:]...), archive)
+}
+
+func gzDecompress(ctx context.Context, buf io.Reader) (io.ReadCloser, error) {
+	if unpigzPath == "" {
+		return gzip.NewReader(buf)
+	}
+
+	disablePigzEnv := os.Getenv("MOBY_DISABLE_PIGZ")
+	if disablePigzEnv != "" {
+		if disablePigz, err := strconv.ParseBool(disablePigzEnv); err != nil {
+			return nil, err
+		} else if disablePigz {
+			return gzip.NewReader(buf)
+		}
+	}
+
+	return cmdStream(exec.CommandContext(ctx, unpigzPath, "-d", "-c"), buf)
+}
+
+func wrapReadCloser(readBuf io.ReadCloser, cancel context.CancelFunc) io.ReadCloser {
+	return ioutils.NewReadCloserWrapper(readBuf, func() error {
+		cancel()
+		return readBuf.Close()
+	})
 }
 }
 
 
 // DecompressStream decompresses the archive and returns a ReaderCloser with the decompressed archive.
 // DecompressStream decompresses the archive and returns a ReaderCloser with the decompressed archive.
@@ -163,26 +200,29 @@ func DecompressStream(archive io.Reader) (io.ReadCloser, error) {
 		readBufWrapper := p.NewReadCloserWrapper(buf, buf)
 		readBufWrapper := p.NewReadCloserWrapper(buf, buf)
 		return readBufWrapper, nil
 		return readBufWrapper, nil
 	case Gzip:
 	case Gzip:
-		gzReader, err := gzip.NewReader(buf)
+		ctx, cancel := context.WithCancel(context.Background())
+
+		gzReader, err := gzDecompress(ctx, buf)
 		if err != nil {
 		if err != nil {
+			cancel()
 			return nil, err
 			return nil, err
 		}
 		}
 		readBufWrapper := p.NewReadCloserWrapper(buf, gzReader)
 		readBufWrapper := p.NewReadCloserWrapper(buf, gzReader)
-		return readBufWrapper, nil
+		return wrapReadCloser(readBufWrapper, cancel), nil
 	case Bzip2:
 	case Bzip2:
 		bz2Reader := bzip2.NewReader(buf)
 		bz2Reader := bzip2.NewReader(buf)
 		readBufWrapper := p.NewReadCloserWrapper(buf, bz2Reader)
 		readBufWrapper := p.NewReadCloserWrapper(buf, bz2Reader)
 		return readBufWrapper, nil
 		return readBufWrapper, nil
 	case Xz:
 	case Xz:
-		xzReader, chdone, err := xzDecompress(buf)
+		ctx, cancel := context.WithCancel(context.Background())
+
+		xzReader, err := xzDecompress(ctx, buf)
 		if err != nil {
 		if err != nil {
+			cancel()
 			return nil, err
 			return nil, err
 		}
 		}
 		readBufWrapper := p.NewReadCloserWrapper(buf, xzReader)
 		readBufWrapper := p.NewReadCloserWrapper(buf, xzReader)
-		return ioutils.NewReadCloserWrapper(readBufWrapper, func() error {
-			<-chdone
-			return readBufWrapper.Close()
-		}), nil
+		return wrapReadCloser(readBufWrapper, cancel), nil
 	default:
 	default:
 		return nil, fmt.Errorf("Unsupported compression format %s", (&compression).Extension())
 		return nil, fmt.Errorf("Unsupported compression format %s", (&compression).Extension())
 	}
 	}
@@ -1163,8 +1203,7 @@ func remapIDs(idMappings *idtools.IDMappings, hdr *tar.Header) error {
 // cmdStream executes a command, and returns its stdout as a stream.
 // cmdStream executes a command, and returns its stdout as a stream.
 // If the command fails to run or doesn't complete successfully, an error
 // If the command fails to run or doesn't complete successfully, an error
 // will be returned, including anything written on stderr.
 // will be returned, including anything written on stderr.
-func cmdStream(cmd *exec.Cmd, input io.Reader) (io.ReadCloser, <-chan struct{}, error) {
-	chdone := make(chan struct{})
+func cmdStream(cmd *exec.Cmd, input io.Reader) (io.ReadCloser, error) {
 	cmd.Stdin = input
 	cmd.Stdin = input
 	pipeR, pipeW := io.Pipe()
 	pipeR, pipeW := io.Pipe()
 	cmd.Stdout = pipeW
 	cmd.Stdout = pipeW
@@ -1173,7 +1212,7 @@ func cmdStream(cmd *exec.Cmd, input io.Reader) (io.ReadCloser, <-chan struct{},
 
 
 	// Run the command and return the pipe
 	// Run the command and return the pipe
 	if err := cmd.Start(); err != nil {
 	if err := cmd.Start(); err != nil {
-		return nil, nil, err
+		return nil, err
 	}
 	}
 
 
 	// Copy stdout to the returned pipe
 	// Copy stdout to the returned pipe
@@ -1183,10 +1222,9 @@ func cmdStream(cmd *exec.Cmd, input io.Reader) (io.ReadCloser, <-chan struct{},
 		} else {
 		} else {
 			pipeW.Close()
 			pipeW.Close()
 		}
 		}
-		close(chdone)
 	}()
 	}()
 
 
-	return pipeR, chdone, nil
+	return pipeR, nil
 }
 }
 
 
 // NewTempArchive reads the content of src into a temporary file, and returns the contents
 // NewTempArchive reads the content of src into a temporary file, and returns the contents

+ 43 - 4
pkg/archive/archive_test.go

@@ -3,6 +3,7 @@ package archive
 import (
 import (
 	"archive/tar"
 	"archive/tar"
 	"bytes"
 	"bytes"
+	"compress/gzip"
 	"fmt"
 	"fmt"
 	"io"
 	"io"
 	"io/ioutil"
 	"io/ioutil"
@@ -15,6 +16,7 @@ import (
 	"time"
 	"time"
 
 
 	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/idtools"
+	"github.com/docker/docker/pkg/ioutils"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	"github.com/stretchr/testify/require"
 )
 )
@@ -87,7 +89,7 @@ func TestIsArchivePathTar(t *testing.T) {
 	}
 	}
 }
 }
 
 
-func testDecompressStream(t *testing.T, ext, compressCommand string) {
+func testDecompressStream(t *testing.T, ext, compressCommand string) io.Reader {
 	cmd := exec.Command("sh", "-c",
 	cmd := exec.Command("sh", "-c",
 		fmt.Sprintf("touch /tmp/archive && %s /tmp/archive", compressCommand))
 		fmt.Sprintf("touch /tmp/archive && %s /tmp/archive", compressCommand))
 	output, err := cmd.CombinedOutput()
 	output, err := cmd.CombinedOutput()
@@ -111,6 +113,8 @@ func testDecompressStream(t *testing.T, ext, compressCommand string) {
 	if err = r.Close(); err != nil {
 	if err = r.Close(); err != nil {
 		t.Fatalf("Failed to close the decompressed stream: %v ", err)
 		t.Fatalf("Failed to close the decompressed stream: %v ", err)
 	}
 	}
+
+	return r
 }
 }
 
 
 func TestDecompressStreamGzip(t *testing.T) {
 func TestDecompressStreamGzip(t *testing.T) {
@@ -206,7 +210,7 @@ func TestExtensionXz(t *testing.T) {
 
 
 func TestCmdStreamLargeStderr(t *testing.T) {
 func TestCmdStreamLargeStderr(t *testing.T) {
 	cmd := exec.Command("sh", "-c", "dd if=/dev/zero bs=1k count=1000 of=/dev/stderr; echo hello")
 	cmd := exec.Command("sh", "-c", "dd if=/dev/zero bs=1k count=1000 of=/dev/stderr; echo hello")
-	out, _, err := cmdStream(cmd, nil)
+	out, err := cmdStream(cmd, nil)
 	if err != nil {
 	if err != nil {
 		t.Fatalf("Failed to start command: %s", err)
 		t.Fatalf("Failed to start command: %s", err)
 	}
 	}
@@ -231,7 +235,7 @@ func TestCmdStreamBad(t *testing.T) {
 		t.Skip("Failing on Windows CI machines")
 		t.Skip("Failing on Windows CI machines")
 	}
 	}
 	badCmd := exec.Command("sh", "-c", "echo hello; echo >&2 error couldn\\'t reverse the phase pulser; exit 1")
 	badCmd := exec.Command("sh", "-c", "echo hello; echo >&2 error couldn\\'t reverse the phase pulser; exit 1")
-	out, _, err := cmdStream(badCmd, nil)
+	out, err := cmdStream(badCmd, nil)
 	if err != nil {
 	if err != nil {
 		t.Fatalf("Failed to start command: %s", err)
 		t.Fatalf("Failed to start command: %s", err)
 	}
 	}
@@ -246,7 +250,7 @@ func TestCmdStreamBad(t *testing.T) {
 
 
 func TestCmdStreamGood(t *testing.T) {
 func TestCmdStreamGood(t *testing.T) {
 	cmd := exec.Command("sh", "-c", "echo hello; exit 0")
 	cmd := exec.Command("sh", "-c", "echo hello; exit 0")
-	out, _, err := cmdStream(cmd, nil)
+	out, err := cmdStream(cmd, nil)
 	if err != nil {
 	if err != nil {
 		t.Fatal(err)
 		t.Fatal(err)
 	}
 	}
@@ -1318,3 +1322,38 @@ func readFileFromArchive(t *testing.T, archive io.ReadCloser, name string, expec
 	assert.NoError(t, err)
 	assert.NoError(t, err)
 	return string(content)
 	return string(content)
 }
 }
+
+func TestDisablePigz(t *testing.T) {
+	_, err := exec.LookPath("unpigz")
+	if err != nil {
+		t.Log("Test will not check full path when Pigz not installed")
+	}
+
+	os.Setenv("MOBY_DISABLE_PIGZ", "true")
+	defer os.Unsetenv("MOBY_DISABLE_PIGZ")
+
+	r := testDecompressStream(t, "gz", "gzip -f")
+	// For the bufio pool
+	outsideReaderCloserWrapper := r.(*ioutils.ReadCloserWrapper)
+	// For the context canceller
+	contextReaderCloserWrapper := outsideReaderCloserWrapper.Reader.(*ioutils.ReadCloserWrapper)
+
+	assert.IsType(t, &gzip.Reader{}, contextReaderCloserWrapper.Reader)
+}
+
+func TestPigz(t *testing.T) {
+	r := testDecompressStream(t, "gz", "gzip -f")
+	// For the bufio pool
+	outsideReaderCloserWrapper := r.(*ioutils.ReadCloserWrapper)
+	// For the context canceller
+	contextReaderCloserWrapper := outsideReaderCloserWrapper.Reader.(*ioutils.ReadCloserWrapper)
+
+	_, err := exec.LookPath("unpigz")
+	if err == nil {
+		t.Log("Tested whether Pigz is used, as it installed")
+		assert.IsType(t, &io.PipeReader{}, contextReaderCloserWrapper.Reader)
+	} else {
+		t.Log("Tested whether Pigz is not used, as it not installed")
+		assert.IsType(t, &gzip.Reader{}, contextReaderCloserWrapper.Reader)
+	}
+}

+ 7 - 3
pkg/ioutils/readers.go

@@ -8,18 +8,22 @@ import (
 	"golang.org/x/net/context"
 	"golang.org/x/net/context"
 )
 )
 
 
-type readCloserWrapper struct {
+// ReadCloserWrapper wraps an io.Reader, and implements an io.ReadCloser
+// It calls the given callback function when closed. It should be constructed
+// with NewReadCloserWrapper
+type ReadCloserWrapper struct {
 	io.Reader
 	io.Reader
 	closer func() error
 	closer func() error
 }
 }
 
 
-func (r *readCloserWrapper) Close() error {
+// Close calls back the passed closer function
+func (r *ReadCloserWrapper) Close() error {
 	return r.closer()
 	return r.closer()
 }
 }
 
 
 // NewReadCloserWrapper returns a new io.ReadCloser.
 // NewReadCloserWrapper returns a new io.ReadCloser.
 func NewReadCloserWrapper(r io.Reader, closer func() error) io.ReadCloser {
 func NewReadCloserWrapper(r io.Reader, closer func() error) io.ReadCloser {
-	return &readCloserWrapper{
+	return &ReadCloserWrapper{
 		Reader: r,
 		Reader: r,
 		closer: closer,
 		closer: closer,
 	}
 	}