浏览代码

compression: support zstd with skippable frame

As a matter of fact, there are two frame formats defined by Zstandard: Zstandard frames and Skippable frames.
So compression detection should also recognize zstd data that starts with a skippable frame.
See https://tools.ietf.org/id/draft-kucherawy-dispatch-zstd-00.html#rfc.section.2 for more details.

Signed-off-by: Da McGrady <dabkb@aol.com>
Da McGrady 3 年之前
父节点
当前提交
23abee412b
共有 2 个文件被更改,包括 80 次插入和 7 次删除
  1. 52 7
      pkg/archive/archive.go
  2. 28 0
      pkg/archive/archive_test.go

+ 52 - 7
pkg/archive/archive.go

@@ -7,6 +7,7 @@ import (
 	"compress/bzip2"
 	"compress/gzip"
 	"context"
+	"encoding/binary"
 	"fmt"
 	"io"
 	"os"
@@ -124,15 +125,59 @@ func IsArchivePath(path string) bool {
 	return err == nil
 }
 
+const (
+	zstdMagicSkippableStart = 0x184D2A50
+	zstdMagicSkippableMask  = 0xFFFFFFF0
+)
+
+var (
+	bzip2Magic = []byte{0x42, 0x5A, 0x68}
+	gzipMagic  = []byte{0x1F, 0x8B, 0x08}
+	xzMagic    = []byte{0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00}
+	zstdMagic  = []byte{0x28, 0xb5, 0x2f, 0xfd}
+)
+
+type matcher = func([]byte) bool
+
+func magicNumberMatcher(m []byte) matcher {
+	return func(source []byte) bool {
+		return bytes.HasPrefix(source, m)
+	}
+}
+
+// zstdMatcher returns a matcher that reports whether source starts with zstd-compressed data.
+// Zstandard compressed data is made of one or more frames.
+// There are two frame formats defined by Zstandard, Zstandard frames and Skippable frames, and data starting with either one is matched.
+// See https://tools.ietf.org/id/draft-kucherawy-dispatch-zstd-00.html#rfc.section.2 for more details.
+func zstdMatcher() matcher {
+	return func(source []byte) bool {
+		if bytes.HasPrefix(source, zstdMagic) {
+			// Zstandard frame
+			return true
+		}
+		// skippable frame
+		if len(source) < 8 {
+			return false
+		}
+		// Skippable frame magic numbers range from 0x184D2A50 to 0x184D2A5F (read as little-endian); masking off the low 4 bits matches all of them.
+		if binary.LittleEndian.Uint32(source[:4])&zstdMagicSkippableMask == zstdMagicSkippableStart {
+			return true
+		}
+		return false
+	}
+}
+
 // DetectCompression detects the compression algorithm of the source.
 func DetectCompression(source []byte) Compression {
-	for compression, m := range map[Compression][]byte{
-		Bzip2: {0x42, 0x5A, 0x68},
-		Gzip:  {0x1F, 0x8B, 0x08},
-		Xz:    {0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00},
-		Zstd:  {0x28, 0xb5, 0x2f, 0xfd},
-	} {
-		if bytes.HasPrefix(source, m) {
+	compressionMap := map[Compression]matcher{
+		Bzip2: magicNumberMatcher(bzip2Magic),
+		Gzip:  magicNumberMatcher(gzipMagic),
+		Xz:    magicNumberMatcher(xzMagic),
+		Zstd:  zstdMatcher(),
+	}
+	for _, compression := range []Compression{Bzip2, Gzip, Xz, Zstd} {
+		fn := compressionMap[compression]
+		if fn(source) {
 			return compression
 		}
 	}

+ 28 - 0
pkg/archive/archive_test.go

@@ -700,6 +700,34 @@ func tarUntar(t *testing.T, origin string, options *TarOptions) ([]Change, error
 	return ChangesDirs(origin, tmp)
 }
 
+func TestDetectCompressionZstd(t *testing.T) {
+	// test zstd compression without skippable frames.
+	compressedData := []byte{
+		0x28, 0xb5, 0x2f, 0xfd, // magic number of Zstandard frame: 0xFD2FB528
+		0x04, 0x00, 0x31, 0x00, 0x00, // frame header
+		0x64, 0x6f, 0x63, 0x6b, 0x65, 0x72, // data block "docker"
+		0x16, 0x0e, 0x21, 0xc3, // content checksum
+	}
+	compression := DetectCompression(compressedData)
+	if compression != Zstd {
+		t.Fatal("Unexpected compression")
+	}
+	// test zstd compression with skippable frames.
+	hex := []byte{
+		0x50, 0x2a, 0x4d, 0x18, // magic number of skippable frame: 0x184D2A50 to 0x184D2A5F
+		0x04, 0x00, 0x00, 0x00, // frame size
+		0x5d, 0x00, 0x00, 0x00, // user data
+		0x28, 0xb5, 0x2f, 0xfd, // magic number of Zstandard frame: 0xFD2FB528
+		0x04, 0x00, 0x31, 0x00, 0x00, // frame header
+		0x64, 0x6f, 0x63, 0x6b, 0x65, 0x72, // data block "docker"
+		0x16, 0x0e, 0x21, 0xc3, // content checksum
+	}
+	compression = DetectCompression(hex)
+	if compression != Zstd {
+		t.Fatal("Unexpected compression")
+	}
+}
+
 func TestTarUntar(t *testing.T) {
 	origin, err := os.MkdirTemp("", "docker-test-untar-origin")
 	if err != nil {