
vendor: github.com/klauspost/compress v1.16.3

full diff: https://github.com/klauspost/compress/compare/v1.15.12...v1.16.3

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
Sebastiaan van Stijn, 2 years ago
parent commit b98c05dc50
37 changed files with 1037 additions and 914 deletions
  1. vendor.mod (+1 -1)
  2. vendor.sum (+2 -1)
  3. vendor/github.com/klauspost/compress/.goreleaser.yml (+1 -1)
  4. vendor/github.com/klauspost/compress/README.md (+37 -1)
  5. vendor/github.com/klauspost/compress/fse/compress.go (+15 -16)
  6. vendor/github.com/klauspost/compress/fse/decompress.go (+3 -1)
  7. vendor/github.com/klauspost/compress/huff0/bitreader.go (+2 -6)
  8. vendor/github.com/klauspost/compress/huff0/bitwriter.go (+16 -0)
  9. vendor/github.com/klauspost/compress/huff0/compress.go (+68 -49)
  10. vendor/github.com/klauspost/compress/huff0/decompress.go (+1 -1)
  11. vendor/github.com/klauspost/compress/huff0/decompress_amd64.s (+284 -300)
  12. vendor/github.com/klauspost/compress/internal/snapref/encode_other.go (+22 -0)
  13. vendor/github.com/klauspost/compress/zstd/blockdec.go (+12 -6)
  14. vendor/github.com/klauspost/compress/zstd/bytebuf.go (+1 -1)
  15. vendor/github.com/klauspost/compress/zstd/decodeheader.go (+4 -5)
  16. vendor/github.com/klauspost/compress/zstd/decoder.go (+48 -45)
  17. vendor/github.com/klauspost/compress/zstd/decoder_options.go (+23 -3)
  18. vendor/github.com/klauspost/compress/zstd/dict.go (+45 -6)
  19. vendor/github.com/klauspost/compress/zstd/enc_base.go (+6 -22)
  20. vendor/github.com/klauspost/compress/zstd/enc_best.go (+35 -39)
  21. vendor/github.com/klauspost/compress/zstd/enc_better.go (+4 -8)
  22. vendor/github.com/klauspost/compress/zstd/enc_dfast.go (+6 -10)
  23. vendor/github.com/klauspost/compress/zstd/enc_fast.go (+5 -7)
  24. vendor/github.com/klauspost/compress/zstd/encoder.go (+35 -0)
  25. vendor/github.com/klauspost/compress/zstd/encoder_options.go (+29 -7)
  26. vendor/github.com/klauspost/compress/zstd/framedec.go (+19 -28)
  27. vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md (+31 -18)
  28. vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go (+20 -27)
  29. vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s (+165 -171)
  30. vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s (+69 -71)
  31. vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go (+1 -1)
  32. vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go (+9 -10)
  33. vendor/github.com/klauspost/compress/zstd/seqdec.go (+1 -5)
  34. vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go (+0 -1)
  35. vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s (+4 -24)
  36. vendor/github.com/klauspost/compress/zstd/zstd.go (+11 -20)
  37. vendor/modules.txt (+2 -2)

+ 1 - 1
vendor.mod

@@ -53,7 +53,7 @@ require (
 	github.com/hashicorp/serf v0.8.5
 	github.com/imdario/mergo v0.3.13
 	github.com/ishidawataru/sctp v0.0.0-20210707070123-9a39160e9062
-	github.com/klauspost/compress v1.15.12
+	github.com/klauspost/compress v1.16.3
 	github.com/miekg/dns v1.1.43
 	github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible
 	github.com/moby/buildkit v0.11.4

+ 2 - 1
vendor.sum

@@ -957,8 +957,9 @@ github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0
 github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
 github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
-github.com/klauspost/compress v1.15.12 h1:YClS/PImqYbn+UILDnqxQCZ3RehC9N318SU3kElDUEM=
 github.com/klauspost/compress v1.15.12/go.mod h1:QPwzmACJjUTFsnSHH934V6woptycfrDDJnH7hvFVbGM=
+github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY=
+github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
 github.com/klauspost/cpuid v0.0.0-20180405133222-e7e905edc00e/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=

+ 1 - 1
vendor/github.com/klauspost/compress/.goreleaser.yml

@@ -3,7 +3,7 @@
 before:
   hooks:
     - ./gen.sh
-    - go install mvdan.cc/garble@latest
+    - go install mvdan.cc/garble@v0.9.3
 
 builds:
   -

+ 37 - 1
vendor/github.com/klauspost/compress/README.md

@@ -9,7 +9,6 @@ This package provides various compression algorithms.
 * [huff0](https://github.com/klauspost/compress/tree/master/huff0) and [FSE](https://github.com/klauspost/compress/tree/master/fse) implementations for raw entropy encoding.
 * [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp) Provides client and server wrappers for handling gzipped requests efficiently.
 * [pgzip](https://github.com/klauspost/pgzip) is a separate package that provides a very fast parallel gzip implementation.
-* [fuzz package](https://github.com/klauspost/compress-fuzz) for fuzz testing all compressors/decompressors here.
 
 [![Go Reference](https://pkg.go.dev/badge/klauspost/compress.svg)](https://pkg.go.dev/github.com/klauspost/compress?tab=subdirectories)
 [![Go](https://github.com/klauspost/compress/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/compress/actions/workflows/go.yml)
@@ -17,6 +16,43 @@ This package provides various compression algorithms.
 
 # changelog
 
+* Mar 13, 2023 - [v1.16.1](https://github.com/klauspost/compress/releases/tag/v1.16.1)
+	* zstd: Speed up + improve best encoder by @greatroar in https://github.com/klauspost/compress/pull/776
+	* gzhttp: Add optional [BREACH mitigation](https://github.com/klauspost/compress/tree/master/gzhttp#breach-mitigation). https://github.com/klauspost/compress/pull/762 https://github.com/klauspost/compress/pull/768 https://github.com/klauspost/compress/pull/769 https://github.com/klauspost/compress/pull/770 https://github.com/klauspost/compress/pull/767
+	* s2: Add Intel LZ4s converter https://github.com/klauspost/compress/pull/766
+	* zstd: Minor bug fixes https://github.com/klauspost/compress/pull/771 https://github.com/klauspost/compress/pull/772 https://github.com/klauspost/compress/pull/773
+	* huff0: Speed up compress1xDo by @greatroar in https://github.com/klauspost/compress/pull/774
+
+* Feb 26, 2023 - [v1.16.0](https://github.com/klauspost/compress/releases/tag/v1.16.0)
+	* s2: Add [Dictionary](https://github.com/klauspost/compress/tree/master/s2#dictionaries) support.  https://github.com/klauspost/compress/pull/685
+	* s2: Add Compression Size Estimate.  https://github.com/klauspost/compress/pull/752
+	* s2: Add support for custom stream encoder. https://github.com/klauspost/compress/pull/755
+	* s2: Add LZ4 block converter. https://github.com/klauspost/compress/pull/748
+	* s2: Support io.ReaderAt in ReadSeeker. https://github.com/klauspost/compress/pull/747
+	* s2c/s2sx: Use concurrent decoding. https://github.com/klauspost/compress/pull/746
+
+* Jan 21st, 2023 (v1.15.15)
+	* deflate: Improve level 7-9 by @klauspost in https://github.com/klauspost/compress/pull/739
+	* zstd: Add delta encoding support by @greatroar in https://github.com/klauspost/compress/pull/728
+	* zstd: Various speed improvements by @greatroar https://github.com/klauspost/compress/pull/741 https://github.com/klauspost/compress/pull/734 https://github.com/klauspost/compress/pull/736 https://github.com/klauspost/compress/pull/744 https://github.com/klauspost/compress/pull/743 https://github.com/klauspost/compress/pull/745
+	* gzhttp: Add SuffixETag() and DropETag() options to prevent ETag collisions on compressed responses by @willbicks in https://github.com/klauspost/compress/pull/740
+
+* Jan 3rd, 2023 (v1.15.14)
+
+	* flate: Improve speed in big stateless blocks https://github.com/klauspost/compress/pull/718
+	* zstd: Minor speed tweaks by @greatroar in https://github.com/klauspost/compress/pull/716 https://github.com/klauspost/compress/pull/720
+	* export NoGzipResponseWriter for custom ResponseWriter wrappers by @harshavardhana in https://github.com/klauspost/compress/pull/722
+	* s2: Add example for indexing and existing stream https://github.com/klauspost/compress/pull/723
+
+* Dec 11, 2022 (v1.15.13)
+	* zstd: Add [MaxEncodedSize](https://pkg.go.dev/github.com/klauspost/compress@v1.15.13/zstd#Encoder.MaxEncodedSize) to encoder  https://github.com/klauspost/compress/pull/691
+	* zstd: Various tweaks and improvements https://github.com/klauspost/compress/pull/693 https://github.com/klauspost/compress/pull/695 https://github.com/klauspost/compress/pull/696 https://github.com/klauspost/compress/pull/701 https://github.com/klauspost/compress/pull/702 https://github.com/klauspost/compress/pull/703 https://github.com/klauspost/compress/pull/704 https://github.com/klauspost/compress/pull/705 https://github.com/klauspost/compress/pull/706 https://github.com/klauspost/compress/pull/707 https://github.com/klauspost/compress/pull/708
+
+* Oct 26, 2022 (v1.15.12)
+
+	* zstd: Tweak decoder allocs. https://github.com/klauspost/compress/pull/680
+	* gzhttp: Always delete `HeaderNoCompression` https://github.com/klauspost/compress/pull/683
+
 * Sept 26, 2022 (v1.15.11)
 
 	* flate: Improve level 1-3 compression  https://github.com/klauspost/compress/pull/678

+ 15 - 16
vendor/github.com/klauspost/compress/fse/compress.go

@@ -146,54 +146,51 @@ func (s *Scratch) compress(src []byte) error {
 		c1.encodeZero(tt[src[ip-2]])
 		ip -= 2
 	}
+	src = src[:ip]
 
 	// Main compression loop.
 	switch {
 	case !s.zeroBits && s.actualTableLog <= 8:
 		// We can encode 4 symbols without requiring a flush.
 		// We do not need to check if any output is 0 bits.
-		for ip >= 4 {
+		for ; len(src) >= 4; src = src[:len(src)-4] {
 			s.bw.flush32()
-			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+			v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
 			c2.encode(tt[v0])
 			c1.encode(tt[v1])
 			c2.encode(tt[v2])
 			c1.encode(tt[v3])
-			ip -= 4
 		}
 	case !s.zeroBits:
 		// We do not need to check if any output is 0 bits.
-		for ip >= 4 {
+		for ; len(src) >= 4; src = src[:len(src)-4] {
 			s.bw.flush32()
-			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+			v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
 			c2.encode(tt[v0])
 			c1.encode(tt[v1])
 			s.bw.flush32()
 			c2.encode(tt[v2])
 			c1.encode(tt[v3])
-			ip -= 4
 		}
 	case s.actualTableLog <= 8:
 		// We can encode 4 symbols without requiring a flush
-		for ip >= 4 {
+		for ; len(src) >= 4; src = src[:len(src)-4] {
 			s.bw.flush32()
-			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+			v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
 			c2.encodeZero(tt[v0])
 			c1.encodeZero(tt[v1])
 			c2.encodeZero(tt[v2])
 			c1.encodeZero(tt[v3])
-			ip -= 4
 		}
 	default:
-		for ip >= 4 {
+		for ; len(src) >= 4; src = src[:len(src)-4] {
 			s.bw.flush32()
-			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+			v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
 			c2.encodeZero(tt[v0])
 			c1.encodeZero(tt[v1])
 			s.bw.flush32()
 			c2.encodeZero(tt[v2])
 			c1.encodeZero(tt[v3])
-			ip -= 4
 		}
 	}
 
@@ -459,15 +456,17 @@ func (s *Scratch) countSimple(in []byte) (max int) {
 	for _, v := range in {
 		s.count[v]++
 	}
-	m := uint32(0)
+	m, symlen := uint32(0), s.symbolLen
 	for i, v := range s.count[:] {
+		if v == 0 {
+			continue
+		}
 		if v > m {
 			m = v
 		}
-		if v > 0 {
-			s.symbolLen = uint16(i) + 1
-		}
+		symlen = uint16(i) + 1
 	}
+	s.symbolLen = symlen
 	return int(m)
 }
 

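The loop rewrite above trades the `ip` index for re-slicing `src`, so every access has the form `src[len(src)-k]` with `len(src) >= 4` guaranteed by the loop condition, and the compiler can drop the per-access bounds checks. A minimal sketch of the same pattern outside the library (the `sum4` helper is hypothetical):

```go
package main

import "fmt"

// sum4 walks src from the end, four bytes at a time, the same way the
// rewritten fse compression loop does. Because the loop condition is
// len(src) >= 4 and src only ever shrinks, the compiler can prove the
// four indexes below are in bounds and omit the checks.
func sum4(src []byte) (sum int) {
	for ; len(src) >= 4; src = src[:len(src)-4] {
		v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
		sum += int(v0) + int(v1) + int(v2) + int(v3)
	}
	// Handle the 0-3 byte tail.
	for _, v := range src {
		sum += int(v)
	}
	return sum
}

func main() {
	fmt.Println(sum4([]byte{1, 2, 3, 4, 5, 6})) // 21
}
```
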
+ 3 - 1
vendor/github.com/klauspost/compress/fse/decompress.go

@@ -260,7 +260,9 @@ func (s *Scratch) buildDtable() error {
 // If the buffer is over-read an error is returned.
 func (s *Scratch) decompress() error {
 	br := &s.bits
-	br.init(s.br.unread())
+	if err := br.init(s.br.unread()); err != nil {
+		return err
+	}
 
 	var s1, s2 decoder
 	// Initialize and decode first state and symbol.

+ 2 - 6
vendor/github.com/klauspost/compress/huff0/bitreader.go

@@ -67,7 +67,6 @@ func (b *bitReaderBytes) fillFast() {
 
 	// 2 bounds checks.
 	v := b.in[b.off-4 : b.off]
-	v = v[:4]
 	low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 	b.value |= uint64(low) << (b.bitsRead - 32)
 	b.bitsRead -= 32
@@ -88,8 +87,7 @@ func (b *bitReaderBytes) fill() {
 		return
 	}
 	if b.off > 4 {
-		v := b.in[b.off-4:]
-		v = v[:4]
+		v := b.in[b.off-4 : b.off]
 		low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 		b.value |= uint64(low) << (b.bitsRead - 32)
 		b.bitsRead -= 32
@@ -179,7 +177,6 @@ func (b *bitReaderShifted) fillFast() {
 
 	// 2 bounds checks.
 	v := b.in[b.off-4 : b.off]
-	v = v[:4]
 	low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 	b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
 	b.bitsRead -= 32
@@ -200,8 +197,7 @@ func (b *bitReaderShifted) fill() {
 		return
 	}
 	if b.off > 4 {
-		v := b.in[b.off-4:]
-		v = v[:4]
+		v := b.in[b.off-4 : b.off]
 		low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 		b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
 		b.bitsRead -= 32

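Both `fill` paths now slice with an explicit upper bound, `b.in[b.off-4 : b.off]`, instead of re-slicing with `v = v[:4]`; the two-index form already tells the compiler the slice has length 4. A small sketch of the equivalent read (names are illustrative):

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// readLow32 reads the 4 bytes ending at off, little-endian, mirroring the
// bitreader fill: one two-index slice gives the compiler both bounds.
func readLow32(in []byte, off int) uint32 {
	v := in[off-4 : off] // 2 bounds checks, len(v) == 4 is known after this
	return binary.LittleEndian.Uint32(v)
}

func main() {
	buf := []byte{0x01, 0x02, 0x03, 0x04, 0x05}
	fmt.Printf("%08x\n", readLow32(buf, 4)) // 04030201
}
```
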
+ 16 - 0
vendor/github.com/klauspost/compress/huff0/bitwriter.go

@@ -60,6 +60,22 @@ func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) {
 	b.nBits += encA.nBits + encB.nBits
 }
 
+// encFourSymbols adds up to 32 bits from four symbols.
+// It will not check if there is space for them,
+// so the caller must ensure that b has been flushed recently.
+func (b *bitWriter) encFourSymbols(encA, encB, encC, encD cTableEntry) {
+	bitsA := encA.nBits
+	bitsB := bitsA + encB.nBits
+	bitsC := bitsB + encC.nBits
+	bitsD := bitsC + encD.nBits
+	combined := uint64(encA.val) |
+		(uint64(encB.val) << (bitsA & 63)) |
+		(uint64(encC.val) << (bitsB & 63)) |
+		(uint64(encD.val) << (bitsC & 63))
+	b.bitContainer |= combined << (b.nBits & 63)
+	b.nBits += bitsD
+}
+
 // flush32 will flush out, so there are at least 32 bits available for writing.
 func (b *bitWriter) flush32() {
 	if b.nBits < 32 {

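`encFourSymbols` builds one 64-bit word from four codes and ORs it into the bit container once, where `encTwoSymbols` did the shifting twice. A self-contained sketch of the combining step, with a simplified stand-in for `cTableEntry`:

```go
package main

import "fmt"

// entry is a stand-in for huff0's cTableEntry: a code value and its length.
type entry struct {
	val   uint16
	nBits uint8
}

// combine packs four codes into one word, LSB-first, exactly like
// encFourSymbols: each code is shifted past the bits already used.
// The caller must ensure the total stays <= 32 bits (hence flush32).
func combine(a, b, c, d entry) (bits uint64, n uint8) {
	bitsA := a.nBits
	bitsB := bitsA + b.nBits
	bitsC := bitsB + c.nBits
	n = bitsC + d.nBits
	bits = uint64(a.val) |
		uint64(b.val)<<(bitsA&63) |
		uint64(c.val)<<(bitsB&63) |
		uint64(d.val)<<(bitsC&63)
	return bits, n
}

func main() {
	bits, n := combine(entry{0b101, 3}, entry{0b01, 2}, entry{0b1, 1}, entry{0b11, 2})
	fmt.Printf("%0*b (%d bits)\n", int(n), bits, n) // 11101101 (8 bits)
}
```
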
+ 68 - 49
vendor/github.com/klauspost/compress/huff0/compress.go

@@ -248,8 +248,7 @@ func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
 			tmp := src[n : n+4]
 			// tmp should be len 4
 			bw.flush32()
-			bw.encTwoSymbols(cTable, tmp[3], tmp[2])
-			bw.encTwoSymbols(cTable, tmp[1], tmp[0])
+			bw.encFourSymbols(cTable[tmp[3]], cTable[tmp[2]], cTable[tmp[1]], cTable[tmp[0]])
 		}
 	} else {
 		for ; n >= 0; n -= 4 {
@@ -365,29 +364,29 @@ func (s *Scratch) countSimple(in []byte) (max int, reuse bool) {
 	m := uint32(0)
 	if len(s.prevTable) > 0 {
 		for i, v := range s.count[:] {
+			if v == 0 {
+				continue
+			}
 			if v > m {
 				m = v
 			}
-			if v > 0 {
-				s.symbolLen = uint16(i) + 1
-				if i >= len(s.prevTable) {
-					reuse = false
-				} else {
-					if s.prevTable[i].nBits == 0 {
-						reuse = false
-					}
-				}
+			s.symbolLen = uint16(i) + 1
+			if i >= len(s.prevTable) {
+				reuse = false
+			} else if s.prevTable[i].nBits == 0 {
+				reuse = false
 			}
 		}
 		return int(m), reuse
 	}
 	for i, v := range s.count[:] {
+		if v == 0 {
+			continue
+		}
 		if v > m {
 			m = v
 		}
-		if v > 0 {
-			s.symbolLen = uint16(i) + 1
-		}
+		s.symbolLen = uint16(i) + 1
 	}
 	return int(m), false
 }
@@ -484,34 +483,35 @@ func (s *Scratch) buildCTable() error {
 	// Different from reference implementation.
 	huffNode0 := s.nodes[0 : huffNodesLen+1]
 
-	for huffNode[nonNullRank].count == 0 {
+	for huffNode[nonNullRank].count() == 0 {
 		nonNullRank--
 	}
 
 	lowS := int16(nonNullRank)
 	nodeRoot := nodeNb + lowS - 1
 	lowN := nodeNb
-	huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count
-	huffNode[lowS].parent, huffNode[lowS-1].parent = uint16(nodeNb), uint16(nodeNb)
+	huffNode[nodeNb].setCount(huffNode[lowS].count() + huffNode[lowS-1].count())
+	huffNode[lowS].setParent(nodeNb)
+	huffNode[lowS-1].setParent(nodeNb)
 	nodeNb++
 	lowS -= 2
 	for n := nodeNb; n <= nodeRoot; n++ {
-		huffNode[n].count = 1 << 30
+		huffNode[n].setCount(1 << 30)
 	}
 	// fake entry, strong barrier
-	huffNode0[0].count = 1 << 31
+	huffNode0[0].setCount(1 << 31)
 
 	// create parents
 	for nodeNb <= nodeRoot {
 		var n1, n2 int16
-		if huffNode0[lowS+1].count < huffNode0[lowN+1].count {
+		if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() {
 			n1 = lowS
 			lowS--
 		} else {
 			n1 = lowN
 			lowN++
 		}
-		if huffNode0[lowS+1].count < huffNode0[lowN+1].count {
+		if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() {
 			n2 = lowS
 			lowS--
 		} else {
@@ -519,18 +519,19 @@ func (s *Scratch) buildCTable() error {
 			lowN++
 		}
 
-		huffNode[nodeNb].count = huffNode0[n1+1].count + huffNode0[n2+1].count
-		huffNode0[n1+1].parent, huffNode0[n2+1].parent = uint16(nodeNb), uint16(nodeNb)
+		huffNode[nodeNb].setCount(huffNode0[n1+1].count() + huffNode0[n2+1].count())
+		huffNode0[n1+1].setParent(nodeNb)
+		huffNode0[n2+1].setParent(nodeNb)
 		nodeNb++
 	}
 
 	// distribute weights (unlimited tree height)
-	huffNode[nodeRoot].nbBits = 0
+	huffNode[nodeRoot].setNbBits(0)
 	for n := nodeRoot - 1; n >= startNode; n-- {
-		huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1
+		huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1)
 	}
 	for n := uint16(0); n <= nonNullRank; n++ {
-		huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1
+		huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1)
 	}
 	s.actualTableLog = s.setMaxHeight(int(nonNullRank))
 	maxNbBits := s.actualTableLog
@@ -542,7 +543,7 @@ func (s *Scratch) buildCTable() error {
 	var nbPerRank [tableLogMax + 1]uint16
 	var valPerRank [16]uint16
 	for _, v := range huffNode[:nonNullRank+1] {
-		nbPerRank[v.nbBits]++
+		nbPerRank[v.nbBits()]++
 	}
 	// determine starting value per rank
 	{
@@ -557,7 +558,7 @@ func (s *Scratch) buildCTable() error {
 
 	// push nbBits per symbol, symbol order
 	for _, v := range huffNode[:nonNullRank+1] {
-		s.cTable[v.symbol].nBits = v.nbBits
+		s.cTable[v.symbol()].nBits = v.nbBits()
 	}
 
 	// assign value within rank, symbol order
@@ -603,12 +604,12 @@ func (s *Scratch) huffSort() {
 		pos := rank[r].current
 		rank[r].current++
 		prev := nodes[(pos-1)&huffNodesMask]
-		for pos > rank[r].base && c > prev.count {
+		for pos > rank[r].base && c > prev.count() {
 			nodes[pos&huffNodesMask] = prev
 			pos--
 			prev = nodes[(pos-1)&huffNodesMask]
 		}
-		nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)}
+		nodes[pos&huffNodesMask] = makeNodeElt(c, byte(n))
 	}
 }
 
@@ -617,7 +618,7 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 	huffNode := s.nodes[1 : huffNodesLen+1]
 	//huffNode = huffNode[: huffNodesLen]
 
-	largestBits := huffNode[lastNonNull].nbBits
+	largestBits := huffNode[lastNonNull].nbBits()
 
 	// early exit : no elt > maxNbBits
 	if largestBits <= maxNbBits {
@@ -627,14 +628,14 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 	baseCost := int(1) << (largestBits - maxNbBits)
 	n := uint32(lastNonNull)
 
-	for huffNode[n].nbBits > maxNbBits {
-		totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits))
-		huffNode[n].nbBits = maxNbBits
+	for huffNode[n].nbBits() > maxNbBits {
+		totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits()))
+		huffNode[n].setNbBits(maxNbBits)
 		n--
 	}
 	// n stops at huffNode[n].nbBits <= maxNbBits
 
-	for huffNode[n].nbBits == maxNbBits {
+	for huffNode[n].nbBits() == maxNbBits {
 		n--
 	}
 	// n end at index of smallest symbol using < maxNbBits
@@ -655,10 +656,10 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 		{
 			currentNbBits := maxNbBits
 			for pos := int(n); pos >= 0; pos-- {
-				if huffNode[pos].nbBits >= currentNbBits {
+				if huffNode[pos].nbBits() >= currentNbBits {
 					continue
 				}
-				currentNbBits = huffNode[pos].nbBits // < maxNbBits
+				currentNbBits = huffNode[pos].nbBits() // < maxNbBits
 				rankLast[maxNbBits-currentNbBits] = uint32(pos)
 			}
 		}
@@ -675,8 +676,8 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 				if lowPos == noSymbol {
 					break
 				}
-				highTotal := huffNode[highPos].count
-				lowTotal := 2 * huffNode[lowPos].count
+				highTotal := huffNode[highPos].count()
+				lowTotal := 2 * huffNode[lowPos].count()
 				if highTotal <= lowTotal {
 					break
 				}
@@ -692,13 +693,14 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 				// this rank is no longer empty
 				rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]
 			}
-			huffNode[rankLast[nBitsToDecrease]].nbBits++
+			huffNode[rankLast[nBitsToDecrease]].setNbBits(1 +
+				huffNode[rankLast[nBitsToDecrease]].nbBits())
 			if rankLast[nBitsToDecrease] == 0 {
 				/* special case, reached largest symbol */
 				rankLast[nBitsToDecrease] = noSymbol
 			} else {
 				rankLast[nBitsToDecrease]--
-				if huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease {
+				if huffNode[rankLast[nBitsToDecrease]].nbBits() != maxNbBits-nBitsToDecrease {
 					rankLast[nBitsToDecrease] = noSymbol /* this rank is now empty */
 				}
 			}
@@ -706,15 +708,15 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 
 		for totalCost < 0 { /* Sometimes, cost correction overshoot */
 			if rankLast[1] == noSymbol { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
-				for huffNode[n].nbBits == maxNbBits {
+				for huffNode[n].nbBits() == maxNbBits {
 					n--
 				}
-				huffNode[n+1].nbBits--
+				huffNode[n+1].setNbBits(huffNode[n+1].nbBits() - 1)
 				rankLast[1] = n + 1
 				totalCost++
 				continue
 			}
-			huffNode[rankLast[1]+1].nbBits--
+			huffNode[rankLast[1]+1].setNbBits(huffNode[rankLast[1]+1].nbBits() - 1)
 			rankLast[1]++
 			totalCost++
 		}
@@ -722,9 +724,26 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 	return maxNbBits
 }
 
-type nodeElt struct {
-	count  uint32
-	parent uint16
-	symbol byte
-	nbBits uint8
+// A nodeElt is the fields
+//
+//	count  uint32
+//	parent uint16
+//	symbol byte
+//	nbBits uint8
+//
+// in some order, all squashed into an integer so that the compiler
+// always loads and stores entire nodeElts instead of separate fields.
+type nodeElt uint64
+
+func makeNodeElt(count uint32, symbol byte) nodeElt {
+	return nodeElt(count) | nodeElt(symbol)<<48
 }
+
+func (e *nodeElt) count() uint32  { return uint32(*e) }
+func (e *nodeElt) parent() uint16 { return uint16(*e >> 32) }
+func (e *nodeElt) symbol() byte   { return byte(*e >> 48) }
+func (e *nodeElt) nbBits() uint8  { return uint8(*e >> 56) }
+
+func (e *nodeElt) setCount(c uint32) { *e = (*e)&0xffffffff00000000 | nodeElt(c) }
+func (e *nodeElt) setParent(p int16) { *e = (*e)&0xffff0000ffffffff | nodeElt(uint16(p))<<32 }
+func (e *nodeElt) setNbBits(n uint8) { *e = (*e)&0x00ffffffffffffff | nodeElt(n)<<56 }

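Replacing the four-field struct with a packed `uint64` lets `huffSort` and `buildCTable` move whole nodes with single loads and stores instead of per-field accesses. A quick round-trip check of the layout (count in bits 0-31, parent in 32-47, symbol in 48-55, nbBits in 56-63), reusing the accessors from the diff:

```go
package main

import "fmt"

// Same layout as huff0's packed nodeElt:
// bits 0-31 count, 32-47 parent, 48-55 symbol, 56-63 nbBits.
type nodeElt uint64

func makeNodeElt(count uint32, symbol byte) nodeElt {
	return nodeElt(count) | nodeElt(symbol)<<48
}

func (e *nodeElt) count() uint32  { return uint32(*e) }
func (e *nodeElt) parent() uint16 { return uint16(*e >> 32) }
func (e *nodeElt) symbol() byte   { return byte(*e >> 48) }
func (e *nodeElt) nbBits() uint8  { return uint8(*e >> 56) }

func (e *nodeElt) setParent(p int16) { *e = *e&0xffff0000ffffffff | nodeElt(uint16(p))<<32 }
func (e *nodeElt) setNbBits(n uint8) { *e = *e&0x00ffffffffffffff | nodeElt(n)<<56 }

func main() {
	e := makeNodeElt(1<<20, 'A')
	e.setParent(42)
	e.setNbBits(11)
	// All fields survive the round trip through one machine word.
	fmt.Println(e.count(), e.parent(), e.symbol(), e.nbBits()) // 1048576 42 65 11
}
```
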
+ 1 - 1
vendor/github.com/klauspost/compress/huff0/decompress.go

@@ -61,7 +61,7 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
 		b, err := fse.Decompress(in[:iSize], s.fse)
 		s.fse.Out = nil
 		if err != nil {
-			return s, nil, err
+			return s, nil, fmt.Errorf("fse decompress returned: %w", err)
 		}
 		if len(b) > 255 {
 			return s, nil, errors.New("corrupt input: output table too large")

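Wrapping the error with `%w` adds context about where the failure came from while keeping the original error matchable with `errors.Is`/`errors.As`. A minimal illustration:

```go
package main

import (
	"errors"
	"fmt"
)

var errCorrupt = errors.New("corrupt input")

func readTable() error {
	// Mirrors the change above: annotate without hiding the cause.
	return fmt.Errorf("fse decompress returned: %w", errCorrupt)
}

func main() {
	err := readTable()
	fmt.Println(err)                        // fse decompress returned: corrupt input
	fmt.Println(errors.Is(err, errCorrupt)) // true: %w keeps the chain intact
}
```
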
+ 284 - 300
vendor/github.com/klauspost/compress/huff0/decompress_amd64.s

@@ -4,360 +4,349 @@
 
 // func decompress4x_main_loop_amd64(ctx *decompress4xContext)
 TEXT ·decompress4x_main_loop_amd64(SB), $0-8
-	XORQ DX, DX
-
 	// Preload values
 	MOVQ    ctx+0(FP), AX
 	MOVBQZX 8(AX), DI
-	MOVQ    16(AX), SI
-	MOVQ    48(AX), BX
-	MOVQ    24(AX), R9
-	MOVQ    32(AX), R10
-	MOVQ    (AX), R11
+	MOVQ    16(AX), BX
+	MOVQ    48(AX), SI
+	MOVQ    24(AX), R8
+	MOVQ    32(AX), R9
+	MOVQ    (AX), R10
 
 	// Main loop
 main_loop:
-	MOVQ  SI, R8
-	CMPQ  R8, BX
+	XORL  DX, DX
+	CMPQ  BX, SI
 	SETGE DL
 
 	// br0.fillFast32()
-	MOVQ    32(R11), R12
-	MOVBQZX 40(R11), R13
-	CMPQ    R13, $0x20
+	MOVQ    32(R10), R11
+	MOVBQZX 40(R10), R12
+	CMPQ    R12, $0x20
 	JBE     skip_fill0
-	MOVQ    24(R11), AX
-	SUBQ    $0x20, R13
+	MOVQ    24(R10), AX
+	SUBQ    $0x20, R12
 	SUBQ    $0x04, AX
-	MOVQ    (R11), R14
+	MOVQ    (R10), R13
 
 	// b.value |= uint64(low) << (b.bitsRead & 63)
-	MOVL (AX)(R14*1), R14
-	MOVQ R13, CX
-	SHLQ CL, R14
-	MOVQ AX, 24(R11)
-	ORQ  R14, R12
+	MOVL (AX)(R13*1), R13
+	MOVQ R12, CX
+	SHLQ CL, R13
+	MOVQ AX, 24(R10)
+	ORQ  R13, R11
 
-	// exhausted = exhausted || (br0.off < 4)
-	CMPQ  AX, $0x04
-	SETLT AL
-	ORB   AL, DL
+	// exhausted += (br0.off < 4)
+	CMPQ AX, $0x04
+	ADCB $+0, DL
 
 skip_fill0:
 	// val0 := br0.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v0 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br0.advance(uint8(v0.entry)
 	MOVB CH, AL
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// val1 := br0.peekTopBits(peekBits)
 	MOVQ DI, CX
-	MOVQ R12, R14
-	SHRQ CL, R14
+	MOVQ R11, R13
+	SHRQ CL, R13
 
 	// v1 := table[val1&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br0.advance(uint8(v1.entry))
 	MOVB CH, AH
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// these two writes get coalesced
 	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
-	MOVW AX, (R8)
+	MOVW AX, (BX)
 
 	// update the bitreader structure
-	MOVQ R12, 32(R11)
-	MOVB R13, 40(R11)
-	ADDQ R9, R8
+	MOVQ R11, 32(R10)
+	MOVB R12, 40(R10)
 
 	// br1.fillFast32()
-	MOVQ    80(R11), R12
-	MOVBQZX 88(R11), R13
-	CMPQ    R13, $0x20
+	MOVQ    80(R10), R11
+	MOVBQZX 88(R10), R12
+	CMPQ    R12, $0x20
 	JBE     skip_fill1
-	MOVQ    72(R11), AX
-	SUBQ    $0x20, R13
+	MOVQ    72(R10), AX
+	SUBQ    $0x20, R12
 	SUBQ    $0x04, AX
-	MOVQ    48(R11), R14
+	MOVQ    48(R10), R13
 
 	// b.value |= uint64(low) << (b.bitsRead & 63)
-	MOVL (AX)(R14*1), R14
-	MOVQ R13, CX
-	SHLQ CL, R14
-	MOVQ AX, 72(R11)
-	ORQ  R14, R12
+	MOVL (AX)(R13*1), R13
+	MOVQ R12, CX
+	SHLQ CL, R13
+	MOVQ AX, 72(R10)
+	ORQ  R13, R11
 
-	// exhausted = exhausted || (br1.off < 4)
-	CMPQ  AX, $0x04
-	SETLT AL
-	ORB   AL, DL
+	// exhausted += (br1.off < 4)
+	CMPQ AX, $0x04
+	ADCB $+0, DL
 
 skip_fill1:
 	// val0 := br1.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v0 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br1.advance(uint8(v0.entry)
 	MOVB CH, AL
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// val1 := br1.peekTopBits(peekBits)
 	MOVQ DI, CX
-	MOVQ R12, R14
-	SHRQ CL, R14
+	MOVQ R11, R13
+	SHRQ CL, R13
 
 	// v1 := table[val1&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br1.advance(uint8(v1.entry))
 	MOVB CH, AH
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// these two writes get coalesced
 	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
-	MOVW AX, (R8)
+	MOVW AX, (BX)(R8*1)
 
 	// update the bitreader structure
-	MOVQ R12, 80(R11)
-	MOVB R13, 88(R11)
-	ADDQ R9, R8
+	MOVQ R11, 80(R10)
+	MOVB R12, 88(R10)
 
 	// br2.fillFast32()
-	MOVQ    128(R11), R12
-	MOVBQZX 136(R11), R13
-	CMPQ    R13, $0x20
+	MOVQ    128(R10), R11
+	MOVBQZX 136(R10), R12
+	CMPQ    R12, $0x20
 	JBE     skip_fill2
-	MOVQ    120(R11), AX
-	SUBQ    $0x20, R13
+	MOVQ    120(R10), AX
+	SUBQ    $0x20, R12
 	SUBQ    $0x04, AX
-	MOVQ    96(R11), R14
+	MOVQ    96(R10), R13
 
 	// b.value |= uint64(low) << (b.bitsRead & 63)
-	MOVL (AX)(R14*1), R14
-	MOVQ R13, CX
-	SHLQ CL, R14
-	MOVQ AX, 120(R11)
-	ORQ  R14, R12
+	MOVL (AX)(R13*1), R13
+	MOVQ R12, CX
+	SHLQ CL, R13
+	MOVQ AX, 120(R10)
+	ORQ  R13, R11
 
-	// exhausted = exhausted || (br2.off < 4)
-	CMPQ  AX, $0x04
-	SETLT AL
-	ORB   AL, DL
+	// exhausted += (br2.off < 4)
+	CMPQ AX, $0x04
+	ADCB $+0, DL
 
 skip_fill2:
 	// val0 := br2.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v0 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br2.advance(uint8(v0.entry)
 	MOVB CH, AL
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// val1 := br2.peekTopBits(peekBits)
 	MOVQ DI, CX
-	MOVQ R12, R14
-	SHRQ CL, R14
+	MOVQ R11, R13
+	SHRQ CL, R13
 
 	// v1 := table[val1&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br2.advance(uint8(v1.entry))
 	MOVB CH, AH
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// these two writes get coalesced
 	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
-	MOVW AX, (R8)
+	MOVW AX, (BX)(R8*2)
 
 	// update the bitreader structure
-	MOVQ R12, 128(R11)
-	MOVB R13, 136(R11)
-	ADDQ R9, R8
+	MOVQ R11, 128(R10)
+	MOVB R12, 136(R10)
 
 	// br3.fillFast32()
-	MOVQ    176(R11), R12
-	MOVBQZX 184(R11), R13
-	CMPQ    R13, $0x20
+	MOVQ    176(R10), R11
+	MOVBQZX 184(R10), R12
+	CMPQ    R12, $0x20
 	JBE     skip_fill3
-	MOVQ    168(R11), AX
-	SUBQ    $0x20, R13
+	MOVQ    168(R10), AX
+	SUBQ    $0x20, R12
 	SUBQ    $0x04, AX
-	MOVQ    144(R11), R14
+	MOVQ    144(R10), R13
 
 	// b.value |= uint64(low) << (b.bitsRead & 63)
-	MOVL (AX)(R14*1), R14
-	MOVQ R13, CX
-	SHLQ CL, R14
-	MOVQ AX, 168(R11)
-	ORQ  R14, R12
+	MOVL (AX)(R13*1), R13
+	MOVQ R12, CX
+	SHLQ CL, R13
+	MOVQ AX, 168(R10)
+	ORQ  R13, R11
 
-	// exhausted = exhausted || (br3.off < 4)
-	CMPQ  AX, $0x04
-	SETLT AL
-	ORB   AL, DL
+	// exhausted += (br3.off < 4)
+	CMPQ AX, $0x04
+	ADCB $+0, DL
 
 skip_fill3:
 	// val0 := br3.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v0 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br3.advance(uint8(v0.entry)
 	MOVB CH, AL
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// val1 := br3.peekTopBits(peekBits)
 	MOVQ DI, CX
-	MOVQ R12, R14
-	SHRQ CL, R14
+	MOVQ R11, R13
+	SHRQ CL, R13
 
 	// v1 := table[val1&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br3.advance(uint8(v1.entry))
 	MOVB CH, AH
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// these two writes get coalesced
 	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
-	MOVW AX, (R8)
+	LEAQ (R8)(R8*2), CX
+	MOVW AX, (BX)(CX*1)
 
 	// update the bitreader structure
-	MOVQ  R12, 176(R11)
-	MOVB  R13, 184(R11)
-	ADDQ  $0x02, SI
+	MOVQ  R11, 176(R10)
+	MOVB  R12, 184(R10)
+	ADDQ  $0x02, BX
 	TESTB DL, DL
 	JZ    main_loop
 	MOVQ  ctx+0(FP), AX
-	SUBQ  16(AX), SI
-	SHLQ  $0x02, SI
-	MOVQ  SI, 40(AX)
+	SUBQ  16(AX), BX
+	SHLQ  $0x02, BX
+	MOVQ  BX, 40(AX)
 	RET
 
 // func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
 TEXT ·decompress4x_8b_main_loop_amd64(SB), $0-8
-	XORQ DX, DX
-
 	// Preload values
 	MOVQ    ctx+0(FP), CX
 	MOVBQZX 8(CX), DI
 	MOVQ    16(CX), BX
 	MOVQ    48(CX), SI
-	MOVQ    24(CX), R9
-	MOVQ    32(CX), R10
-	MOVQ    (CX), R11
+	MOVQ    24(CX), R8
+	MOVQ    32(CX), R9
+	MOVQ    (CX), R10
 
 	// Main loop
 main_loop:
-	MOVQ  BX, R8
-	CMPQ  R8, SI
+	XORL  DX, DX
+	CMPQ  BX, SI
 	SETGE DL
 
 	// br0.fillFast32()
-	MOVQ    32(R11), R12
-	MOVBQZX 40(R11), R13
-	CMPQ    R13, $0x20
+	MOVQ    32(R10), R11
+	MOVBQZX 40(R10), R12
+	CMPQ    R12, $0x20
 	JBE     skip_fill0
-	MOVQ    24(R11), R14
-	SUBQ    $0x20, R13
-	SUBQ    $0x04, R14
-	MOVQ    (R11), R15
+	MOVQ    24(R10), R13
+	SUBQ    $0x20, R12
+	SUBQ    $0x04, R13
+	MOVQ    (R10), R14
 
 	// b.value |= uint64(low) << (b.bitsRead & 63)
-	MOVL (R14)(R15*1), R15
-	MOVQ R13, CX
-	SHLQ CL, R15
-	MOVQ R14, 24(R11)
-	ORQ  R15, R12
+	MOVL (R13)(R14*1), R14
+	MOVQ R12, CX
+	SHLQ CL, R14
+	MOVQ R13, 24(R10)
+	ORQ  R14, R11
 
-	// exhausted = exhausted || (br0.off < 4)
-	CMPQ  R14, $0x04
-	SETLT AL
-	ORB   AL, DL
+	// exhausted += (br0.off < 4)
+	CMPQ R13, $0x04
+	ADCB $+0, DL
 
 skip_fill0:
 	// val0 := br0.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v0 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br0.advance(uint8(v0.entry)
 	MOVB CH, AL
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// val1 := br0.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v1 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br0.advance(uint8(v1.entry)
 	MOVB   CH, AH
-	SHLQ   CL, R12
-	ADDB   CL, R13
+	SHLQ   CL, R11
+	ADDB   CL, R12
 	BSWAPL AX
 
 	// val2 := br0.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v2 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br0.advance(uint8(v2.entry)
 	MOVB CH, AH
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// val3 := br0.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v3 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br0.advance(uint8(v3.entry)
 	MOVB   CH, AL
-	SHLQ   CL, R12
-	ADDB   CL, R13
+	SHLQ   CL, R11
+	ADDB   CL, R12
 	BSWAPL AX
 
 	// these four writes get coalesced
@@ -365,88 +354,86 @@ skip_fill0:
 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
 	// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
 	// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
-	MOVL AX, (R8)
+	MOVL AX, (BX)
 
 	// update the bitreader structure
-	MOVQ R12, 32(R11)
-	MOVB R13, 40(R11)
-	ADDQ R9, R8
+	MOVQ R11, 32(R10)
+	MOVB R12, 40(R10)
 
 	// br1.fillFast32()
-	MOVQ    80(R11), R12
-	MOVBQZX 88(R11), R13
-	CMPQ    R13, $0x20
+	MOVQ    80(R10), R11
+	MOVBQZX 88(R10), R12
+	CMPQ    R12, $0x20
 	JBE     skip_fill1
-	MOVQ    72(R11), R14
-	SUBQ    $0x20, R13
-	SUBQ    $0x04, R14
-	MOVQ    48(R11), R15
+	MOVQ    72(R10), R13
+	SUBQ    $0x20, R12
+	SUBQ    $0x04, R13
+	MOVQ    48(R10), R14
 
 	// b.value |= uint64(low) << (b.bitsRead & 63)
-	MOVL (R14)(R15*1), R15
-	MOVQ R13, CX
-	SHLQ CL, R15
-	MOVQ R14, 72(R11)
-	ORQ  R15, R12
+	MOVL (R13)(R14*1), R14
+	MOVQ R12, CX
+	SHLQ CL, R14
+	MOVQ R13, 72(R10)
+	ORQ  R14, R11
 
-	// exhausted = exhausted || (br1.off < 4)
-	CMPQ  R14, $0x04
-	SETLT AL
-	ORB   AL, DL
+	// exhausted += (br1.off < 4)
+	CMPQ R13, $0x04
+	ADCB $+0, DL
 
 skip_fill1:
 	// val0 := br1.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v0 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br1.advance(uint8(v0.entry)
 	MOVB CH, AL
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// val1 := br1.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v1 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br1.advance(uint8(v1.entry)
 	MOVB   CH, AH
-	SHLQ   CL, R12
-	ADDB   CL, R13
+	SHLQ   CL, R11
+	ADDB   CL, R12
 	BSWAPL AX
 
 	// val2 := br1.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v2 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br1.advance(uint8(v2.entry)
 	MOVB CH, AH
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// val3 := br1.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v3 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br1.advance(uint8(v3.entry)
 	MOVB   CH, AL
-	SHLQ   CL, R12
-	ADDB   CL, R13
+	SHLQ   CL, R11
+	ADDB   CL, R12
 	BSWAPL AX
 
 	// these four writes get coalesced
@@ -454,88 +441,86 @@ skip_fill1:
 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
 	// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
 	// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
-	MOVL AX, (R8)
+	MOVL AX, (BX)(R8*1)
 
 	// update the bitreader structure
-	MOVQ R12, 80(R11)
-	MOVB R13, 88(R11)
-	ADDQ R9, R8
+	MOVQ R11, 80(R10)
+	MOVB R12, 88(R10)
 
 	// br2.fillFast32()
-	MOVQ    128(R11), R12
-	MOVBQZX 136(R11), R13
-	CMPQ    R13, $0x20
+	MOVQ    128(R10), R11
+	MOVBQZX 136(R10), R12
+	CMPQ    R12, $0x20
 	JBE     skip_fill2
-	MOVQ    120(R11), R14
-	SUBQ    $0x20, R13
-	SUBQ    $0x04, R14
-	MOVQ    96(R11), R15
+	MOVQ    120(R10), R13
+	SUBQ    $0x20, R12
+	SUBQ    $0x04, R13
+	MOVQ    96(R10), R14
 
 	// b.value |= uint64(low) << (b.bitsRead & 63)
-	MOVL (R14)(R15*1), R15
-	MOVQ R13, CX
-	SHLQ CL, R15
-	MOVQ R14, 120(R11)
-	ORQ  R15, R12
+	MOVL (R13)(R14*1), R14
+	MOVQ R12, CX
+	SHLQ CL, R14
+	MOVQ R13, 120(R10)
+	ORQ  R14, R11
 
-	// exhausted = exhausted || (br2.off < 4)
-	CMPQ  R14, $0x04
-	SETLT AL
-	ORB   AL, DL
+	// exhausted += (br2.off < 4)
+	CMPQ R13, $0x04
+	ADCB $+0, DL
 
 skip_fill2:
 	// val0 := br2.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v0 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br2.advance(uint8(v0.entry)
 	MOVB CH, AL
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// val1 := br2.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v1 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br2.advance(uint8(v1.entry)
 	MOVB   CH, AH
-	SHLQ   CL, R12
-	ADDB   CL, R13
+	SHLQ   CL, R11
+	ADDB   CL, R12
 	BSWAPL AX
 
 	// val2 := br2.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v2 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br2.advance(uint8(v2.entry)
 	MOVB CH, AH
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// val3 := br2.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v3 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br2.advance(uint8(v3.entry)
 	MOVB   CH, AL
-	SHLQ   CL, R12
-	ADDB   CL, R13
+	SHLQ   CL, R11
+	ADDB   CL, R12
 	BSWAPL AX
 
 	// these four writes get coalesced
@@ -543,88 +528,86 @@ skip_fill2:
 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
 	// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
 	// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
-	MOVL AX, (R8)
+	MOVL AX, (BX)(R8*2)
 
 	// update the bitreader structure
-	MOVQ R12, 128(R11)
-	MOVB R13, 136(R11)
-	ADDQ R9, R8
+	MOVQ R11, 128(R10)
+	MOVB R12, 136(R10)
 
 	// br3.fillFast32()
-	MOVQ    176(R11), R12
-	MOVBQZX 184(R11), R13
-	CMPQ    R13, $0x20
+	MOVQ    176(R10), R11
+	MOVBQZX 184(R10), R12
+	CMPQ    R12, $0x20
 	JBE     skip_fill3
-	MOVQ    168(R11), R14
-	SUBQ    $0x20, R13
-	SUBQ    $0x04, R14
-	MOVQ    144(R11), R15
+	MOVQ    168(R10), R13
+	SUBQ    $0x20, R12
+	SUBQ    $0x04, R13
+	MOVQ    144(R10), R14
 
 	// b.value |= uint64(low) << (b.bitsRead & 63)
-	MOVL (R14)(R15*1), R15
-	MOVQ R13, CX
-	SHLQ CL, R15
-	MOVQ R14, 168(R11)
-	ORQ  R15, R12
+	MOVL (R13)(R14*1), R14
+	MOVQ R12, CX
+	SHLQ CL, R14
+	MOVQ R13, 168(R10)
+	ORQ  R14, R11
 
-	// exhausted = exhausted || (br3.off < 4)
-	CMPQ  R14, $0x04
-	SETLT AL
-	ORB   AL, DL
+	// exhausted += (br3.off < 4)
+	CMPQ R13, $0x04
+	ADCB $+0, DL
 
 skip_fill3:
 	// val0 := br3.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v0 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br3.advance(uint8(v0.entry)
 	MOVB CH, AL
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// val1 := br3.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v1 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br3.advance(uint8(v1.entry)
 	MOVB   CH, AH
-	SHLQ   CL, R12
-	ADDB   CL, R13
+	SHLQ   CL, R11
+	ADDB   CL, R12
 	BSWAPL AX
 
 	// val2 := br3.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v2 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br3.advance(uint8(v2.entry)
 	MOVB CH, AH
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// val3 := br3.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v3 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br3.advance(uint8(v3.entry)
 	MOVB   CH, AL
-	SHLQ   CL, R12
-	ADDB   CL, R13
+	SHLQ   CL, R11
+	ADDB   CL, R12
 	BSWAPL AX
 
 	// these four writes get coalesced
@@ -632,11 +615,12 @@ skip_fill3:
 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
 	// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
 	// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
-	MOVL AX, (R8)
+	LEAQ (R8)(R8*2), CX
+	MOVL AX, (BX)(CX*1)
 
 	// update the bitreader structure
-	MOVQ  R12, 176(R11)
-	MOVB  R13, 184(R11)
+	MOVQ  R11, 176(R10)
+	MOVB  R12, 184(R10)
 	ADDQ  $0x04, BX
 	TESTB DL, DL
 	JZ    main_loop
@@ -652,7 +636,7 @@ TEXT ·decompress1x_main_loop_amd64(SB), $0-8
 	MOVQ    16(CX), DX
 	MOVQ    24(CX), BX
 	CMPQ    BX, $0x04
-	JB      error_max_decoded_size_exeeded
+	JB      error_max_decoded_size_exceeded
 	LEAQ    (DX)(BX*1), BX
 	MOVQ    (CX), SI
 	MOVQ    (SI), R8
@@ -667,7 +651,7 @@ main_loop:
 	// Check if we have room for 4 bytes in the output buffer
 	LEAQ 4(DX), CX
 	CMPQ CX, BX
-	JGE  error_max_decoded_size_exeeded
+	JGE  error_max_decoded_size_exceeded
 
 	// Decode 4 values
 	CMPQ R11, $0x20
@@ -744,7 +728,7 @@ loop_condition:
 	RET
 
 	// Report error
-error_max_decoded_size_exeeded:
+error_max_decoded_size_exceeded:
 	MOVQ ctx+0(FP), AX
 	MOVQ $-1, CX
 	MOVQ CX, 40(AX)
@@ -757,7 +741,7 @@ TEXT ·decompress1x_main_loop_bmi2(SB), $0-8
 	MOVQ    16(CX), DX
 	MOVQ    24(CX), BX
 	CMPQ    BX, $0x04
-	JB      error_max_decoded_size_exeeded
+	JB      error_max_decoded_size_exceeded
 	LEAQ    (DX)(BX*1), BX
 	MOVQ    (CX), SI
 	MOVQ    (SI), R8
@@ -772,7 +756,7 @@ main_loop:
 	// Check if we have room for 4 bytes in the output buffer
 	LEAQ 4(DX), CX
 	CMPQ CX, BX
-	JGE  error_max_decoded_size_exeeded
+	JGE  error_max_decoded_size_exceeded
 
 	// Decode 4 values
 	CMPQ  R11, $0x20
@@ -839,7 +823,7 @@ loop_condition:
 	RET
 
 	// Report error
-error_max_decoded_size_exeeded:
+error_max_decoded_size_exceeded:
 	MOVQ ctx+0(FP), AX
 	MOVQ $-1, CX
 	MOVQ CX, 40(AX)

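Throughout this file the `SETLT`/`ORB` pair becomes `CMPQ`/`ADCB`: the unsigned compare leaves the carry flag set exactly when the offset is below 4, and add-with-carry folds that flag into the `exhausted` byte in DL without clobbering AL. `math/bits.Sub64` exposes the same borrow in Go; a rough sketch of the semantics:

```go
package main

import (
	"fmt"
	"math/bits"
)

// exhausted mimics the CMPQ/ADCB sequence: Sub64 returns borrow == 1
// exactly when off < 4 (an unsigned compare), and we accumulate it.
func exhausted(offsets []uint64) (dl uint64) {
	for _, off := range offsets {
		_, borrow := bits.Sub64(off, 4, 0) // carry flag of "CMPQ off, $4"
		dl += borrow                       // "ADCB $+0, DL"
	}
	return dl // nonzero if any bit reader has fewer than 4 bytes left
}

func main() {
	fmt.Println(exhausted([]uint64{16, 8, 3, 0})) // 2
}
```
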
+ 22 - 0
vendor/github.com/klauspost/compress/internal/snapref/encode_other.go

@@ -103,6 +103,28 @@ func hash(u, shift uint32) uint32 {
 	return (u * 0x1e35a7bd) >> shift
 }
 
+// EncodeBlockInto exposes encodeBlock but checks dst size.
+func EncodeBlockInto(dst, src []byte) (d int) {
+	if MaxEncodedLen(len(src)) > len(dst) {
+		return 0
+	}
+
+	// encodeBlock breaks on too big blocks, so split.
+	for len(src) > 0 {
+		p := src
+		src = nil
+		if len(p) > maxBlockSize {
+			p, src = p[:maxBlockSize], p[maxBlockSize:]
+		}
+		if len(p) < minNonLiteralBlockSize {
+			d += emitLiteral(dst[d:], p)
+		} else {
+			d += encodeBlock(dst[d:], p)
+		}
+	}
+	return d
+}
+
 // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
 // assumes that the varint-encoded length of the decompressed bytes has already
 // been written.

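`EncodeBlockInto` is the size-checked entry point: it returns 0 if `dst` is smaller than `MaxEncodedLen(len(src))`, and splits inputs larger than `maxBlockSize` so `encodeBlock` never sees an oversized block. Since `internal/snapref` is only importable from within the compress module itself, the following usage sketch compiles there but not from outside:

```go
package main

import (
	"fmt"

	// Internal package: only importable from within github.com/klauspost/compress.
	"github.com/klauspost/compress/internal/snapref"
)

func main() {
	src := []byte("hello hello hello hello hello hello")
	// MaxEncodedLen is the size EncodeBlockInto checks against; a smaller
	// dst makes it return 0 instead of writing out of bounds.
	dst := make([]byte, snapref.MaxEncodedLen(len(src)))
	n := snapref.EncodeBlockInto(dst, src)
	fmt.Println("encoded bytes:", n)
}
```
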
+ 12 - 6
vendor/github.com/klauspost/compress/zstd/blockdec.go

@@ -9,6 +9,7 @@ import (
 	"encoding/binary"
 	"errors"
 	"fmt"
+	"hash/crc32"
 	"io"
 	"os"
 	"path/filepath"
@@ -82,8 +83,9 @@ type blockDec struct {
 
 	err error
 
-	// Check against this crc
-	checkCRC []byte
+	// Check against this crc, if hasCRC is true.
+	checkCRC uint32
+	hasCRC   bool
 
 	// Frame to use for singlethreaded decoding.
 	// Should not be used by the decoder itself since parent may be another frame.
@@ -191,16 +193,14 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
 	}
 
 	// Read block data.
-	if cap(b.dataStorage) < cSize {
+	if _, ok := br.(*byteBuf); !ok && cap(b.dataStorage) < cSize {
+		// byteBuf doesn't need a destination buffer.
 		if b.lowMem || cSize > maxCompressedBlockSize {
 			b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc)
 		} else {
 			b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc)
 		}
 	}
-	if cap(b.dst) <= maxSize {
-		b.dst = make([]byte, 0, maxSize+1)
-	}
 	b.data, err = br.readBig(cSize, b.dataStorage)
 	if err != nil {
 		if debugDecoder {
@@ -209,6 +209,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
 		}
 		return err
 	}
+	if cap(b.dst) <= maxSize {
+		b.dst = make([]byte, 0, maxSize+1)
+	}
 	return nil
 }
 
@@ -440,6 +443,9 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
 			}
 		}
 		var err error
+		if debugDecoder {
+			println("huff table input:", len(literals), "CRC:", crc32.ChecksumIEEE(literals))
+		}
 		huff, literals, err = huff0.ReadTable(literals, huff)
 		if err != nil {
 			println("reading huffman table:", err)

+ 1 - 1
vendor/github.com/klauspost/compress/zstd/bytebuf.go

@@ -54,7 +54,7 @@ func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) {
 func (b *byteBuf) readByte() (byte, error) {
 	bb := *b
 	if len(bb) < 1 {
-		return 0, nil
+		return 0, io.ErrUnexpectedEOF
 	}
 	r := bb[0]
 	*b = bb[1:]

+ 4 - 5
vendor/github.com/klauspost/compress/zstd/decodeheader.go

@@ -4,7 +4,6 @@
 package zstd
 
 import (
-	"bytes"
 	"encoding/binary"
 	"errors"
 	"io"
@@ -102,8 +101,8 @@ func (h *Header) Decode(in []byte) error {
 	}
 	h.HeaderSize += 4
 	b, in := in[:4], in[4:]
-	if !bytes.Equal(b, frameMagic) {
-		if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 {
+	if string(b) != frameMagic {
+		if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 {
 			return ErrMagicMismatch
 		}
 		if len(in) < 4 {
@@ -153,7 +152,7 @@ func (h *Header) Decode(in []byte) error {
 		}
 		b, in = in[:size], in[size:]
 		h.HeaderSize += int(size)
-		switch size {
+		switch len(b) {
 		case 1:
 			h.DictionaryID = uint32(b[0])
 		case 2:
@@ -183,7 +182,7 @@ func (h *Header) Decode(in []byte) error {
 		}
 		b, in = in[:fcsSize], in[fcsSize:]
 		h.HeaderSize += int(fcsSize)
-		switch fcsSize {
+		switch len(b) {
 		case 1:
 			h.FrameContentSize = uint64(b[0])
 		case 2:

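Replacing `bytes.Equal` with a comparison against a string constant drops the `bytes` import; converting a byte slice to a string purely for an equality test does not allocate, because the compiler lowers it to a direct memory compare. A quick sketch (the constant matches the zstd frame magic):

```go
package main

import "fmt"

const frameMagic = "\x28\xb5\x2f\xfd" // zstd frame magic, little-endian

func isFrame(b []byte) bool {
	// string(b[:4]) inside a comparison does not allocate; the compiler
	// rewrites it to a plain memory compare against the constant.
	return len(b) >= 4 && string(b[:4]) == frameMagic
}

func main() {
	fmt.Println(isFrame([]byte{0x28, 0xb5, 0x2f, 0xfd, 0x00})) // true
	fmt.Println(isFrame([]byte("not a frame")))                // false
}
```
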
+ 48 - 45
vendor/github.com/klauspost/compress/zstd/decoder.go

@@ -5,7 +5,6 @@
 package zstd
 
 import (
-	"bytes"
 	"context"
 	"encoding/binary"
 	"io"
@@ -41,8 +40,7 @@ type Decoder struct {
 	frame *frameDec
 
 	// Custom dictionaries.
-	// Always uses copies.
-	dicts map[uint32]dict
+	dicts map[uint32]*dict
 
 	// streamWg is the waitgroup for all streams
 	streamWg sync.WaitGroup
@@ -104,7 +102,7 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
 	}
 
 	// Transfer option dicts.
-	d.dicts = make(map[uint32]dict, len(d.o.dicts))
+	d.dicts = make(map[uint32]*dict, len(d.o.dicts))
 	for _, dc := range d.o.dicts {
 		d.dicts[dc.id] = dc
 	}
@@ -342,15 +340,8 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
 			}
 			return dst, err
 		}
-		if frame.DictionaryID != nil {
-			dict, ok := d.dicts[*frame.DictionaryID]
-			if !ok {
-				return nil, ErrUnknownDictionary
-			}
-			if debugDecoder {
-				println("setting dict", frame.DictionaryID)
-			}
-			frame.history.setDict(&dict)
+		if err = d.setDict(frame); err != nil {
+			return nil, err
 		}
 		if frame.WindowSize > d.o.maxWindowSize {
 			if debugDecoder {
@@ -459,7 +450,11 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
 		println("got", len(d.current.b), "bytes, error:", d.current.err, "data crc:", tmp)
 	}
 
-	if !d.o.ignoreChecksum && len(next.b) > 0 {
+	if d.o.ignoreChecksum {
+		return true
+	}
+
+	if len(next.b) > 0 {
 		n, err := d.current.crc.Write(next.b)
 		if err == nil {
 			if n != len(next.b) {
@@ -467,18 +462,16 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
 			}
 		}
 	}
-	if next.err == nil && next.d != nil && len(next.d.checkCRC) != 0 {
-		got := d.current.crc.Sum64()
-		var tmp [4]byte
-		binary.LittleEndian.PutUint32(tmp[:], uint32(got))
-		if !d.o.ignoreChecksum && !bytes.Equal(tmp[:], next.d.checkCRC) {
+	if next.err == nil && next.d != nil && next.d.hasCRC {
+		got := uint32(d.current.crc.Sum64())
+		if got != next.d.checkCRC {
 			if debugDecoder {
-				println("CRC Check Failed:", tmp[:], " (got) !=", next.d.checkCRC, "(on stream)")
+				printf("CRC Check Failed: %08x (got) != %08x (on stream)\n", got, next.d.checkCRC)
 			}
 			d.current.err = ErrCRCMismatch
 		} else {
 			if debugDecoder {
-				println("CRC ok", tmp[:])
+				printf("CRC ok %08x\n", got)
 			}
 		}
 	}
@@ -494,18 +487,12 @@ func (d *Decoder) nextBlockSync() (ok bool) {
 		if !d.syncStream.inFrame {
 			d.frame.history.reset()
 			d.current.err = d.frame.reset(&d.syncStream.br)
+			if d.current.err == nil {
+				d.current.err = d.setDict(d.frame)
+			}
 			if d.current.err != nil {
 				return false
 			}
-			if d.frame.DictionaryID != nil {
-				dict, ok := d.dicts[*d.frame.DictionaryID]
-				if !ok {
-					d.current.err = ErrUnknownDictionary
-					return false
-				} else {
-					d.frame.history.setDict(&dict)
-				}
-			}
 			if d.frame.WindowSize > d.o.maxDecodedSize || d.frame.WindowSize > d.o.maxWindowSize {
 				d.current.err = ErrDecoderSizeExceeded
 				return false
@@ -864,13 +851,8 @@ decodeStream:
 		if debugDecoder && err != nil {
 			println("Frame decoder returned", err)
 		}
-		if err == nil && frame.DictionaryID != nil {
-			dict, ok := d.dicts[*frame.DictionaryID]
-			if !ok {
-				err = ErrUnknownDictionary
-			} else {
-				frame.history.setDict(&dict)
-			}
+		if err == nil {
+			err = d.setDict(frame)
 		}
 		if err == nil && d.frame.WindowSize > d.o.maxWindowSize {
 			if debugDecoder {
@@ -918,18 +900,22 @@ decodeStream:
 				println("next block returned error:", err)
 			}
 			dec.err = err
-			dec.checkCRC = nil
+			dec.hasCRC = false
 			if dec.Last && frame.HasCheckSum && err == nil {
 				crc, err := frame.rawInput.readSmall(4)
-				if err != nil {
+				if len(crc) < 4 {
+					if err == nil {
+						err = io.ErrUnexpectedEOF
+
+					}
 					println("CRC missing?", err)
 					dec.err = err
-				}
-				var tmp [4]byte
-				copy(tmp[:], crc)
-				dec.checkCRC = tmp[:]
-				if debugDecoder {
-					println("found crc to check:", dec.checkCRC)
+				} else {
+					dec.checkCRC = binary.LittleEndian.Uint32(crc)
+					dec.hasCRC = true
+					if debugDecoder {
+						printf("found crc to check: %08x\n", dec.checkCRC)
+					}
 				}
 			}
 			err = dec.err
@@ -948,3 +934,20 @@ decodeStream:
 	hist.reset()
 	d.frame.history.b = frameHistCache
 }
+
+func (d *Decoder) setDict(frame *frameDec) (err error) {
+	dict, ok := d.dicts[frame.DictionaryID]
+	if ok {
+		if debugDecoder {
+			println("setting dict", frame.DictionaryID)
+		}
+		frame.history.setDict(dict)
+	} else if frame.DictionaryID != 0 {
+		// A zero or missing dictionary id is ambiguous:
+		// either dictionary zero, or no dictionary. In particular,
+		// zstd --patch-from uses this id for the source file,
+		// so only return an error if the dictionary id is not zero.
+		err = ErrUnknownDictionary
+	}
+	return err
+}

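With `checkCRC` stored as a `uint32` plus a separate `hasCRC` flag, the checksum test becomes a plain integer comparison instead of `bytes.Equal` on a 4-byte slice, and the "no checksum" case is no longer conflated with an empty slice. The check boils down to something like this (using `crc32` as a stand-in hash; zstd actually truncates xxhash64, as in `uint32(d.current.crc.Sum64())` above):

```go
package main

import (
	"encoding/binary"
	"fmt"
	"hash/crc32" // stand-in hash for the sketch
)

func main() {
	data := []byte("some decompressed block")
	stream := make([]byte, 4) // trailing checksum as it appears on the wire
	binary.LittleEndian.PutUint32(stream, crc32.ChecksumIEEE(data))

	checkCRC := binary.LittleEndian.Uint32(stream) // dec.checkCRC
	got := crc32.ChecksumIEEE(data)                // computed over the output
	if got != checkCRC {
		fmt.Printf("CRC Check Failed: %08x (got) != %08x (on stream)\n", got, checkCRC)
	} else {
		fmt.Printf("CRC ok %08x\n", got)
	}
}
```
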
+ 23 - 3
vendor/github.com/klauspost/compress/zstd/decoder_options.go

@@ -6,6 +6,8 @@ package zstd
 
 import (
 	"errors"
+	"fmt"
+	"math/bits"
 	"runtime"
 )
 
@@ -18,7 +20,7 @@ type decoderOptions struct {
 	concurrent      int
 	maxDecodedSize  uint64
 	maxWindowSize   uint64
-	dicts           []dict
+	dicts           []*dict
 	ignoreChecksum  bool
 	limitToCap      bool
 	decodeBufsBelow int
@@ -85,7 +87,13 @@ func WithDecoderMaxMemory(n uint64) DOption {
 }
 
 // WithDecoderDicts allows to register one or more dictionaries for the decoder.
-// If several dictionaries with the same ID is provided the last one will be used.
+//
+// Each slice in dict must be in the [dictionary format] produced by
+// "zstd --train" from the Zstandard reference implementation.
+//
+// If several dictionaries with the same ID are provided, the last one will be used.
+//
+// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
 func WithDecoderDicts(dicts ...[]byte) DOption {
 	return func(o *decoderOptions) error {
 		for _, b := range dicts {
@@ -93,12 +101,24 @@ func WithDecoderDicts(dicts ...[]byte) DOption {
 			if err != nil {
 				return err
 			}
-			o.dicts = append(o.dicts, *d)
+			o.dicts = append(o.dicts, d)
 		}
 		return nil
 	}
 }
 
+// WithDecoderDictRaw registers a dictionary that may be used by the decoder.
+// The slice content can be arbitrary data.
+func WithDecoderDictRaw(id uint32, content []byte) DOption {
+	return func(o *decoderOptions) error {
+		if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
+			return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content))
+		}
+		o.dicts = append(o.dicts, &dict{id: id, content: content, offsets: [3]int{1, 4, 8}})
+		return nil
+	}
+}
+
 // WithDecoderMaxWindow allows to set a maximum window size for decodes.
 // This allows rejecting packets that will cause big memory usage.
 // The Decoder will likely allocate more memory based on the WithDecoderLowmem setting.

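`WithDecoderDictRaw` registers arbitrary bytes as dictionary content rather than a trained dictionary, which pairs with `zstd --patch-from`-style frames that reference id 0. A usage sketch (the patch data is a placeholder):

```go
package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Raw content of the base file; id 0 matches what --patch-from emits.
	base := []byte("previous version of the file")
	dec, err := zstd.NewReader(nil, zstd.WithDecoderDictRaw(0, base))
	if err != nil {
		panic(err)
	}
	defer dec.Close()

	// patch is assumed to be a frame produced against the same raw dictionary.
	var patch []byte
	out, err := dec.DecodeAll(patch, nil)
	fmt.Println(len(out), err)
}
```
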
+ 45 - 6
vendor/github.com/klauspost/compress/zstd/dict.go

@@ -1,7 +1,6 @@
 package zstd
 
 import (
-	"bytes"
 	"encoding/binary"
 	"errors"
 	"fmt"
@@ -20,7 +19,10 @@ type dict struct {
 	content []byte
 }
 
-var dictMagic = [4]byte{0x37, 0xa4, 0x30, 0xec}
+const dictMagic = "\x37\xa4\x30\xec"
+
+// Maximum dictionary size for the reference implementation (1.5.3) is 2 GiB.
+const dictMaxLength = 1 << 31
 
 // ID returns the dictionary id or 0 if d is nil.
 func (d *dict) ID() uint32 {
@@ -30,14 +32,38 @@ func (d *dict) ID() uint32 {
 	return d.id
 }
 
-// DictContentSize returns the dictionary content size or 0 if d is nil.
-func (d *dict) DictContentSize() int {
+// ContentSize returns the dictionary content size or 0 if d is nil.
+func (d *dict) ContentSize() int {
 	if d == nil {
 		return 0
 	}
 	return len(d.content)
 }
 
+// Content returns the dictionary content.
+func (d *dict) Content() []byte {
+	if d == nil {
+		return nil
+	}
+	return d.content
+}
+
+// Offsets returns the initial offsets.
+func (d *dict) Offsets() [3]int {
+	if d == nil {
+		return [3]int{}
+	}
+	return d.offsets
+}
+
+// LitEncoder returns the literal encoder.
+func (d *dict) LitEncoder() *huff0.Scratch {
+	if d == nil {
+		return nil
+	}
+	return d.litEnc
+}
+
 // Load a dictionary as described in
 // https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
 func loadDict(b []byte) (*dict, error) {
@@ -50,7 +76,7 @@ func loadDict(b []byte) (*dict, error) {
 		ofDec: sequenceDec{fse: &fseDecoder{}},
 		mlDec: sequenceDec{fse: &fseDecoder{}},
 	}
-	if !bytes.Equal(b[:4], dictMagic[:]) {
+	if string(b[:4]) != dictMagic {
 		return nil, ErrMagicMismatch
 	}
 	d.id = binary.LittleEndian.Uint32(b[4:8])
@@ -62,7 +88,7 @@ func loadDict(b []byte) (*dict, error) {
 	var err error
 	d.litEnc, b, err = huff0.ReadTable(b[8:], nil)
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("loading literal table: %w", err)
 	}
 	d.litEnc.Reuse = huff0.ReusePolicyMust
 
@@ -120,3 +146,16 @@ func loadDict(b []byte) (*dict, error) {
 
 	return &d, nil
 }
+
+// InspectDictionary loads a zstd dictionary and provides functions to inspect the content.
+func InspectDictionary(b []byte) (interface {
+	ID() uint32
+	ContentSize() int
+	Content() []byte
+	Offsets() [3]int
+	LitEncoder() *huff0.Scratch
+}, error) {
+	initPredefined()
+	d, err := loadDict(b)
+	return d, err
+}

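`InspectDictionary` exposes the parsed dictionary through an anonymous interface, so callers can examine it without the unexported `dict` type leaking into the API. For example (the dictionary path is a placeholder):

```go
package main

import (
	"fmt"
	"os"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// dictionary.bin stands in for a dictionary produced by "zstd --train".
	b, err := os.ReadFile("dictionary.bin")
	if err != nil {
		panic(err)
	}
	d, err := zstd.InspectDictionary(b)
	if err != nil {
		panic(err)
	}
	fmt.Printf("id=%d contentSize=%d offsets=%v\n",
		d.ID(), d.ContentSize(), d.Offsets())
}
```
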
+ 6 - 22
vendor/github.com/klauspost/compress/zstd/enc_base.go

@@ -16,6 +16,7 @@ type fastBase struct {
 	cur int32
 	// maximum offset. Should be at least 2x block size.
 	maxMatchOff int32
+	bufferReset int32
 	hist        []byte
 	crc         *xxhash.Digest
 	tmp         [8]byte
@@ -56,8 +57,8 @@ func (e *fastBase) Block() *blockEnc {
 }
 
 func (e *fastBase) addBlock(src []byte) int32 {
-	if debugAsserts && e.cur > bufferReset {
-		panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset))
+	if debugAsserts && e.cur > e.bufferReset {
+		panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, e.bufferReset))
 	}
 	// check if we have space already
 	if len(e.hist)+len(src) > cap(e.hist) {
@@ -126,24 +127,7 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
 			panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
 		}
 	}
-	a := src[s:]
-	b := src[t:]
-	b = b[:len(a)]
-	end := int32((len(a) >> 3) << 3)
-	for i := int32(0); i < end; i += 8 {
-		if diff := load6432(a, i) ^ load6432(b, i); diff != 0 {
-			return i + int32(bits.TrailingZeros64(diff)>>3)
-		}
-	}
-
-	a = a[end:]
-	b = b[end:]
-	for i := range a {
-		if a[i] != b[i] {
-			return int32(i) + end
-		}
-	}
-	return int32(len(a)) + end
+	return int32(matchLen(src[s:], src[t:]))
 }
 
 // Reset the encoding table.
@@ -165,13 +149,13 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) {
 		if singleBlock {
 			e.lowMem = true
 		}
-		e.ensureHist(d.DictContentSize() + maxCompressedBlockSize)
+		e.ensureHist(d.ContentSize() + maxCompressedBlockSize)
 		e.lowMem = low
 	}
 
 	// We offset current position so everything will be out of reach.
 	// If above reset line, history will be purged.
-	if e.cur < bufferReset {
+	if e.cur < e.bufferReset {
 		e.cur += e.maxMatchOff + int32(len(e.hist))
 	}
 	e.hist = e.hist[:0]
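
The wraparound guard `e.cur >= e.bufferReset-int32(len(e.hist))` now uses a per-encoder threshold seeded in the encoder_options.go hunk further down. A sketch of the headroom reasoning (our reading, not stated in the diff):

```go
package main

import "math"

// bufferResetFor mirrors how encoder_options.go seeds fastBase.bufferReset.
// e.hist is bounded by the window, and a reset advances e.cur by at most
// maxMatchOff+len(hist), so leaving 2*windowSize of headroom below MaxInt32
// keeps the int32 cursor from overflowing between resets.
func bufferResetFor(windowSize int) int32 {
	return math.MaxInt32 - int32(windowSize*2)
}
```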

+ 35 - 39
vendor/github.com/klauspost/compress/zstd/enc_best.go

@@ -32,7 +32,6 @@ type match struct {
 	length int32
 	rep    int32
 	est    int32
-	_      [12]byte // Aligned size to cache line: 4+4+4+4+4 bytes + 12 bytes padding = 32 bytes
 }
 
 const highScore = 25000
@@ -85,14 +84,10 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
-			for i := range e.table[:] {
-				e.table[i] = prevEntry{}
-			}
-			for i := range e.longTable[:] {
-				e.longTable[i] = prevEntry{}
-			}
+			e.table = [bestShortTableSize]prevEntry{}
+			e.longTable = [bestLongTableSize]prevEntry{}
 			e.cur = e.maxMatchOff
 			break
 		}
@@ -193,12 +188,6 @@ encodeLoop:
 			panic("offset0 was 0")
 		}
 
-		bestOf := func(a, b match) match {
-			if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 {
-				return a
-			}
-			return b
-		}
 		const goodEnough = 100
 
 		nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
@@ -206,36 +195,41 @@ encodeLoop:
 		candidateL := e.longTable[nextHashL]
 		candidateS := e.table[nextHashS]
 
-		matchAt := func(offset int32, s int32, first uint32, rep int32) match {
+		// Set m to a match at offset if it looks like that will improve compression.
+		improve := func(m *match, offset int32, s int32, first uint32, rep int32) {
 			if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
-				return match{s: s, est: highScore}
+				return
 			}
 			if debugAsserts {
 				if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
 					panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
 				}
 			}
-			m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
-			m.estBits(bitsPerByte)
-			return m
+			cand := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
+			cand.estBits(bitsPerByte)
+			if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
+				*m = cand
+			}
 		}
 
-		best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
-		best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
-		best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
+		best := match{s: s, est: highScore}
+		improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1)
+		improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1)
+		improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1)
+		improve(&best, candidateS.prev-e.cur, s, uint32(cv), -1)
 
 		if canRepeat && best.length < goodEnough {
 			cv32 := uint32(cv >> 8)
 			spp := s + 1
-			best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
-			best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
-			best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
+			improve(&best, spp-offset1, spp, cv32, 1)
+			improve(&best, spp-offset2, spp, cv32, 2)
+			improve(&best, spp-offset3, spp, cv32, 3)
 			if best.length > 0 {
 				cv32 = uint32(cv >> 24)
 				spp += 2
-				best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
-				best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
-				best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
+				improve(&best, spp-offset1, spp, cv32, 1)
+				improve(&best, spp-offset2, spp, cv32, 2)
+				improve(&best, spp-offset3, spp, cv32, 3)
 			}
 		}
 		// Load next and check...
@@ -262,28 +256,30 @@ encodeLoop:
 			candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]
 
 			// Short at s+1
-			best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
+			improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1)
 			// Long at s+1, s+2
-			best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
-			best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
-			best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
-			best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
+			improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1)
+			improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1)
+			improve(&best, candidateL2.offset-e.cur, s+1, uint32(cv2), -1)
+			improve(&best, candidateL2.prev-e.cur, s+1, uint32(cv2), -1)
 			if false {
 				// Short at s+3.
 				// Too often worse...
-				best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1))
+				improve(&best, e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1)
 			}
 			// See if we can find a better match by checking where the current best ends.
 			// Use that offset to see if we can find a better full match.
 			if sAt := best.s + best.length; sAt < sLimit {
 				nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
 				candidateEnd := e.longTable[nextHashL]
-				if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
-					bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
-					if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 {
-						bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1))
+				// Start check at a fixed offset to allow for a few mismatches.
+				// For this compression level 2 yields the best results.
+				const skipBeginning = 2
+				if pos := candidateEnd.offset - e.cur - best.length + skipBeginning; pos >= 0 {
+					improve(&best, pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
+					if pos := candidateEnd.prev - e.cur - best.length + skipBeginning; pos >= 0 {
+						improve(&best, pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
 					}
-					best = bestEnd
 				}
 			}
 		}
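
`improve` folds the removed `bestOf`/`matchAt` pair into one in-place update, so losing candidates no longer produce a `match` value per probe. The comparison itself is unchanged; a standalone sketch with just the fields it needs (`bitsPerByte` is the encoder's scaled literal-cost estimate):

```go
package main

// match keeps only the fields the comparison reads; the real struct in
// enc_best.go carries offset, length and rep as well.
type match struct{ s, est int32 }

// better reproduces the removed bestOf: a candidate that starts later must
// emit more literals first, so each side is charged the literal cost of the
// gap between the two start positions before the estimates are compared.
func better(a, b match, bitsPerByte int32) match {
	if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 {
		return a
	}
	return b
}
```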

+ 4 - 8
vendor/github.com/klauspost/compress/zstd/enc_better.go

@@ -62,14 +62,10 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
-			for i := range e.table[:] {
-				e.table[i] = tableEntry{}
-			}
-			for i := range e.longTable[:] {
-				e.longTable[i] = prevEntry{}
-			}
+			e.table = [betterShortTableSize]tableEntry{}
+			e.longTable = [betterLongTableSize]prevEntry{}
 			e.cur = e.maxMatchOff
 			break
 		}
@@ -587,7 +583,7 @@ func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
 			for i := range e.table[:] {
 				e.table[i] = tableEntry{}

+ 6 - 10
vendor/github.com/klauspost/compress/zstd/enc_dfast.go

@@ -44,14 +44,10 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
-			for i := range e.table[:] {
-				e.table[i] = tableEntry{}
-			}
-			for i := range e.longTable[:] {
-				e.longTable[i] = tableEntry{}
-			}
+			e.table = [dFastShortTableSize]tableEntry{}
+			e.longTable = [dFastLongTableSize]tableEntry{}
 			e.cur = e.maxMatchOff
 			break
 		}
@@ -388,7 +384,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	if e.cur >= bufferReset {
+	if e.cur >= e.bufferReset {
 		for i := range e.table[:] {
 			e.table[i] = tableEntry{}
 		}
@@ -685,7 +681,7 @@ encodeLoop:
 	}
 
 	// We do not store history, so we must offset e.cur to avoid false matches for next user.
-	if e.cur < bufferReset {
+	if e.cur < e.bufferReset {
 		e.cur += int32(len(src))
 	}
 }
@@ -700,7 +696,7 @@ func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
 			for i := range e.table[:] {
 				e.table[i] = tableEntry{}

+ 5 - 7
vendor/github.com/klauspost/compress/zstd/enc_fast.go

@@ -43,7 +43,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
 			for i := range e.table[:] {
 				e.table[i] = tableEntry{}
@@ -310,7 +310,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
 	}
 
 	// Protect against e.cur wraparound.
-	if e.cur >= bufferReset {
+	if e.cur >= e.bufferReset {
 		for i := range e.table[:] {
 			e.table[i] = tableEntry{}
 		}
@@ -538,7 +538,7 @@ encodeLoop:
 		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
 	}
 	// We do not store history, so we must offset e.cur to avoid false matches for next user.
-	if e.cur < bufferReset {
+	if e.cur < e.bufferReset {
 		e.cur += int32(len(src))
 	}
 }
@@ -555,11 +555,9 @@ func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
 		return
 	}
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
-			for i := range e.table[:] {
-				e.table[i] = tableEntry{}
-			}
+			e.table = [tableSize]tableEntry{}
 			e.cur = e.maxMatchOff
 			break
 		}
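
Across the encoders, the element-by-element clearing loops become single whole-array assignments. A sketch of the pattern (type and size are stand-ins for the real ones in enc_fast.go):

```go
package main

type tableEntry struct {
	val    uint32
	offset int32
}

const tableSize = 1 << 15 // placeholder, matching tableBits = 15

// clearTable zeroes the table in one assignment. The effect is identical to
// the removed loop; the intent is clearer and it does not depend on the
// compiler recognizing the clear-loop idiom.
func clearTable(table *[tableSize]tableEntry) {
	*table = [tableSize]tableEntry{}
}
```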

+ 35 - 0
vendor/github.com/klauspost/compress/zstd/encoder.go

@@ -8,6 +8,7 @@ import (
 	"crypto/rand"
 	"fmt"
 	"io"
+	"math"
 	rdebug "runtime/debug"
 	"sync"
 
@@ -639,3 +640,37 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 	}
 	return dst
 }
+
+// MaxEncodedSize returns the expected maximum
+// size of an encoded block or stream.
+func (e *Encoder) MaxEncodedSize(size int) int {
+	frameHeader := 4 + 2 // magic + frame header & window descriptor
+	if e.o.dict != nil {
+		frameHeader += 4
+	}
+	// Frame content size:
+	if size < 256 {
+		frameHeader++
+	} else if size < 65536+256 {
+		frameHeader += 2
+	} else if size < math.MaxInt32 {
+		frameHeader += 4
+	} else {
+		frameHeader += 8
+	}
+	// Final crc
+	if e.o.crc {
+		frameHeader += 4
+	}
+
+	// Max overhead is 3 bytes/block.
+	// There cannot be 0 blocks.
+	blocks := (size + e.o.blockSize) / e.o.blockSize
+
+	// Combine, add padding.
+	maxSz := frameHeader + 3*blocks + size
+	if e.o.pad > 1 {
+		maxSz += calcSkippableFrame(int64(maxSz), int64(e.o.pad))
+	}
+	return maxSz
+}
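
`MaxEncodedSize` lets callers size a destination buffer once, up front. Typical use with `EncodeAll` (sketch, not part of the vendored code):

```go
package main

import "github.com/klauspost/compress/zstd"

// compressAll sizes dst from the worst-case bound so EncodeAll
// never has to grow the buffer.
func compressAll(enc *zstd.Encoder, src []byte) []byte {
	dst := make([]byte, 0, enc.MaxEncodedSize(len(src)))
	return enc.EncodeAll(src, dst)
}
```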

+ 29 - 7
vendor/github.com/klauspost/compress/zstd/encoder_options.go

@@ -3,6 +3,8 @@ package zstd
 import (
 	"errors"
 	"fmt"
+	"math"
+	"math/bits"
 	"runtime"
 	"strings"
 )
@@ -47,22 +49,22 @@ func (o encoderOptions) encoder() encoder {
 	switch o.level {
 	case SpeedFastest:
 		if o.dict != nil {
-			return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+			return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
 		}
-		return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+		return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
 
 	case SpeedDefault:
 		if o.dict != nil {
-			return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}}
+			return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}}
 		}
-		return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+		return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
 	case SpeedBetterCompression:
 		if o.dict != nil {
-			return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+			return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
 		}
-		return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+		return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
 	case SpeedBestCompression:
-		return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+		return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
 	}
 	panic("unknown compression level")
 }
@@ -304,7 +306,13 @@ func WithLowerEncoderMem(b bool) EOption {
 }
 
 // WithEncoderDict allows registering a dictionary that will be used for the encode.
+//
+// The slice dict must be in the [dictionary format] produced by
+// "zstd --train" from the Zstandard reference implementation.
+//
 // The encoder *may* choose to use no dictionary instead for certain payloads.
+//
+// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
 func WithEncoderDict(dict []byte) EOption {
 	return func(o *encoderOptions) error {
 		d, err := loadDict(dict)
@@ -315,3 +323,17 @@ func WithEncoderDict(dict []byte) EOption {
 		return nil
 	}
 }
+
+// WithEncoderDictRaw registers a dictionary that may be used by the encoder.
+//
+// The slice content may contain arbitrary data. It will be used as an initial
+// history.
+func WithEncoderDictRaw(id uint32, content []byte) EOption {
+	return func(o *encoderOptions) error {
+		if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
+			return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content))
+		}
+		o.dict = &dict{id: id, content: content, offsets: [3]int{1, 4, 8}}
+		return nil
+	}
+}
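
`WithEncoderDictRaw` covers histories that are arbitrary bytes rather than trained dictionaries. A sketch (`id` and `history` are caller-chosen placeholders; a decoder must be configured with the same pair via the matching raw-dictionary decoder option in this release):

```go
package main

import (
	"io"

	"github.com/klauspost/compress/zstd"
)

// newRawDictWriter attaches arbitrary shared history to an encoder.
func newRawDictWriter(w io.Writer, id uint32, history []byte) (*zstd.Encoder, error) {
	return zstd.NewWriter(w, zstd.WithEncoderDictRaw(id, history))
}
```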

+ 19 - 28
vendor/github.com/klauspost/compress/zstd/framedec.go

@@ -5,7 +5,7 @@
 package zstd
 
 import (
-	"bytes"
+	"encoding/binary"
 	"encoding/hex"
 	"errors"
 	"io"
@@ -29,7 +29,7 @@ type frameDec struct {
 
 	FrameContentSize uint64
 
-	DictionaryID  *uint32
+	DictionaryID  uint32
 	HasCheckSum   bool
 	SingleSegment bool
 }
@@ -43,9 +43,9 @@ const (
 	MaxWindowSize = 1 << 29
 )
 
-var (
-	frameMagic          = []byte{0x28, 0xb5, 0x2f, 0xfd}
-	skippableFrameMagic = []byte{0x2a, 0x4d, 0x18}
+const (
+	frameMagic          = "\x28\xb5\x2f\xfd"
+	skippableFrameMagic = "\x2a\x4d\x18"
 )
 
 func newFrameDec(o decoderOptions) *frameDec {
@@ -89,9 +89,9 @@ func (d *frameDec) reset(br byteBuffer) error {
 			copy(signature[1:], b)
 		}
 
-		if !bytes.Equal(signature[1:4], skippableFrameMagic) || signature[0]&0xf0 != 0x50 {
+		if string(signature[1:4]) != skippableFrameMagic || signature[0]&0xf0 != 0x50 {
 			if debugDecoder {
-				println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString(skippableFrameMagic))
+				println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString([]byte(skippableFrameMagic)))
 			}
 			// Break if not skippable frame.
 			break
@@ -114,9 +114,9 @@ func (d *frameDec) reset(br byteBuffer) error {
 			return err
 		}
 	}
-	if !bytes.Equal(signature[:], frameMagic) {
+	if string(signature[:]) != frameMagic {
 		if debugDecoder {
-			println("Got magic numbers: ", signature, "want:", frameMagic)
+			println("Got magic numbers: ", signature, "want:", []byte(frameMagic))
 		}
 		return ErrMagicMismatch
 	}
@@ -155,7 +155,7 @@ func (d *frameDec) reset(br byteBuffer) error {
 
 	// Read Dictionary_ID
 	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
-	d.DictionaryID = nil
+	d.DictionaryID = 0
 	if size := fhd & 3; size != 0 {
 		if size == 3 {
 			size = 4
@@ -167,7 +167,7 @@ func (d *frameDec) reset(br byteBuffer) error {
 			return err
 		}
 		var id uint32
-		switch size {
+		switch len(b) {
 		case 1:
 			id = uint32(b[0])
 		case 2:
@@ -178,11 +178,7 @@ func (d *frameDec) reset(br byteBuffer) error {
 		if debugDecoder {
 			println("Dict size", size, "ID:", id)
 		}
-		if id > 0 {
-			// ID 0 means "sorry, no dictionary anyway".
-			// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
-			d.DictionaryID = &id
-		}
+		d.DictionaryID = id
 	}
 
 	// Read Frame_Content_Size
@@ -204,7 +200,7 @@ func (d *frameDec) reset(br byteBuffer) error {
 			println("Reading Frame content", err)
 			return err
 		}
-		switch fcsSize {
+		switch len(b) {
 		case 1:
 			d.FrameContentSize = uint64(b[0])
 		case 2:
@@ -305,7 +301,7 @@ func (d *frameDec) checkCRC() error {
 	}
 
 	// We can overwrite upper tmp now
-	want, err := d.rawInput.readSmall(4)
+	buf, err := d.rawInput.readSmall(4)
 	if err != nil {
 		println("CRC missing?", err)
 		return err
@@ -315,22 +311,17 @@ func (d *frameDec) checkCRC() error {
 		return nil
 	}
 
-	var tmp [4]byte
-	got := d.crc.Sum64()
-	// Flip to match file order.
-	tmp[0] = byte(got >> 0)
-	tmp[1] = byte(got >> 8)
-	tmp[2] = byte(got >> 16)
-	tmp[3] = byte(got >> 24)
+	want := binary.LittleEndian.Uint32(buf[:4])
+	got := uint32(d.crc.Sum64())
 
-	if !bytes.Equal(tmp[:], want) {
+	if got != want {
 		if debugDecoder {
-			println("CRC Check Failed:", tmp[:], "!=", want)
+			printf("CRC check failed: got %08x, want %08x\n", got, want)
 		}
 		return ErrCRCMismatch
 	}
 	if debugDecoder {
-		println("CRC ok", tmp[:])
+		printf("CRC ok %08x\n", got)
 	}
 	return nil
 }
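
The `[]byte` magics become string constants: converting a byte slice for an immediate string comparison does not allocate, which drops the `bytes` dependency. The same shape works for any short magic check (sketch):

```go
package main

const frameMagic = "\x28\xb5\x2f\xfd" // as in framedec.go

// isZstdFrame reports whether b starts with the zstd frame magic.
func isZstdFrame(b []byte) bool {
	return len(b) >= 4 && string(b[:4]) == frameMagic
}
```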

+ 31 - 18
vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md

@@ -2,12 +2,7 @@
 
 VENDORED: Go to [github.com/cespare/xxhash](https://github.com/cespare/xxhash) for original package.
 
-
-[![GoDoc](https://godoc.org/github.com/cespare/xxhash?status.svg)](https://godoc.org/github.com/cespare/xxhash)
-[![Build Status](https://travis-ci.org/cespare/xxhash.svg?branch=master)](https://travis-ci.org/cespare/xxhash)
-
-xxhash is a Go implementation of the 64-bit
-[xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a
+xxhash is a Go implementation of the 64-bit [xxHash] algorithm, XXH64. This is a
 high-quality hashing algorithm that is much faster than anything in the Go
 standard library.
 
@@ -28,31 +23,49 @@ func (*Digest) WriteString(string) (int, error)
 func (*Digest) Sum64() uint64
 ```
 
-This implementation provides a fast pure-Go implementation and an even faster
-assembly implementation for amd64.
+The package is written with optimized pure Go and also contains even faster
+assembly implementations for amd64 and arm64. If desired, the `purego` build tag
+opts into using the Go code even on those architectures.
+
+[xxHash]: http://cyan4973.github.io/xxHash/
+
+## Compatibility
+
+This package is in a module and the latest code is in version 2 of the module.
+You need a version of Go with at least "minimal module compatibility" to use
+github.com/cespare/xxhash/v2:
+
+* 1.9.7+ for Go 1.9
+* 1.10.3+ for Go 1.10
+* Go 1.11 or later
+
+I recommend using the latest release of Go.
 
 ## Benchmarks
 
 Here are some quick benchmarks comparing the pure-Go and assembly
 implementations of Sum64.
 
-| input size | purego | asm |
-| --- | --- | --- |
-| 5 B   |  979.66 MB/s |  1291.17 MB/s  |
-| 100 B | 7475.26 MB/s | 7973.40 MB/s  |
-| 4 KB  | 17573.46 MB/s | 17602.65 MB/s |
-| 10 MB | 17131.46 MB/s | 17142.16 MB/s |
+| input size | purego    | asm       |
+| ---------- | --------- | --------- |
+| 4 B        |  1.3 GB/s |  1.2 GB/s |
+| 16 B       |  2.9 GB/s |  3.5 GB/s |
+| 100 B      |  6.9 GB/s |  8.1 GB/s |
+| 4 KB       | 11.7 GB/s | 16.7 GB/s |
+| 10 MB      | 12.0 GB/s | 17.3 GB/s |
 
-These numbers were generated on Ubuntu 18.04 with an Intel i7-8700K CPU using
-the following commands under Go 1.11.2:
+These numbers were generated on Ubuntu 20.04 with an Intel Xeon Platinum 8252C
+CPU using the following commands under Go 1.19.2:
 
 ```
-$ go test -tags purego -benchtime 10s -bench '/xxhash,direct,bytes'
-$ go test -benchtime 10s -bench '/xxhash,direct,bytes'
+benchstat <(go test -tags purego -benchtime 500ms -count 15 -bench 'Sum64$')
+benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$')
 ```
 
 ## Projects using this package
 
 - [InfluxDB](https://github.com/influxdata/influxdb)
 - [Prometheus](https://github.com/prometheus/prometheus)
+- [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
 - [FreeCache](https://github.com/coocood/freecache)
+- [FastCache](https://github.com/VictoriaMetrics/fastcache)

+ 20 - 27
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go

@@ -18,19 +18,11 @@ const (
 	prime5 uint64 = 2870177450012600261
 )
 
-// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where
-// possible in the Go code is worth a small (but measurable) performance boost
-// by avoiding some MOVQs. Vars are needed for the asm and also are useful for
-// convenience in the Go code in a few places where we need to intentionally
-// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the
-// result overflows a uint64).
-var (
-	prime1v = prime1
-	prime2v = prime2
-	prime3v = prime3
-	prime4v = prime4
-	prime5v = prime5
-)
+// Store the primes in an array as well.
+//
+// The consts are used when possible in Go code to avoid MOVs but we need a
+// contiguous array for the assembly code.
+var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5}
 
 // Digest implements hash.Hash64.
 type Digest struct {
@@ -52,10 +44,10 @@ func New() *Digest {
 
 // Reset clears the Digest's state so that it can be reused.
 func (d *Digest) Reset() {
-	d.v1 = prime1v + prime2
+	d.v1 = primes[0] + prime2
 	d.v2 = prime2
 	d.v3 = 0
-	d.v4 = -prime1v
+	d.v4 = -primes[0]
 	d.total = 0
 	d.n = 0
 }
@@ -71,21 +63,23 @@ func (d *Digest) Write(b []byte) (n int, err error) {
 	n = len(b)
 	d.total += uint64(n)
 
+	memleft := d.mem[d.n&(len(d.mem)-1):]
+
 	if d.n+n < 32 {
 		// This new data doesn't even fill the current block.
-		copy(d.mem[d.n:], b)
+		copy(memleft, b)
 		d.n += n
 		return
 	}
 
 	if d.n > 0 {
 		// Finish off the partial block.
-		copy(d.mem[d.n:], b)
+		c := copy(memleft, b)
 		d.v1 = round(d.v1, u64(d.mem[0:8]))
 		d.v2 = round(d.v2, u64(d.mem[8:16]))
 		d.v3 = round(d.v3, u64(d.mem[16:24]))
 		d.v4 = round(d.v4, u64(d.mem[24:32]))
-		b = b[32-d.n:]
+		b = b[c:]
 		d.n = 0
 	}
 
@@ -135,21 +129,20 @@ func (d *Digest) Sum64() uint64 {
 
 	h += d.total
 
-	i, end := 0, d.n
-	for ; i+8 <= end; i += 8 {
-		k1 := round(0, u64(d.mem[i:i+8]))
+	b := d.mem[:d.n&(len(d.mem)-1)]
+	for ; len(b) >= 8; b = b[8:] {
+		k1 := round(0, u64(b[:8]))
 		h ^= k1
 		h = rol27(h)*prime1 + prime4
 	}
-	if i+4 <= end {
-		h ^= uint64(u32(d.mem[i:i+4])) * prime1
+	if len(b) >= 4 {
+		h ^= uint64(u32(b[:4])) * prime1
 		h = rol23(h)*prime2 + prime3
-		i += 4
+		b = b[4:]
 	}
-	for i < end {
-		h ^= uint64(d.mem[i]) * prime5
+	for ; len(b) > 0; b = b[1:] {
+		h ^= uint64(b[0]) * prime5
 		h = rol11(h) * prime1
-		i++
 	}
 
 	h ^= h >> 33
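
`Write` and `Sum64` now index `d.mem` through `d.n&(len(d.mem)-1)`. Since `d.mem` is 32 bytes and `0 <= d.n < 32` is an invariant, the mask never changes the value; as we read it, it lets the compiler prove the index in bounds and elide the check. The pattern in isolation:

```go
package main

// tail returns the unused portion of a power-of-two sized buffer.
// Masking n by len(mem)-1 cannot change a valid n, but it lets the
// compiler drop the bounds check on the slice operation.
func tail(mem *[32]byte, n int) []byte {
	return mem[n&(len(mem)-1):]
}
```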

+ 165 - 171
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s

@@ -1,3 +1,4 @@
+//go:build !appengine && gc && !purego && !noasm
 // +build !appengine
 // +build gc
 // +build !purego
@@ -5,212 +6,205 @@
 
 #include "textflag.h"
 
-// Register allocation:
-// AX	h
-// SI	pointer to advance through b
-// DX	n
-// BX	loop end
-// R8	v1, k1
-// R9	v2
-// R10	v3
-// R11	v4
-// R12	tmp
-// R13	prime1v
-// R14	prime2v
-// DI	prime4v
-
-// round reads from and advances the buffer pointer in SI.
-// It assumes that R13 has prime1v and R14 has prime2v.
-#define round(r) \
-	MOVQ  (SI), R12 \
-	ADDQ  $8, SI    \
-	IMULQ R14, R12  \
-	ADDQ  R12, r    \
-	ROLQ  $31, r    \
-	IMULQ R13, r
-
-// mergeRound applies a merge round on the two registers acc and val.
-// It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v.
-#define mergeRound(acc, val) \
-	IMULQ R14, val \
-	ROLQ  $31, val \
-	IMULQ R13, val \
-	XORQ  val, acc \
-	IMULQ R13, acc \
-	ADDQ  DI, acc
+// Registers:
+#define h      AX
+#define d      AX
+#define p      SI // pointer to advance through b
+#define n      DX
+#define end    BX // loop end
+#define v1     R8
+#define v2     R9
+#define v3     R10
+#define v4     R11
+#define x      R12
+#define prime1 R13
+#define prime2 R14
+#define prime4 DI
+
+#define round(acc, x) \
+	IMULQ prime2, x   \
+	ADDQ  x, acc      \
+	ROLQ  $31, acc    \
+	IMULQ prime1, acc
+
+// round0 performs the operation x = round(0, x).
+#define round0(x) \
+	IMULQ prime2, x \
+	ROLQ  $31, x    \
+	IMULQ prime1, x
+
+// mergeRound applies a merge round on the two registers acc and x.
+// It assumes that prime1, prime2, and prime4 have been loaded.
+#define mergeRound(acc, x) \
+	round0(x)         \
+	XORQ  x, acc      \
+	IMULQ prime1, acc \
+	ADDQ  prime4, acc
+
+// blockLoop processes as many 32-byte blocks as possible,
+// updating v1, v2, v3, and v4. It assumes that there is at least one block
+// to process.
+#define blockLoop() \
+loop:  \
+	MOVQ +0(p), x  \
+	round(v1, x)   \
+	MOVQ +8(p), x  \
+	round(v2, x)   \
+	MOVQ +16(p), x \
+	round(v3, x)   \
+	MOVQ +24(p), x \
+	round(v4, x)   \
+	ADDQ $32, p    \
+	CMPQ p, end    \
+	JLE  loop
 
 // func Sum64(b []byte) uint64
-TEXT ·Sum64(SB), NOSPLIT, $0-32
+TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
 	// Load fixed primes.
-	MOVQ ·prime1v(SB), R13
-	MOVQ ·prime2v(SB), R14
-	MOVQ ·prime4v(SB), DI
+	MOVQ ·primes+0(SB), prime1
+	MOVQ ·primes+8(SB), prime2
+	MOVQ ·primes+24(SB), prime4
 
 	// Load slice.
-	MOVQ b_base+0(FP), SI
-	MOVQ b_len+8(FP), DX
-	LEAQ (SI)(DX*1), BX
+	MOVQ b_base+0(FP), p
+	MOVQ b_len+8(FP), n
+	LEAQ (p)(n*1), end
 
 	// The first loop limit will be len(b)-32.
-	SUBQ $32, BX
+	SUBQ $32, end
 
 	// Check whether we have at least one block.
-	CMPQ DX, $32
+	CMPQ n, $32
 	JLT  noBlocks
 
 	// Set up initial state (v1, v2, v3, v4).
-	MOVQ R13, R8
-	ADDQ R14, R8
-	MOVQ R14, R9
-	XORQ R10, R10
-	XORQ R11, R11
-	SUBQ R13, R11
-
-	// Loop until SI > BX.
-blockLoop:
-	round(R8)
-	round(R9)
-	round(R10)
-	round(R11)
-
-	CMPQ SI, BX
-	JLE  blockLoop
-
-	MOVQ R8, AX
-	ROLQ $1, AX
-	MOVQ R9, R12
-	ROLQ $7, R12
-	ADDQ R12, AX
-	MOVQ R10, R12
-	ROLQ $12, R12
-	ADDQ R12, AX
-	MOVQ R11, R12
-	ROLQ $18, R12
-	ADDQ R12, AX
-
-	mergeRound(AX, R8)
-	mergeRound(AX, R9)
-	mergeRound(AX, R10)
-	mergeRound(AX, R11)
+	MOVQ prime1, v1
+	ADDQ prime2, v1
+	MOVQ prime2, v2
+	XORQ v3, v3
+	XORQ v4, v4
+	SUBQ prime1, v4
+
+	blockLoop()
+
+	MOVQ v1, h
+	ROLQ $1, h
+	MOVQ v2, x
+	ROLQ $7, x
+	ADDQ x, h
+	MOVQ v3, x
+	ROLQ $12, x
+	ADDQ x, h
+	MOVQ v4, x
+	ROLQ $18, x
+	ADDQ x, h
+
+	mergeRound(h, v1)
+	mergeRound(h, v2)
+	mergeRound(h, v3)
+	mergeRound(h, v4)
 
 	JMP afterBlocks
 
 noBlocks:
-	MOVQ ·prime5v(SB), AX
+	MOVQ ·primes+32(SB), h
 
 afterBlocks:
-	ADDQ DX, AX
-
-	// Right now BX has len(b)-32, and we want to loop until SI > len(b)-8.
-	ADDQ $24, BX
-
-	CMPQ SI, BX
-	JG   fourByte
-
-wordLoop:
-	// Calculate k1.
-	MOVQ  (SI), R8
-	ADDQ  $8, SI
-	IMULQ R14, R8
-	ROLQ  $31, R8
-	IMULQ R13, R8
-
-	XORQ  R8, AX
-	ROLQ  $27, AX
-	IMULQ R13, AX
-	ADDQ  DI, AX
-
-	CMPQ SI, BX
-	JLE  wordLoop
-
-fourByte:
-	ADDQ $4, BX
-	CMPQ SI, BX
-	JG   singles
-
-	MOVL  (SI), R8
-	ADDQ  $4, SI
-	IMULQ R13, R8
-	XORQ  R8, AX
-
-	ROLQ  $23, AX
-	IMULQ R14, AX
-	ADDQ  ·prime3v(SB), AX
-
-singles:
-	ADDQ $4, BX
-	CMPQ SI, BX
+	ADDQ n, h
+
+	ADDQ $24, end
+	CMPQ p, end
+	JG   try4
+
+loop8:
+	MOVQ  (p), x
+	ADDQ  $8, p
+	round0(x)
+	XORQ  x, h
+	ROLQ  $27, h
+	IMULQ prime1, h
+	ADDQ  prime4, h
+
+	CMPQ p, end
+	JLE  loop8
+
+try4:
+	ADDQ $4, end
+	CMPQ p, end
+	JG   try1
+
+	MOVL  (p), x
+	ADDQ  $4, p
+	IMULQ prime1, x
+	XORQ  x, h
+
+	ROLQ  $23, h
+	IMULQ prime2, h
+	ADDQ  ·primes+16(SB), h
+
+try1:
+	ADDQ $4, end
+	CMPQ p, end
 	JGE  finalize
 
-singlesLoop:
-	MOVBQZX (SI), R12
-	ADDQ    $1, SI
-	IMULQ   ·prime5v(SB), R12
-	XORQ    R12, AX
+loop1:
+	MOVBQZX (p), x
+	ADDQ    $1, p
+	IMULQ   ·primes+32(SB), x
+	XORQ    x, h
+	ROLQ    $11, h
+	IMULQ   prime1, h
 
-	ROLQ  $11, AX
-	IMULQ R13, AX
-
-	CMPQ SI, BX
-	JL   singlesLoop
+	CMPQ p, end
+	JL   loop1
 
 finalize:
-	MOVQ  AX, R12
-	SHRQ  $33, R12
-	XORQ  R12, AX
-	IMULQ R14, AX
-	MOVQ  AX, R12
-	SHRQ  $29, R12
-	XORQ  R12, AX
-	IMULQ ·prime3v(SB), AX
-	MOVQ  AX, R12
-	SHRQ  $32, R12
-	XORQ  R12, AX
-
-	MOVQ AX, ret+24(FP)
+	MOVQ  h, x
+	SHRQ  $33, x
+	XORQ  x, h
+	IMULQ prime2, h
+	MOVQ  h, x
+	SHRQ  $29, x
+	XORQ  x, h
+	IMULQ ·primes+16(SB), h
+	MOVQ  h, x
+	SHRQ  $32, x
+	XORQ  x, h
+
+	MOVQ h, ret+24(FP)
 	RET
 
-// writeBlocks uses the same registers as above except that it uses AX to store
-// the d pointer.
-
 // func writeBlocks(d *Digest, b []byte) int
-TEXT ·writeBlocks(SB), NOSPLIT, $0-40
+TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
 	// Load fixed primes needed for round.
-	MOVQ ·prime1v(SB), R13
-	MOVQ ·prime2v(SB), R14
+	MOVQ ·primes+0(SB), prime1
+	MOVQ ·primes+8(SB), prime2
 
 	// Load slice.
-	MOVQ b_base+8(FP), SI
-	MOVQ b_len+16(FP), DX
-	LEAQ (SI)(DX*1), BX
-	SUBQ $32, BX
+	MOVQ b_base+8(FP), p
+	MOVQ b_len+16(FP), n
+	LEAQ (p)(n*1), end
+	SUBQ $32, end
 
 	// Load vN from d.
-	MOVQ d+0(FP), AX
-	MOVQ 0(AX), R8   // v1
-	MOVQ 8(AX), R9   // v2
-	MOVQ 16(AX), R10 // v3
-	MOVQ 24(AX), R11 // v4
+	MOVQ s+0(FP), d
+	MOVQ 0(d), v1
+	MOVQ 8(d), v2
+	MOVQ 16(d), v3
+	MOVQ 24(d), v4
 
 	// We don't need to check the loop condition here; this function is
 	// always called with at least one block of data to process.
-blockLoop:
-	round(R8)
-	round(R9)
-	round(R10)
-	round(R11)
-
-	CMPQ SI, BX
-	JLE  blockLoop
+	blockLoop()
 
 	// Copy vN back to d.
-	MOVQ R8, 0(AX)
-	MOVQ R9, 8(AX)
-	MOVQ R10, 16(AX)
-	MOVQ R11, 24(AX)
-
-	// The number of bytes written is SI minus the old base pointer.
-	SUBQ b_base+8(FP), SI
-	MOVQ SI, ret+32(FP)
+	MOVQ v1, 0(d)
+	MOVQ v2, 8(d)
+	MOVQ v3, 16(d)
+	MOVQ v4, 24(d)
+
+	// The number of bytes written is p minus the old base pointer.
+	SUBQ b_base+8(FP), p
+	MOVQ p, ret+32(FP)
 
 	RET

+ 69 - 71
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s

@@ -1,13 +1,17 @@
-// +build gc,!purego,!noasm
+//go:build !appengine && gc && !purego && !noasm
+// +build !appengine
+// +build gc
+// +build !purego
+// +build !noasm
 
 #include "textflag.h"
 
-// Register allocation.
+// Registers:
 #define digest	R1
-#define h	R2 // Return value.
-#define p	R3 // Input pointer.
-#define len	R4
-#define nblocks	R5 // len / 32.
+#define h	R2 // return value
+#define p	R3 // input pointer
+#define n	R4 // input length
+#define nblocks	R5 // n / 32
 #define prime1	R7
 #define prime2	R8
 #define prime3	R9
@@ -25,60 +29,52 @@
 #define round(acc, x) \
 	MADD prime2, acc, x, acc \
 	ROR  $64-31, acc         \
-	MUL  prime1, acc         \
+	MUL  prime1, acc
 
-// x = round(0, x).
+// round0 performs the operation x = round(0, x).
 #define round0(x) \
 	MUL prime2, x \
 	ROR $64-31, x \
-	MUL prime1, x \
-
-#define mergeRound(x) \
-	round0(x)                 \
-	EOR  x, h                 \
-	MADD h, prime4, prime1, h \
-
-// Update v[1-4] with 32-byte blocks. Assumes len >= 32.
-#define blocksLoop() \
-	LSR     $5, len, nblocks \
-	PCALIGN $16              \
-	loop:                    \
-	LDP.P   32(p), (x1, x2)  \
-	round(v1, x1)            \
-	LDP     -16(p), (x3, x4) \
-	round(v2, x2)            \
-	SUB     $1, nblocks      \
-	round(v3, x3)            \
-	round(v4, x4)            \
-	CBNZ    nblocks, loop    \
-
-// The primes are repeated here to ensure that they're stored
-// in a contiguous array, so we can load them with LDP.
-DATA primes<> +0(SB)/8, $11400714785074694791
-DATA primes<> +8(SB)/8, $14029467366897019727
-DATA primes<>+16(SB)/8, $1609587929392839161
-DATA primes<>+24(SB)/8, $9650029242287828579
-DATA primes<>+32(SB)/8, $2870177450012600261
-GLOBL primes<>(SB), NOPTR+RODATA, $40
+	MUL prime1, x
+
+#define mergeRound(acc, x) \
+	round0(x)                     \
+	EOR  x, acc                   \
+	MADD acc, prime4, prime1, acc
+
+// blockLoop processes as many 32-byte blocks as possible,
+// updating v1, v2, v3, and v4. It assumes that n >= 32.
+#define blockLoop() \
+	LSR     $5, n, nblocks  \
+	PCALIGN $16             \
+	loop:                   \
+	LDP.P   16(p), (x1, x2) \
+	LDP.P   16(p), (x3, x4) \
+	round(v1, x1)           \
+	round(v2, x2)           \
+	round(v3, x3)           \
+	round(v4, x4)           \
+	SUB     $1, nblocks     \
+	CBNZ    nblocks, loop
 
 // func Sum64(b []byte) uint64
-TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32
-	LDP b_base+0(FP), (p, len)
+TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
+	LDP b_base+0(FP), (p, n)
 
-	LDP  primes<> +0(SB), (prime1, prime2)
-	LDP  primes<>+16(SB), (prime3, prime4)
-	MOVD primes<>+32(SB), prime5
+	LDP  ·primes+0(SB), (prime1, prime2)
+	LDP  ·primes+16(SB), (prime3, prime4)
+	MOVD ·primes+32(SB), prime5
 
-	CMP  $32, len
-	CSEL LO, prime5, ZR, h // if len < 32 { h = prime5 } else { h = 0 }
-	BLO  afterLoop
+	CMP  $32, n
+	CSEL LT, prime5, ZR, h // if n < 32 { h = prime5 } else { h = 0 }
+	BLT  afterLoop
 
 	ADD  prime1, prime2, v1
 	MOVD prime2, v2
 	MOVD $0, v3
 	NEG  prime1, v4
 
-	blocksLoop()
+	blockLoop()
 
 	ROR $64-1, v1, x1
 	ROR $64-7, v2, x2
@@ -88,71 +84,75 @@ TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32
 	ADD x3, x4
 	ADD x2, x4, h
 
-	mergeRound(v1)
-	mergeRound(v2)
-	mergeRound(v3)
-	mergeRound(v4)
+	mergeRound(h, v1)
+	mergeRound(h, v2)
+	mergeRound(h, v3)
+	mergeRound(h, v4)
 
 afterLoop:
-	ADD len, h
+	ADD n, h
 
-	TBZ   $4, len, try8
+	TBZ   $4, n, try8
 	LDP.P 16(p), (x1, x2)
 
 	round0(x1)
+
+	// NOTE: here and below, sequencing the EOR after the ROR (using a
+	// rotated register) is worth a small but measurable speedup for small
+	// inputs.
 	ROR  $64-27, h
 	EOR  x1 @> 64-27, h, h
 	MADD h, prime4, prime1, h
 
 	round0(x2)
 	ROR  $64-27, h
-	EOR  x2 @> 64-27, h
+	EOR  x2 @> 64-27, h, h
 	MADD h, prime4, prime1, h
 
 try8:
-	TBZ    $3, len, try4
+	TBZ    $3, n, try4
 	MOVD.P 8(p), x1
 
 	round0(x1)
 	ROR  $64-27, h
-	EOR  x1 @> 64-27, h
+	EOR  x1 @> 64-27, h, h
 	MADD h, prime4, prime1, h
 
 try4:
-	TBZ     $2, len, try2
+	TBZ     $2, n, try2
 	MOVWU.P 4(p), x2
 
 	MUL  prime1, x2
 	ROR  $64-23, h
-	EOR  x2 @> 64-23, h
+	EOR  x2 @> 64-23, h, h
 	MADD h, prime3, prime2, h
 
 try2:
-	TBZ     $1, len, try1
+	TBZ     $1, n, try1
 	MOVHU.P 2(p), x3
 	AND     $255, x3, x1
 	LSR     $8, x3, x2
 
 	MUL prime5, x1
 	ROR $64-11, h
-	EOR x1 @> 64-11, h
+	EOR x1 @> 64-11, h, h
 	MUL prime1, h
 
 	MUL prime5, x2
 	ROR $64-11, h
-	EOR x2 @> 64-11, h
+	EOR x2 @> 64-11, h, h
 	MUL prime1, h
 
 try1:
-	TBZ   $0, len, end
+	TBZ   $0, n, finalize
 	MOVBU (p), x4
 
 	MUL prime5, x4
 	ROR $64-11, h
-	EOR x4 @> 64-11, h
+	EOR x4 @> 64-11, h, h
 	MUL prime1, h
 
-end:
+finalize:
 	EOR h >> 33, h
 	MUL prime2, h
 	EOR h >> 29, h
@@ -163,24 +163,22 @@ end:
 	RET
 
 // func writeBlocks(d *Digest, b []byte) int
-//
-// Assumes len(b) >= 32.
-TEXT ·writeBlocks(SB), NOFRAME+NOSPLIT, $0-40
-	LDP primes<>(SB), (prime1, prime2)
+TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
+	LDP ·primes+0(SB), (prime1, prime2)
 
 	// Load state. Assume v[1-4] are stored contiguously.
 	MOVD d+0(FP), digest
 	LDP  0(digest), (v1, v2)
 	LDP  16(digest), (v3, v4)
 
-	LDP b_base+8(FP), (p, len)
+	LDP b_base+8(FP), (p, n)
 
-	blocksLoop()
+	blockLoop()
 
 	// Store updated state.
 	STP (v1, v2), 0(digest)
 	STP (v3, v4), 16(digest)
 
-	BIC  $31, len
-	MOVD len, ret+32(FP)
+	BIC  $31, n
+	MOVD n, ret+32(FP)
 	RET

+ 1 - 1
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go

@@ -13,4 +13,4 @@ package xxhash
 func Sum64(b []byte) uint64
 
 //go:noescape
-func writeBlocks(d *Digest, b []byte) int
+func writeBlocks(s *Digest, b []byte) int

+ 9 - 10
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go

@@ -15,10 +15,10 @@ func Sum64(b []byte) uint64 {
 	var h uint64
 
 	if n >= 32 {
-		v1 := prime1v + prime2
+		v1 := primes[0] + prime2
 		v2 := prime2
 		v3 := uint64(0)
-		v4 := -prime1v
+		v4 := -primes[0]
 		for len(b) >= 32 {
 			v1 = round(v1, u64(b[0:8:len(b)]))
 			v2 = round(v2, u64(b[8:16:len(b)]))
@@ -37,19 +37,18 @@ func Sum64(b []byte) uint64 {
 
 	h += uint64(n)
 
-	i, end := 0, len(b)
-	for ; i+8 <= end; i += 8 {
-		k1 := round(0, u64(b[i:i+8:len(b)]))
+	for ; len(b) >= 8; b = b[8:] {
+		k1 := round(0, u64(b[:8]))
 		h ^= k1
 		h = rol27(h)*prime1 + prime4
 	}
-	if i+4 <= end {
-		h ^= uint64(u32(b[i:i+4:len(b)])) * prime1
+	if len(b) >= 4 {
+		h ^= uint64(u32(b[:4])) * prime1
 		h = rol23(h)*prime2 + prime3
-		i += 4
+		b = b[4:]
 	}
-	for ; i < end; i++ {
-		h ^= uint64(b[i]) * prime5
+	for ; len(b) > 0; b = b[1:] {
+		h ^= uint64(b[0]) * prime5
 		h = rol11(h) * prime1
 	}
 

+ 1 - 5
vendor/github.com/klauspost/compress/zstd/seqdec.go

@@ -314,9 +314,6 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
 		}
 		size := ll + ml + len(out)
 		if size-startSize > maxBlockSize {
-			if size-startSize == 424242 {
-				panic("here")
-			}
 			return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
 		}
 		if size > cap(out) {
@@ -427,8 +424,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
 		}
 	}
 
-	// Check if space for literals
-	if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize {
+	if size := len(s.literals) + len(out) - startSize; size > maxBlockSize {
 		return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
 	}
 

+ 0 - 1
vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go

@@ -148,7 +148,6 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
 	s.seqSize += ctx.litRemain
 	if s.seqSize > maxBlockSize {
 		return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
-
 	}
 	err := br.close()
 	if err != nil {

+ 4 - 24
vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s

@@ -320,10 +320,6 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET
 
-	// Return with not enough output space error
-	MOVQ $0x00000005, ret+24(FP)
-	RET
-
 // func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 // Requires: CMOV
 TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32
@@ -617,10 +613,6 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET
 
-	// Return with not enough output space error
-	MOVQ $0x00000005, ret+24(FP)
-	RET
-
 // func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 // Requires: BMI, BMI2, CMOV
 TEXT ·sequenceDecs_decode_bmi2(SB), $8-32
@@ -897,10 +889,6 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET
 
-	// Return with not enough output space error
-	MOVQ $0x00000005, ret+24(FP)
-	RET
-
 // func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 // Requires: BMI, BMI2, CMOV
 TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32
@@ -1152,10 +1140,6 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET
 
-	// Return with not enough output space error
-	MOVQ $0x00000005, ret+24(FP)
-	RET
-
 // func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
 // Requires: SSE
 TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9
@@ -1389,8 +1373,7 @@ loop_finished:
 	MOVQ ctx+0(FP), AX
 	MOVQ DX, 24(AX)
 	MOVQ DI, 104(AX)
-	MOVQ 80(AX), CX
-	SUBQ CX, SI
+	SUBQ 80(AX), SI
 	MOVQ SI, 112(AX)
 	RET
 
@@ -1402,8 +1385,7 @@ error_match_off_too_big:
 	MOVQ ctx+0(FP), AX
 	MOVQ DX, 24(AX)
 	MOVQ DI, 104(AX)
-	MOVQ 80(AX), CX
-	SUBQ CX, SI
+	SUBQ 80(AX), SI
 	MOVQ SI, 112(AX)
 	RET
 
@@ -1747,8 +1729,7 @@ loop_finished:
 	MOVQ ctx+0(FP), AX
 	MOVQ DX, 24(AX)
 	MOVQ DI, 104(AX)
-	MOVQ 80(AX), CX
-	SUBQ CX, SI
+	SUBQ 80(AX), SI
 	MOVQ SI, 112(AX)
 	RET
 
@@ -1760,8 +1741,7 @@ error_match_off_too_big:
 	MOVQ ctx+0(FP), AX
 	MOVQ DX, 24(AX)
 	MOVQ DI, 104(AX)
-	MOVQ 80(AX), CX
-	SUBQ CX, SI
+	SUBQ 80(AX), SI
 	MOVQ SI, 112(AX)
 	RET
 

+ 11 - 20
vendor/github.com/klauspost/compress/zstd/zstd.go

@@ -36,9 +36,6 @@ const forcePreDef = false
 // zstdMinMatch is the minimum zstd match length.
 const zstdMinMatch = 3
 
-// Reset the buffer offset when reaching this.
-const bufferReset = math.MaxInt32 - MaxWindowSize
-
 // fcsUnknown is used for unknown frame content size.
 const fcsUnknown = math.MaxUint64
 
@@ -75,7 +72,6 @@ var (
 	ErrDecoderSizeExceeded = errors.New("decompressed size exceeds configured limit")
 
 	// ErrUnknownDictionary is returned if the dictionary ID is unknown.
-	// For the time being dictionaries are not supported.
 	ErrUnknownDictionary = errors.New("unknown dictionary")
 
 	// ErrFrameSizeExceeded is returned if the stated frame size is exceeded.
@@ -110,26 +106,25 @@ func printf(format string, a ...interface{}) {
 	}
 }
 
-// matchLen returns the maximum length.
+// matchLen returns the maximum common prefix length of a and b.
 // a must be the shorter of the two.
-// The function also returns whether all bytes matched.
-func matchLen(a, b []byte) int {
-	b = b[:len(a)]
-	for i := 0; i < len(a)-7; i += 8 {
-		if diff := load64(a, i) ^ load64(b, i); diff != 0 {
-			return i + (bits.TrailingZeros64(diff) >> 3)
+func matchLen(a, b []byte) (n int) {
+	for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
+		diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
+		if diff != 0 {
+			return n + bits.TrailingZeros64(diff)>>3
 		}
+		n += 8
 	}
 
-	checked := (len(a) >> 3) << 3
-	a = a[checked:]
-	b = b[checked:]
 	for i := range a {
 		if a[i] != b[i] {
-			return i + checked
+			break
 		}
+		n++
 	}
-	return len(a) + checked
+	return n
+
 }
 
 func load3232(b []byte, i int32) uint32 {
@@ -140,10 +135,6 @@ func load6432(b []byte, i int32) uint64 {
 	return binary.LittleEndian.Uint64(b[i:])
 }
 
-func load64(b []byte, i int) uint64 {
-	return binary.LittleEndian.Uint64(b[i:])
-}
-
 type byter interface {
 	Bytes() []byte
 	Len() int
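
The rewritten `matchLen` compares eight bytes at a time: the XOR of equal words is zero, and the first differing byte index is `TrailingZeros64(diff)/8`. A worked, runnable version of the same technique (not the vendored function itself):

```go
package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

// prefixLen returns the length of the common prefix of a and b.
func prefixLen(a, b []byte) (n int) {
	for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
		if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
			// Bytes 0..k-1 equal means the low 8k bits of diff are zero.
			return n + bits.TrailingZeros64(diff)>>3
		}
		n += 8
	}
	for i := range a {
		if i >= len(b) || a[i] != b[i] {
			break
		}
		n++
	}
	return n
}

func main() {
	fmt.Println(prefixLen([]byte("hello, world"), []byte("hello, there"))) // 7
}
```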

+ 2 - 2
vendor/modules.txt

@@ -554,8 +554,8 @@ github.com/inconshreveable/mousetrap
 # github.com/ishidawataru/sctp v0.0.0-20210707070123-9a39160e9062
 ## explicit; go 1.12
 github.com/ishidawataru/sctp
-# github.com/klauspost/compress v1.15.12
-## explicit; go 1.17
+# github.com/klauspost/compress v1.16.3
+## explicit; go 1.18
 github.com/klauspost/compress
 github.com/klauspost/compress/fse
 github.com/klauspost/compress/huff0