
Merge pull request #46710 from thaJeztah/23.0_backport_bump_compress

[23.0 backport] vendor: github.com/klauspost/compress v1.17.2
Sebastiaan van Stijn, 1 year ago
commit 0360dbe11d
49 changed files, with 1,967 additions and 1,344 deletions
  1. vendor.mod (+1, -1)
  2. vendor.sum (+2, -2)
  3. vendor/github.com/klauspost/compress/.goreleaser.yml (+3, -17)
  4. vendor/github.com/klauspost/compress/README.md (+77, -1)
  5. vendor/github.com/klauspost/compress/SECURITY.md (+25, -0)
  6. vendor/github.com/klauspost/compress/fse/bitwriter.go (+1, -2)
  7. vendor/github.com/klauspost/compress/fse/compress.go (+17, -17)
  8. vendor/github.com/klauspost/compress/fse/decompress.go (+3, -1)
  9. vendor/github.com/klauspost/compress/huff0/bitreader.go (+2, -6)
  10. vendor/github.com/klauspost/compress/huff0/bitwriter.go (+17, -10)
  11. vendor/github.com/klauspost/compress/huff0/compress.go (+74, -63)
  12. vendor/github.com/klauspost/compress/huff0/decompress.go (+2, -2)
  13. vendor/github.com/klauspost/compress/huff0/decompress_amd64.s (+284, -300)
  14. vendor/github.com/klauspost/compress/internal/snapref/encode_other.go (+22, -12)
  15. vendor/github.com/klauspost/compress/zstd/README.md (+1, -1)
  16. vendor/github.com/klauspost/compress/zstd/bitreader.go (+15, -19)
  17. vendor/github.com/klauspost/compress/zstd/bitwriter.go (+1, -2)
  18. vendor/github.com/klauspost/compress/zstd/blockdec.go (+13, -7)
  19. vendor/github.com/klauspost/compress/zstd/blockenc.go (+28, -10)
  20. vendor/github.com/klauspost/compress/zstd/bytebuf.go (+2, -2)
  21. vendor/github.com/klauspost/compress/zstd/decodeheader.go (+4, -5)
  22. vendor/github.com/klauspost/compress/zstd/decoder.go (+49, -51)
  23. vendor/github.com/klauspost/compress/zstd/decoder_options.go (+23, -3)
  24. vendor/github.com/klauspost/compress/zstd/dict.go (+420, -8)
  25. vendor/github.com/klauspost/compress/zstd/enc_base.go (+7, -22)
  26. vendor/github.com/klauspost/compress/zstd/enc_best.go (+120, -148)
  27. vendor/github.com/klauspost/compress/zstd/enc_better.go (+4, -8)
  28. vendor/github.com/klauspost/compress/zstd/enc_dfast.go (+7, -11)
  29. vendor/github.com/klauspost/compress/zstd/enc_fast.go (+10, -19)
  30. vendor/github.com/klauspost/compress/zstd/encoder.go (+52, -74)
  31. vendor/github.com/klauspost/compress/zstd/encoder_options.go (+32, -10)
  32. vendor/github.com/klauspost/compress/zstd/framedec.go (+31, -59)
  33. vendor/github.com/klauspost/compress/zstd/frameenc.go (+2, -2)
  34. vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md (+31, -18)
  35. vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go (+20, -27)
  36. vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s (+165, -171)
  37. vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s (+69, -71)
  38. vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go (+1, -1)
  39. vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go (+9, -10)
  40. vendor/github.com/klauspost/compress/zstd/matchlen_amd64.go (+16, -0)
  41. vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s (+68, -0)
  42. vendor/github.com/klauspost/compress/zstd/matchlen_generic.go (+33, -0)
  43. vendor/github.com/klauspost/compress/zstd/seqdec.go (+11, -17)
  44. vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go (+16, -1)
  45. vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s (+170, -94)
  46. vendor/github.com/klauspost/compress/zstd/seqdec_generic.go (+1, -1)
  47. vendor/github.com/klauspost/compress/zstd/snappy.go (+2, -3)
  48. vendor/github.com/klauspost/compress/zstd/zstd.go (+2, -33)
  49. vendor/modules.txt (+2, -2)

+ 1 - 1
vendor.mod

@@ -47,7 +47,7 @@ require (
 	github.com/hashicorp/serf v0.8.5
 	github.com/imdario/mergo v0.3.12
 	github.com/ishidawataru/sctp v0.0.0-20230406120618-7ff4192f6ff2
-	github.com/klauspost/compress v1.15.12
+	github.com/klauspost/compress v1.17.2
 	github.com/miekg/dns v1.1.43
 	github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible
 	github.com/moby/buildkit v0.10.7-0.20230412161310-d52b2d584242

+ 2 - 2
vendor.sum

@@ -676,8 +676,8 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o
 github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
 github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
 github.com/klauspost/compress v1.15.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
-github.com/klauspost/compress v1.15.12 h1:YClS/PImqYbn+UILDnqxQCZ3RehC9N318SU3kElDUEM=
-github.com/klauspost/compress v1.15.12/go.mod h1:QPwzmACJjUTFsnSHH934V6woptycfrDDJnH7hvFVbGM=
+github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4=
+github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
 github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=

+ 3 - 17
vendor/github.com/klauspost/compress/.goreleaser.yml

@@ -3,7 +3,7 @@
 before:
   hooks:
     - ./gen.sh
-    - go install mvdan.cc/garble@latest
+    - go install mvdan.cc/garble@v0.10.1
 
 builds:
   -
@@ -92,16 +92,7 @@ builds:
 archives:
   -
     id: s2-binaries
-    name_template: "s2-{{ .Os }}_{{ .Arch }}_{{ .Version }}"
-    replacements:
-      aix: AIX
-      darwin: OSX
-      linux: Linux
-      windows: Windows
-      386: i386
-      amd64: x86_64
-      freebsd: FreeBSD
-      netbsd: NetBSD
+    name_template: "s2-{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}"
     format_overrides:
       - goos: windows
         format: zip
@@ -125,7 +116,7 @@ changelog:
 
 nfpms:
   -
-    file_name_template: "s2_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
+    file_name_template: "s2_package__{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}"
     vendor: Klaus Post
     homepage: https://github.com/klauspost/compress
     maintainer: Klaus Post <klauspost@gmail.com>
@@ -134,8 +125,3 @@ nfpms:
     formats:
       - deb
       - rpm
-    replacements:
-      darwin: Darwin
-      linux: Linux
-      freebsd: FreeBSD
-      amd64: x86_64

+ 77 - 1
vendor/github.com/klauspost/compress/README.md

@@ -9,7 +9,6 @@ This package provides various compression algorithms.
 * [huff0](https://github.com/klauspost/compress/tree/master/huff0) and [FSE](https://github.com/klauspost/compress/tree/master/fse) implementations for raw entropy encoding.
 * [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp) Provides client and server wrappers for handling gzipped requests efficiently.
 * [pgzip](https://github.com/klauspost/pgzip) is a separate package that provides a very fast parallel gzip implementation.
-* [fuzz package](https://github.com/klauspost/compress-fuzz) for fuzz testing all compressors/decompressors here.
 
 [![Go Reference](https://pkg.go.dev/badge/klauspost/compress.svg)](https://pkg.go.dev/github.com/klauspost/compress?tab=subdirectories)
 [![Go](https://github.com/klauspost/compress/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/compress/actions/workflows/go.yml)
@@ -17,6 +16,77 @@ This package provides various compression algorithms.
 
 # changelog
 
+* Sept 19th, 2023 - [v1.17.0](https://github.com/klauspost/compress/releases/tag/v1.17.0)
+	* Add experimental dictionary builder  https://github.com/klauspost/compress/pull/853
+	* Add xerial snappy read/writer https://github.com/klauspost/compress/pull/838
+	* flate: Add limited window compression https://github.com/klauspost/compress/pull/843
+	* s2: Do 2 overlapping match checks https://github.com/klauspost/compress/pull/839
+	* flate: Add amd64 assembly matchlen https://github.com/klauspost/compress/pull/837
+	* gzip: Copy bufio.Reader on Reset by @thatguystone in https://github.com/klauspost/compress/pull/860
+   
+* July 1st, 2023 - [v1.16.7](https://github.com/klauspost/compress/releases/tag/v1.16.7)
+	* zstd: Fix default level first dictionary encode https://github.com/klauspost/compress/pull/829
+	* s2: add GetBufferCapacity() method by @GiedriusS in https://github.com/klauspost/compress/pull/832
+
+* June 13, 2023 - [v1.16.6](https://github.com/klauspost/compress/releases/tag/v1.16.6)
+	* zstd: correctly ignore WithEncoderPadding(1) by @ianlancetaylor in https://github.com/klauspost/compress/pull/806
+	* zstd: Add amd64 match length assembly https://github.com/klauspost/compress/pull/824
+	* gzhttp: Handle informational headers by @rtribotte in https://github.com/klauspost/compress/pull/815
+	* s2: Improve Better compression slightly https://github.com/klauspost/compress/pull/663
+
+* Apr 16, 2023 - [v1.16.5](https://github.com/klauspost/compress/releases/tag/v1.16.5)
+	* zstd: readByte needs to use io.ReadFull by @jnoxon in https://github.com/klauspost/compress/pull/802
+	* gzip: Fix WriterTo after initial read https://github.com/klauspost/compress/pull/804
+
+* Apr 5, 2023 - [v1.16.4](https://github.com/klauspost/compress/releases/tag/v1.16.4)
+	* zstd: Improve zstd best efficiency by @greatroar and @klauspost in https://github.com/klauspost/compress/pull/784
+	* zstd: Respect WithAllLitEntropyCompression https://github.com/klauspost/compress/pull/792
+	* zstd: Fix amd64 not always detecting corrupt data https://github.com/klauspost/compress/pull/785
+	* zstd: Various minor improvements by @greatroar in https://github.com/klauspost/compress/pull/788 https://github.com/klauspost/compress/pull/794 https://github.com/klauspost/compress/pull/795
+	* s2: Fix huge block overflow https://github.com/klauspost/compress/pull/779
+	* s2: Allow CustomEncoder fallback https://github.com/klauspost/compress/pull/780
+	* gzhttp: Suppport ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799
+
+* Mar 13, 2023 - [v1.16.1](https://github.com/klauspost/compress/releases/tag/v1.16.1)
+	* zstd: Speed up + improve best encoder by @greatroar in https://github.com/klauspost/compress/pull/776
+	* gzhttp: Add optional [BREACH mitigation](https://github.com/klauspost/compress/tree/master/gzhttp#breach-mitigation). https://github.com/klauspost/compress/pull/762 https://github.com/klauspost/compress/pull/768 https://github.com/klauspost/compress/pull/769 https://github.com/klauspost/compress/pull/770 https://github.com/klauspost/compress/pull/767
+	* s2: Add Intel LZ4s converter https://github.com/klauspost/compress/pull/766
+	* zstd: Minor bug fixes https://github.com/klauspost/compress/pull/771 https://github.com/klauspost/compress/pull/772 https://github.com/klauspost/compress/pull/773
+	* huff0: Speed up compress1xDo by @greatroar in https://github.com/klauspost/compress/pull/774
+
+* Feb 26, 2023 - [v1.16.0](https://github.com/klauspost/compress/releases/tag/v1.16.0)
+	* s2: Add [Dictionary](https://github.com/klauspost/compress/tree/master/s2#dictionaries) support.  https://github.com/klauspost/compress/pull/685
+	* s2: Add Compression Size Estimate.  https://github.com/klauspost/compress/pull/752
+	* s2: Add support for custom stream encoder. https://github.com/klauspost/compress/pull/755
+	* s2: Add LZ4 block converter. https://github.com/klauspost/compress/pull/748
+	* s2: Support io.ReaderAt in ReadSeeker. https://github.com/klauspost/compress/pull/747
+	* s2c/s2sx: Use concurrent decoding. https://github.com/klauspost/compress/pull/746
+
+<details>
+	<summary>See changes to v1.15.x</summary>
+	
+* Jan 21st, 2023 (v1.15.15)
+	* deflate: Improve level 7-9 by @klauspost in https://github.com/klauspost/compress/pull/739
+	* zstd: Add delta encoding support by @greatroar in https://github.com/klauspost/compress/pull/728
+	* zstd: Various speed improvements by @greatroar https://github.com/klauspost/compress/pull/741 https://github.com/klauspost/compress/pull/734 https://github.com/klauspost/compress/pull/736 https://github.com/klauspost/compress/pull/744 https://github.com/klauspost/compress/pull/743 https://github.com/klauspost/compress/pull/745
+	* gzhttp: Add SuffixETag() and DropETag() options to prevent ETag collisions on compressed responses by @willbicks in https://github.com/klauspost/compress/pull/740
+
+* Jan 3rd, 2023 (v1.15.14)
+
+	* flate: Improve speed in big stateless blocks https://github.com/klauspost/compress/pull/718
+	* zstd: Minor speed tweaks by @greatroar in https://github.com/klauspost/compress/pull/716 https://github.com/klauspost/compress/pull/720
+	* export NoGzipResponseWriter for custom ResponseWriter wrappers by @harshavardhana in https://github.com/klauspost/compress/pull/722
+	* s2: Add example for indexing and existing stream https://github.com/klauspost/compress/pull/723
+
+* Dec 11, 2022 (v1.15.13)
+	* zstd: Add [MaxEncodedSize](https://pkg.go.dev/github.com/klauspost/compress@v1.15.13/zstd#Encoder.MaxEncodedSize) to encoder  https://github.com/klauspost/compress/pull/691
+	* zstd: Various tweaks and improvements https://github.com/klauspost/compress/pull/693 https://github.com/klauspost/compress/pull/695 https://github.com/klauspost/compress/pull/696 https://github.com/klauspost/compress/pull/701 https://github.com/klauspost/compress/pull/702 https://github.com/klauspost/compress/pull/703 https://github.com/klauspost/compress/pull/704 https://github.com/klauspost/compress/pull/705 https://github.com/klauspost/compress/pull/706 https://github.com/klauspost/compress/pull/707 https://github.com/klauspost/compress/pull/708
+
+* Oct 26, 2022 (v1.15.12)
+
+	* zstd: Tweak decoder allocs. https://github.com/klauspost/compress/pull/680
+	* gzhttp: Always delete `HeaderNoCompression` https://github.com/klauspost/compress/pull/683
+
 * Sept 26, 2022 (v1.15.11)
 
 	* flate: Improve level 1-3 compression  https://github.com/klauspost/compress/pull/678
@@ -121,6 +191,8 @@ Stream decompression is now faster on asynchronous, since the goroutine allocati
 
 While the release has been extensively tested, it is recommended to testing when upgrading.
 
+</details>
+
 <details>
 	<summary>See changes to v1.14.x</summary>
 	
@@ -579,6 +651,10 @@ Here are other packages of good quality and pure Go (no cgo wrappers or autoconv
 * [github.com/pierrec/lz4](https://github.com/pierrec/lz4) - strong multithreaded LZ4 compression.
 * [github.com/cosnicolaou/pbzip2](https://github.com/cosnicolaou/pbzip2) - multithreaded bzip2 decompression.
 * [github.com/dsnet/compress](https://github.com/dsnet/compress) - brotli decompression, bzip2 writer.
+* [github.com/ronanh/intcomp](https://github.com/ronanh/intcomp) - Integer compression.
+* [github.com/spenczar/fpc](https://github.com/spenczar/fpc) - Float compression.
+* [github.com/minio/zipindex](https://github.com/minio/zipindex) - External ZIP directory index.
+* [github.com/ybirader/pzip](https://github.com/ybirader/pzip) - Fast concurrent zip archiver and extractor.
 
 # license
 

+ 25 - 0
vendor/github.com/klauspost/compress/SECURITY.md

@@ -0,0 +1,25 @@
+# Security Policy
+
+## Supported Versions
+
+Security updates are applied only to the latest release.
+
+## Vulnerability Definition
+
+A security vulnerability is a bug that with certain input triggers a crash or an infinite loop. Most calls will have varying execution time and only in rare cases will slow operation be considered a security vulnerability.
+
+Corrupted output generally is not considered a security vulnerability, unless independent operations are able to affect each other. Note that not all functionality is re-entrant and safe to use concurrently.
+
+Out-of-memory crashes only applies if the en/decoder uses an abnormal amount of memory, with appropriate options applied, to limit maximum window size, concurrency, etc. However, if you are in doubt you are welcome to file a security issue.
+
+It is assumed that all callers are trusted, meaning internal data exposed through reflection or inspection of returned data structures is not considered a vulnerability.
+
+Vulnerabilities resulting from compiler/assembler errors should be reported upstream. Depending on the severity this package may or may not implement a workaround.
+
+## Reporting a Vulnerability
+
+If you have discovered a security vulnerability in this project, please report it privately. **Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released.
+
+Please disclose it at [security advisory](https://github.com/klauspost/compress/security/advisories/new). If possible please provide a minimal reproducer. If the issue only applies to a single platform, it would be helpful to provide access to that.
+
+This project is maintained by a team of volunteers on a reasonable-effort basis. As such, vulnerabilities will be disclosed in a best effort base.

+ 1 - 2
vendor/github.com/klauspost/compress/fse/bitwriter.go

@@ -152,12 +152,11 @@ func (b *bitWriter) flushAlign() {
 
 // close will write the alignment bit and write the final byte(s)
 // to the output.
-func (b *bitWriter) close() error {
+func (b *bitWriter) close() {
 	// End mark
 	b.addBits16Clean(1, 1)
 	// flush until next byte.
 	b.flushAlign()
-	return nil
 }
 
 // reset and continue writing by appending to out.

+ 17 - 17
vendor/github.com/klauspost/compress/fse/compress.go

@@ -146,54 +146,51 @@ func (s *Scratch) compress(src []byte) error {
 		c1.encodeZero(tt[src[ip-2]])
 		ip -= 2
 	}
+	src = src[:ip]
 
 	// Main compression loop.
 	switch {
 	case !s.zeroBits && s.actualTableLog <= 8:
 		// We can encode 4 symbols without requiring a flush.
 		// We do not need to check if any output is 0 bits.
-		for ip >= 4 {
+		for ; len(src) >= 4; src = src[:len(src)-4] {
 			s.bw.flush32()
-			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+			v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
 			c2.encode(tt[v0])
 			c1.encode(tt[v1])
 			c2.encode(tt[v2])
 			c1.encode(tt[v3])
-			ip -= 4
 		}
 	case !s.zeroBits:
 		// We do not need to check if any output is 0 bits.
-		for ip >= 4 {
+		for ; len(src) >= 4; src = src[:len(src)-4] {
 			s.bw.flush32()
-			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+			v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
 			c2.encode(tt[v0])
 			c1.encode(tt[v1])
 			s.bw.flush32()
 			c2.encode(tt[v2])
 			c1.encode(tt[v3])
-			ip -= 4
 		}
 	case s.actualTableLog <= 8:
 		// We can encode 4 symbols without requiring a flush
-		for ip >= 4 {
+		for ; len(src) >= 4; src = src[:len(src)-4] {
 			s.bw.flush32()
-			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+			v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
 			c2.encodeZero(tt[v0])
 			c1.encodeZero(tt[v1])
 			c2.encodeZero(tt[v2])
 			c1.encodeZero(tt[v3])
-			ip -= 4
 		}
 	default:
-		for ip >= 4 {
+		for ; len(src) >= 4; src = src[:len(src)-4] {
 			s.bw.flush32()
-			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+			v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
 			c2.encodeZero(tt[v0])
 			c1.encodeZero(tt[v1])
 			s.bw.flush32()
 			c2.encodeZero(tt[v2])
 			c1.encodeZero(tt[v3])
-			ip -= 4
 		}
 	}
 
@@ -202,7 +199,8 @@ func (s *Scratch) compress(src []byte) error {
 	c2.flush(s.actualTableLog)
 	c1.flush(s.actualTableLog)
 
-	return s.bw.close()
+	s.bw.close()
+	return nil
 }
 
 // writeCount will write the normalized histogram count to header.
@@ -459,15 +457,17 @@ func (s *Scratch) countSimple(in []byte) (max int) {
 	for _, v := range in {
 		s.count[v]++
 	}
-	m := uint32(0)
+	m, symlen := uint32(0), s.symbolLen
 	for i, v := range s.count[:] {
+		if v == 0 {
+			continue
+		}
 		if v > m {
 			m = v
 		}
-		if v > 0 {
-			s.symbolLen = uint16(i) + 1
-		}
+		symlen = uint16(i) + 1
 	}
+	s.symbolLen = symlen
 	return int(m)
 }
 

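An aside on the countSimple rewrite above: empty histogram buckets are now skipped up front, and symbolLen is tracked in a local that is stored once after the loop instead of being rewritten for every used symbol. A minimal standalone sketch of the same pattern (the wrapper and fixed-size histogram here are illustrative, not the package API):

```go
package main

import "fmt"

// countSimple-style scan: find the max count and the number of distinct
// symbol values (highest used symbol + 1), skipping empty buckets.
func countSimple(in []byte) (max int, symbolLen uint16) {
	var count [256]uint32
	for _, v := range in {
		count[v]++
	}
	m, symlen := uint32(0), uint16(0)
	for i, v := range count[:] {
		if v == 0 {
			continue // hoisted test: the updates below only run for used symbols
		}
		if v > m {
			m = v
		}
		symlen = uint16(i) + 1
	}
	return int(m), symlen
}

func main() {
	max, n := countSimple([]byte("abracadabra"))
	fmt.Println(max, n) // 5 ('a' occurs five times), 115 ('r'+1)
}
```
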
+ 3 - 1
vendor/github.com/klauspost/compress/fse/decompress.go

@@ -260,7 +260,9 @@ func (s *Scratch) buildDtable() error {
 // If the buffer is over-read an error is returned.
 func (s *Scratch) decompress() error {
 	br := &s.bits
-	br.init(s.br.unread())
+	if err := br.init(s.br.unread()); err != nil {
+		return err
+	}
 
 	var s1, s2 decoder
 	// Initialize and decode first state and symbol.

+ 2 - 6
vendor/github.com/klauspost/compress/huff0/bitreader.go

@@ -67,7 +67,6 @@ func (b *bitReaderBytes) fillFast() {
 
 	// 2 bounds checks.
 	v := b.in[b.off-4 : b.off]
-	v = v[:4]
 	low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 	b.value |= uint64(low) << (b.bitsRead - 32)
 	b.bitsRead -= 32
@@ -88,8 +87,7 @@ func (b *bitReaderBytes) fill() {
 		return
 	}
 	if b.off > 4 {
-		v := b.in[b.off-4:]
-		v = v[:4]
+		v := b.in[b.off-4 : b.off]
 		low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 		b.value |= uint64(low) << (b.bitsRead - 32)
 		b.bitsRead -= 32
@@ -179,7 +177,6 @@ func (b *bitReaderShifted) fillFast() {
 
 	// 2 bounds checks.
 	v := b.in[b.off-4 : b.off]
-	v = v[:4]
 	low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 	b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
 	b.bitsRead -= 32
@@ -200,8 +197,7 @@ func (b *bitReaderShifted) fill() {
 		return
 	}
 	if b.off > 4 {
-		v := b.in[b.off-4:]
-		v = v[:4]
+		v := b.in[b.off-4 : b.off]
 		low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 		b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
 		b.bitsRead -= 32

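The bitreader change above is a bounds-check idiom: the two-sided slice expression b.in[b.off-4 : b.off] gives the compiler both ends at once, so the four byte loads that follow need no further checks, where the old b.in[b.off-4:] followed by v = v[:4] spent an extra reslice. A small sketch of the idiom, with standalone names assumed for illustration:

```go
package main

import "fmt"

// load32 reads 4 little-endian bytes ending at off. After the two-sided
// slice, v is known to have length 4, so v[0]..v[3] need no extra checks.
func load32(in []byte, off int) uint32 {
	v := in[off-4 : off]
	return uint32(v[0]) | uint32(v[1])<<8 | uint32(v[2])<<16 | uint32(v[3])<<24
}

func main() {
	buf := []byte{0x78, 0x56, 0x34, 0x12}
	fmt.Printf("%#x\n", load32(buf, 4)) // 0x12345678
}
```
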
+ 17 - 10
vendor/github.com/klauspost/compress/huff0/bitwriter.go

@@ -13,14 +13,6 @@ type bitWriter struct {
 	out          []byte
 }
 
-// bitMask16 is bitmasks. Has extra to avoid bounds check.
-var bitMask16 = [32]uint16{
-	0, 1, 3, 7, 0xF, 0x1F,
-	0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
-	0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF,
-	0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
-	0xFFFF, 0xFFFF} /* up to 16 bits */
-
 // addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
 // It will not check if there is space for them, so the caller must ensure that it has flushed recently.
 func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
@@ -60,6 +52,22 @@ func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) {
 	b.nBits += encA.nBits + encB.nBits
 }
 
+// encFourSymbols adds up to 32 bits from four symbols.
+// It will not check if there is space for them,
+// so the caller must ensure that b has been flushed recently.
+func (b *bitWriter) encFourSymbols(encA, encB, encC, encD cTableEntry) {
+	bitsA := encA.nBits
+	bitsB := bitsA + encB.nBits
+	bitsC := bitsB + encC.nBits
+	bitsD := bitsC + encD.nBits
+	combined := uint64(encA.val) |
+		(uint64(encB.val) << (bitsA & 63)) |
+		(uint64(encC.val) << (bitsB & 63)) |
+		(uint64(encD.val) << (bitsC & 63))
+	b.bitContainer |= combined << (b.nBits & 63)
+	b.nBits += bitsD
+}
+
 // flush32 will flush out, so there are at least 32 bits available for writing.
 func (b *bitWriter) flush32() {
 	if b.nBits < 32 {
@@ -86,10 +94,9 @@ func (b *bitWriter) flushAlign() {
 
 // close will write the alignment bit and write the final byte(s)
 // to the output.
-func (b *bitWriter) close() error {
+func (b *bitWriter) close() {
 	// End mark
 	b.addBits16Clean(1, 1)
 	// flush until next byte.
 	b.flushAlign()
-	return nil
 }

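The new encFourSymbols above replaces two encTwoSymbols calls in the table-log <= 8 path of compress1xDo: four codes of at most 8 bits each are merged with cumulative shifts into one 64-bit word and OR'd into the bit container in a single step. A standalone sketch checking the cumulative-shift math (the names and wrapper are illustrative, not the library API):

```go
package main

import "fmt"

// cTableEntry mirrors the (val, nBits) fields the diff uses.
type cTableEntry struct {
	val   uint16
	nBits uint8
}

// packFour merges four codes with cumulative shifts, as encFourSymbols does.
func packFour(a, b, c, d cTableEntry) (combined uint64, total uint8) {
	bitsA := a.nBits
	bitsB := bitsA + b.nBits
	bitsC := bitsB + c.nBits
	total = bitsC + d.nBits
	combined = uint64(a.val) |
		uint64(b.val)<<(bitsA&63) |
		uint64(c.val)<<(bitsB&63) |
		uint64(d.val)<<(bitsC&63)
	return combined, total
}

func main() {
	codes := []cTableEntry{{0x5, 3}, {0x1, 1}, {0xAB, 8}, {0x3F, 6}}
	got, n := packFour(codes[0], codes[1], codes[2], codes[3])
	// Reference: append the same codes one at a time.
	var want uint64
	var shift uint8
	for _, c := range codes {
		want |= uint64(c.val) << shift
		shift += c.nBits
	}
	fmt.Println(got == want, n == shift) // true true
}
```
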
+ 74 - 63
vendor/github.com/klauspost/compress/huff0/compress.go

@@ -227,10 +227,10 @@ func EstimateSizes(in []byte, s *Scratch) (tableSz, dataSz, reuseSz int, err err
 }
 
 func (s *Scratch) compress1X(src []byte) ([]byte, error) {
-	return s.compress1xDo(s.Out, src)
+	return s.compress1xDo(s.Out, src), nil
 }
 
-func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
+func (s *Scratch) compress1xDo(dst, src []byte) []byte {
 	var bw = bitWriter{out: dst}
 
 	// N is length divisible by 4.
@@ -248,8 +248,7 @@ func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
 			tmp := src[n : n+4]
 			// tmp should be len 4
 			bw.flush32()
-			bw.encTwoSymbols(cTable, tmp[3], tmp[2])
-			bw.encTwoSymbols(cTable, tmp[1], tmp[0])
+			bw.encFourSymbols(cTable[tmp[3]], cTable[tmp[2]], cTable[tmp[1]], cTable[tmp[0]])
 		}
 	} else {
 		for ; n >= 0; n -= 4 {
@@ -261,8 +260,8 @@ func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
 			bw.encTwoSymbols(cTable, tmp[1], tmp[0])
 		}
 	}
-	err := bw.close()
-	return bw.out, err
+	bw.close()
+	return bw.out
 }
 
 var sixZeros [6]byte
@@ -284,12 +283,8 @@ func (s *Scratch) compress4X(src []byte) ([]byte, error) {
 		}
 		src = src[len(toDo):]
 
-		var err error
 		idx := len(s.Out)
-		s.Out, err = s.compress1xDo(s.Out, toDo)
-		if err != nil {
-			return nil, err
-		}
+		s.Out = s.compress1xDo(s.Out, toDo)
 		if len(s.Out)-idx > math.MaxUint16 {
 			// We cannot store the size in the jump table
 			return nil, ErrIncompressible
@@ -316,7 +311,6 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
 
 	segmentSize := (len(src) + 3) / 4
 	var wg sync.WaitGroup
-	var errs [4]error
 	wg.Add(4)
 	for i := 0; i < 4; i++ {
 		toDo := src
@@ -327,15 +321,12 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
 
 		// Separate goroutine for each block.
 		go func(i int) {
-			s.tmpOut[i], errs[i] = s.compress1xDo(s.tmpOut[i][:0], toDo)
+			s.tmpOut[i] = s.compress1xDo(s.tmpOut[i][:0], toDo)
 			wg.Done()
 		}(i)
 	}
 	wg.Wait()
 	for i := 0; i < 4; i++ {
-		if errs[i] != nil {
-			return nil, errs[i]
-		}
 		o := s.tmpOut[i]
 		if len(o) > math.MaxUint16 {
 			// We cannot store the size in the jump table
@@ -365,29 +356,29 @@ func (s *Scratch) countSimple(in []byte) (max int, reuse bool) {
 	m := uint32(0)
 	if len(s.prevTable) > 0 {
 		for i, v := range s.count[:] {
+			if v == 0 {
+				continue
+			}
 			if v > m {
 				m = v
 			}
-			if v > 0 {
-				s.symbolLen = uint16(i) + 1
-				if i >= len(s.prevTable) {
-					reuse = false
-				} else {
-					if s.prevTable[i].nBits == 0 {
-						reuse = false
-					}
-				}
+			s.symbolLen = uint16(i) + 1
+			if i >= len(s.prevTable) {
+				reuse = false
+			} else if s.prevTable[i].nBits == 0 {
+				reuse = false
			}
 		}
 		return int(m), reuse
 	}
 	for i, v := range s.count[:] {
+		if v == 0 {
+			continue
+		}
 		if v > m {
 			m = v
 		}
-		if v > 0 {
-			s.symbolLen = uint16(i) + 1
-		}
+		s.symbolLen = uint16(i) + 1
 	}
 	return int(m), false
 }
@@ -484,34 +475,35 @@ func (s *Scratch) buildCTable() error {
 	// Different from reference implementation.
 	huffNode0 := s.nodes[0 : huffNodesLen+1]
 
-	for huffNode[nonNullRank].count == 0 {
+	for huffNode[nonNullRank].count() == 0 {
 		nonNullRank--
 	}
 
 	lowS := int16(nonNullRank)
 	nodeRoot := nodeNb + lowS - 1
 	lowN := nodeNb
-	huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count
-	huffNode[lowS].parent, huffNode[lowS-1].parent = uint16(nodeNb), uint16(nodeNb)
+	huffNode[nodeNb].setCount(huffNode[lowS].count() + huffNode[lowS-1].count())
+	huffNode[lowS].setParent(nodeNb)
+	huffNode[lowS-1].setParent(nodeNb)
 	nodeNb++
 	lowS -= 2
 	for n := nodeNb; n <= nodeRoot; n++ {
-		huffNode[n].count = 1 << 30
+		huffNode[n].setCount(1 << 30)
 	}
 	// fake entry, strong barrier
-	huffNode0[0].count = 1 << 31
+	huffNode0[0].setCount(1 << 31)
 
 	// create parents
 	for nodeNb <= nodeRoot {
 		var n1, n2 int16
-		if huffNode0[lowS+1].count < huffNode0[lowN+1].count {
+		if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() {
 			n1 = lowS
 			lowS--
 		} else {
 			n1 = lowN
 			lowN++
 		}
-		if huffNode0[lowS+1].count < huffNode0[lowN+1].count {
+		if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() {
 			n2 = lowS
 			lowS--
 		} else {
@@ -519,18 +511,19 @@ func (s *Scratch) buildCTable() error {
 			lowN++
 		}
 
-		huffNode[nodeNb].count = huffNode0[n1+1].count + huffNode0[n2+1].count
-		huffNode0[n1+1].parent, huffNode0[n2+1].parent = uint16(nodeNb), uint16(nodeNb)
+		huffNode[nodeNb].setCount(huffNode0[n1+1].count() + huffNode0[n2+1].count())
+		huffNode0[n1+1].setParent(nodeNb)
+		huffNode0[n2+1].setParent(nodeNb)
 		nodeNb++
 	}
 
 	// distribute weights (unlimited tree height)
-	huffNode[nodeRoot].nbBits = 0
+	huffNode[nodeRoot].setNbBits(0)
 	for n := nodeRoot - 1; n >= startNode; n-- {
-		huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1
+		huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1)
 	}
 	for n := uint16(0); n <= nonNullRank; n++ {
-		huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1
+		huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1)
 	}
 	s.actualTableLog = s.setMaxHeight(int(nonNullRank))
 	maxNbBits := s.actualTableLog
@@ -542,7 +535,7 @@ func (s *Scratch) buildCTable() error {
 	var nbPerRank [tableLogMax + 1]uint16
 	var valPerRank [16]uint16
 	for _, v := range huffNode[:nonNullRank+1] {
-		nbPerRank[v.nbBits]++
+		nbPerRank[v.nbBits()]++
 	}
 	// determine stating value per rank
 	{
@@ -557,7 +550,7 @@ func (s *Scratch) buildCTable() error {
 
 	// push nbBits per symbol, symbol order
 	for _, v := range huffNode[:nonNullRank+1] {
-		s.cTable[v.symbol].nBits = v.nbBits
+		s.cTable[v.symbol()].nBits = v.nbBits()
 	}
 
 	// assign value within rank, symbol order
@@ -603,12 +596,12 @@ func (s *Scratch) huffSort() {
 		pos := rank[r].current
 		rank[r].current++
 		prev := nodes[(pos-1)&huffNodesMask]
-		for pos > rank[r].base && c > prev.count {
+		for pos > rank[r].base && c > prev.count() {
 			nodes[pos&huffNodesMask] = prev
 			pos--
 			prev = nodes[(pos-1)&huffNodesMask]
 		}
-		nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)}
+		nodes[pos&huffNodesMask] = makeNodeElt(c, byte(n))
 	}
 }
 
@@ -617,7 +610,7 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 	huffNode := s.nodes[1 : huffNodesLen+1]
 	//huffNode = huffNode[: huffNodesLen]
 
-	largestBits := huffNode[lastNonNull].nbBits
+	largestBits := huffNode[lastNonNull].nbBits()
 
 	// early exit : no elt > maxNbBits
 	if largestBits <= maxNbBits {
@@ -627,14 +620,14 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 	baseCost := int(1) << (largestBits - maxNbBits)
 	n := uint32(lastNonNull)
 
-	for huffNode[n].nbBits > maxNbBits {
-		totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits))
-		huffNode[n].nbBits = maxNbBits
+	for huffNode[n].nbBits() > maxNbBits {
+		totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits()))
+		huffNode[n].setNbBits(maxNbBits)
 		n--
 	}
 	// n stops at huffNode[n].nbBits <= maxNbBits
 
-	for huffNode[n].nbBits == maxNbBits {
+	for huffNode[n].nbBits() == maxNbBits {
 		n--
 	}
 	// n end at index of smallest symbol using < maxNbBits
@@ -655,10 +648,10 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 		{
 			currentNbBits := maxNbBits
 			for pos := int(n); pos >= 0; pos-- {
-				if huffNode[pos].nbBits >= currentNbBits {
+				if huffNode[pos].nbBits() >= currentNbBits {
 					continue
 				}
-				currentNbBits = huffNode[pos].nbBits // < maxNbBits
+				currentNbBits = huffNode[pos].nbBits() // < maxNbBits
 				rankLast[maxNbBits-currentNbBits] = uint32(pos)
 			}
 		}
@@ -675,8 +668,8 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 				if lowPos == noSymbol {
 					break
 				}
-				highTotal := huffNode[highPos].count
-				lowTotal := 2 * huffNode[lowPos].count
+				highTotal := huffNode[highPos].count()
+				lowTotal := 2 * huffNode[lowPos].count()
 				if highTotal <= lowTotal {
 					break
 				}
@@ -692,13 +685,14 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 				// this rank is no longer empty
 				rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]
 			}
-			huffNode[rankLast[nBitsToDecrease]].nbBits++
+			huffNode[rankLast[nBitsToDecrease]].setNbBits(1 +
+				huffNode[rankLast[nBitsToDecrease]].nbBits())
 			if rankLast[nBitsToDecrease] == 0 {
 				/* special case, reached largest symbol */
 				rankLast[nBitsToDecrease] = noSymbol
 			} else {
 				rankLast[nBitsToDecrease]--
-				if huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease {
+				if huffNode[rankLast[nBitsToDecrease]].nbBits() != maxNbBits-nBitsToDecrease {
 					rankLast[nBitsToDecrease] = noSymbol /* this rank is now empty */
 				}
 			}
@@ -706,15 +700,15 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 
 		for totalCost < 0 { /* Sometimes, cost correction overshoot */
 			if rankLast[1] == noSymbol { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
-				for huffNode[n].nbBits == maxNbBits {
+				for huffNode[n].nbBits() == maxNbBits {
 					n--
 				}
-				huffNode[n+1].nbBits--
+				huffNode[n+1].setNbBits(huffNode[n+1].nbBits() - 1)
 				rankLast[1] = n + 1
 				totalCost++
 				continue
 			}
-			huffNode[rankLast[1]+1].nbBits--
+			huffNode[rankLast[1]+1].setNbBits(huffNode[rankLast[1]+1].nbBits() - 1)
 			rankLast[1]++
 			totalCost++
 		}
@@ -722,9 +716,26 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 	return maxNbBits
 }
 
-type nodeElt struct {
-	count  uint32
-	parent uint16
-	symbol byte
-	nbBits uint8
+// A nodeElt is the fields
+//
+//	count  uint32
+//	parent uint16
+//	symbol byte
+//	nbBits uint8
+//
+// in some order, all squashed into an integer so that the compiler
+// always loads and stores entire nodeElts instead of separate fields.
+type nodeElt uint64
+
+func makeNodeElt(count uint32, symbol byte) nodeElt {
+	return nodeElt(count) | nodeElt(symbol)<<48
 }
+
+func (e *nodeElt) count() uint32  { return uint32(*e) }
+func (e *nodeElt) parent() uint16 { return uint16(*e >> 32) }
+func (e *nodeElt) symbol() byte   { return byte(*e >> 48) }
+func (e *nodeElt) nbBits() uint8  { return uint8(*e >> 56) }
+
+func (e *nodeElt) setCount(c uint32) { *e = (*e)&0xffffffff00000000 | nodeElt(c) }
+func (e *nodeElt) setParent(p int16) { *e = (*e)&0xffff0000ffffffff | nodeElt(uint16(p))<<32 }
+func (e *nodeElt) setNbBits(n uint8) { *e = (*e)&0x00ffffffffffffff | nodeElt(n)<<56 }

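The nodeElt change above packs four fields into one uint64 (count in bits 0-31, parent in 32-47, symbol in 48-55, nbBits in 56-63) so each element is a single load or store. A standalone round-trip check of that layout, mirroring the accessors from the diff:

```go
package main

import "fmt"

type nodeElt uint64

func makeNodeElt(count uint32, symbol byte) nodeElt {
	return nodeElt(count) | nodeElt(symbol)<<48
}

func (e *nodeElt) count() uint32  { return uint32(*e) }
func (e *nodeElt) parent() uint16 { return uint16(*e >> 32) }
func (e *nodeElt) symbol() byte   { return byte(*e >> 48) }
func (e *nodeElt) nbBits() uint8  { return uint8(*e >> 56) }

func (e *nodeElt) setParent(p int16) { *e = (*e)&0xffff0000ffffffff | nodeElt(uint16(p))<<32 }
func (e *nodeElt) setNbBits(n uint8) { *e = (*e)&0x00ffffffffffffff | nodeElt(n)<<56 }

func main() {
	// Setting one field must not disturb the others.
	e := makeNodeElt(1<<20, 'z')
	e.setParent(42)
	e.setNbBits(11)
	fmt.Println(e.count() == 1<<20, e.parent() == 42, e.symbol() == 'z', e.nbBits() == 11)
	// Output: true true true true
}
```
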
+ 2 - 2
vendor/github.com/klauspost/compress/huff0/decompress.go

@@ -61,7 +61,7 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
 		b, err := fse.Decompress(in[:iSize], s.fse)
 		s.fse.Out = nil
 		if err != nil {
-			return s, nil, err
+			return s, nil, fmt.Errorf("fse decompress returned: %w", err)
 		}
 		if len(b) > 255 {
 			return s, nil, errors.New("corrupt input: output table too large")
@@ -253,7 +253,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
 
 	switch d.actualTableLog {
 	case 8:
-		const shift = 8 - 8
+		const shift = 0
 		for br.off >= 4 {
 			br.fillFast()
 			v := dt[uint8(br.value>>(56+shift))]

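The ReadTable change above wraps the inner FSE error with %w, so callers can still match the underlying error after context is added. A tiny sketch of the pattern (the sentinel below is hypothetical, for illustration only):

```go
package main

import (
	"errors"
	"fmt"
)

var errCorrupt = errors.New("corrupt input") // hypothetical sentinel

func main() {
	wrapped := fmt.Errorf("fse decompress returned: %w", errCorrupt)
	fmt.Println(errors.Is(wrapped, errCorrupt)) // true: %w keeps the chain intact
}
```
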
+ 284 - 300
vendor/github.com/klauspost/compress/huff0/decompress_amd64.s

@@ -4,360 +4,349 @@
 
 // func decompress4x_main_loop_amd64(ctx *decompress4xContext)
 TEXT ·decompress4x_main_loop_amd64(SB), $0-8
-	XORQ DX, DX
-
 	// Preload values
 	MOVQ    ctx+0(FP), AX
 	MOVBQZX 8(AX), DI
-	MOVQ    16(AX), SI
-	MOVQ    48(AX), BX
-	MOVQ    24(AX), R9
-	MOVQ    32(AX), R10
-	MOVQ    (AX), R11
+	MOVQ    16(AX), BX
+	MOVQ    48(AX), SI
+	MOVQ    24(AX), R8
+	MOVQ    32(AX), R9
+	MOVQ    (AX), R10
 
 	// Main loop
main_loop:
-	MOVQ  SI, R8
-	CMPQ  R8, BX
+	XORL  DX, DX
+	CMPQ  BX, SI
 	SETGE DL
 
 	// br0.fillFast32()
-	MOVQ    32(R11), R12
-	MOVBQZX 40(R11), R13
-	CMPQ    R13, $0x20
+	MOVQ    32(R10), R11
+	MOVBQZX 40(R10), R12
+	CMPQ    R12, $0x20
 	JBE     skip_fill0
-	MOVQ    24(R11), AX
-	SUBQ    $0x20, R13
+	MOVQ    24(R10), AX
+	SUBQ    $0x20, R12
 	SUBQ    $0x04, AX
-	MOVQ    (R11), R14
+	MOVQ    (R10), R13
 
 	// b.value |= uint64(low) << (b.bitsRead & 63)
-	MOVL (AX)(R14*1), R14
-	MOVQ R13, CX
-	SHLQ CL, R14
-	MOVQ AX, 24(R11)
-	ORQ  R14, R12
+	MOVL (AX)(R13*1), R13
+	MOVQ R12, CX
+	SHLQ CL, R13
+	MOVQ AX, 24(R10)
+	ORQ  R13, R11
 
-	// exhausted = exhausted || (br0.off < 4)
-	CMPQ  AX, $0x04
-	SETLT AL
-	ORB   AL, DL
+	// exhausted += (br0.off < 4)
+	CMPQ AX, $0x04
+	ADCB $+0, DL
 
skip_fill0:
 	// val0 := br0.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v0 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br0.advance(uint8(v0.entry)
 	MOVB CH, AL
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// val1 := br0.peekTopBits(peekBits)
 	MOVQ DI, CX
-	MOVQ R12, R14
-	SHRQ CL, R14
+	MOVQ R11, R13
+	SHRQ CL, R13
 
 	// v1 := table[val1&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br0.advance(uint8(v1.entry))
 	MOVB CH, AH
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// these two writes get coalesced
 	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
-	MOVW AX, (R8)
+	MOVW AX, (BX)
 
 	// update the bitreader structure
-	MOVQ R12, 32(R11)
-	MOVB R13, 40(R11)
-	ADDQ R9, R8
+	MOVQ R11, 32(R10)
+	MOVB R12, 40(R10)
 
 	// br1.fillFast32()
-	MOVQ    80(R11), R12
-	MOVBQZX 88(R11), R13
-	CMPQ    R13, $0x20
+	MOVQ    80(R10), R11
+	MOVBQZX 88(R10), R12
+	CMPQ    R12, $0x20
 	JBE     skip_fill1
-	MOVQ    72(R11), AX
-	SUBQ    $0x20, R13
+	MOVQ    72(R10), AX
+	SUBQ    $0x20, R12
 	SUBQ    $0x04, AX
-	MOVQ    48(R11), R14
+	MOVQ    48(R10), R13
 
 	// b.value |= uint64(low) << (b.bitsRead & 63)
-	MOVL (AX)(R14*1), R14
-	MOVQ R13, CX
-	SHLQ CL, R14
-	MOVQ AX, 72(R11)
-	ORQ  R14, R12
+	MOVL (AX)(R13*1), R13
+	MOVQ R12, CX
+	SHLQ CL, R13
+	MOVQ AX, 72(R10)
+	ORQ  R13, R11
 
-	// exhausted = exhausted || (br1.off < 4)
-	CMPQ  AX, $0x04
-	SETLT AL
-	ORB   AL, DL
+	// exhausted += (br1.off < 4)
+	CMPQ AX, $0x04
+	ADCB $+0, DL
 
skip_fill1:
 	// val0 := br1.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v0 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br1.advance(uint8(v0.entry)
 	MOVB CH, AL
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// val1 := br1.peekTopBits(peekBits)
 	MOVQ DI, CX
-	MOVQ R12, R14
-	SHRQ CL, R14
+	MOVQ R11, R13
+	SHRQ CL, R13
 
 	// v1 := table[val1&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br1.advance(uint8(v1.entry))
 	MOVB CH, AH
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// these two writes get coalesced
 	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
-	MOVW AX, (R8)
+	MOVW AX, (BX)(R8*1)
 
 	// update the bitreader structure
-	MOVQ R12, 80(R11)
-	MOVB R13, 88(R11)
-	ADDQ R9, R8
+	MOVQ R11, 80(R10)
+	MOVB R12, 88(R10)
 
 	// br2.fillFast32()
-	MOVQ    128(R11), R12
-	MOVBQZX 136(R11), R13
-	CMPQ    R13, $0x20
+	MOVQ    128(R10), R11
+	MOVBQZX 136(R10), R12
+	CMPQ    R12, $0x20
 	JBE     skip_fill2
-	MOVQ    120(R11), AX
-	SUBQ    $0x20, R13
+	MOVQ    120(R10), AX
+	SUBQ    $0x20, R12
 	SUBQ    $0x04, AX
-	MOVQ    96(R11), R14
+	MOVQ    96(R10), R13
 
 	// b.value |= uint64(low) << (b.bitsRead & 63)
-	MOVL (AX)(R14*1), R14
-	MOVQ R13, CX
-	SHLQ CL, R14
-	MOVQ AX, 120(R11)
-	ORQ  R14, R12
+	MOVL (AX)(R13*1), R13
+	MOVQ R12, CX
+	SHLQ CL, R13
+	MOVQ AX, 120(R10)
+	ORQ  R13, R11
 
-	// exhausted = exhausted || (br2.off < 4)
-	CMPQ  AX, $0x04
-	SETLT AL
-	ORB   AL, DL
+	// exhausted += (br2.off < 4)
+	CMPQ AX, $0x04
+	ADCB $+0, DL
 
skip_fill2:
 	// val0 := br2.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v0 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br2.advance(uint8(v0.entry)
 	MOVB CH, AL
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// val1 := br2.peekTopBits(peekBits)
 	MOVQ DI, CX
-	MOVQ R12, R14
-	SHRQ CL, R14
+	MOVQ R11, R13
+	SHRQ CL, R13
 
 	// v1 := table[val1&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br2.advance(uint8(v1.entry))
 	MOVB CH, AH
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// these two writes get coalesced
 	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
-	MOVW AX, (R8)
+	MOVW AX, (BX)(R8*2)
 
 	// update the bitreader structure
-	MOVQ R12, 128(R11)
-	MOVB R13, 136(R11)
-	ADDQ R9, R8
+	MOVQ R11, 128(R10)
+	MOVB R12, 136(R10)
 
 	// br3.fillFast32()
-	MOVQ    176(R11), R12
-	MOVBQZX 184(R11), R13
-	CMPQ    R13, $0x20
+	MOVQ    176(R10), R11
+	MOVBQZX 184(R10), R12
+	CMPQ    R12, $0x20
 	JBE     skip_fill3
-	MOVQ    168(R11), AX
-	SUBQ    $0x20, R13
+	MOVQ    168(R10), AX
+	SUBQ    $0x20, R12
 	SUBQ    $0x04, AX
-	MOVQ    144(R11), R14
+	MOVQ    144(R10), R13
 
 	// b.value |= uint64(low) << (b.bitsRead & 63)
-	MOVL (AX)(R14*1), R14
-	MOVQ R13, CX
-	SHLQ CL, R14
-	MOVQ AX, 168(R11)
-	ORQ  R14, R12
+	MOVL (AX)(R13*1), R13
+	MOVQ R12, CX
+	SHLQ CL, R13
+	MOVQ AX, 168(R10)
+	ORQ  R13, R11
 
-	// exhausted = exhausted || (br3.off < 4)
-	CMPQ  AX, $0x04
-	SETLT AL
-	ORB   AL, DL
+	// exhausted += (br3.off < 4)
+	CMPQ AX, $0x04
+	ADCB $+0, DL
 
skip_fill3:
 	// val0 := br3.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13
 
 	// v0 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br3.advance(uint8(v0.entry)
 	MOVB CH, AL
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// val1 := br3.peekTopBits(peekBits)
 	MOVQ DI, CX
-	MOVQ R12, R14
-	SHRQ CL, R14
+	MOVQ R11, R13
+	SHRQ CL, R13
 
 	// v1 := table[val1&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX
 
 	// br3.advance(uint8(v1.entry))
 	MOVB CH, AH
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12
 
 	// these two writes get coalesced
 	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
-	MOVW AX, (R8)
+	LEAQ (R8)(R8*2), CX
+	MOVW AX, (BX)(CX*1)
 
 	// update the bitreader structure
-	MOVQ  R12, 176(R11)
-	MOVB  R13, 184(R11)
-	ADDQ  $0x02, SI
+	MOVQ  R11, 176(R10)
+	MOVB  R12, 184(R10)
+	ADDQ  $0x02, BX
 	TESTB DL, DL
 	JZ    main_loop
 	MOVQ  ctx+0(FP), AX
-	SUBQ  16(AX), SI
-	SHLQ  $0x02, SI
-	MOVQ  SI, 40(AX)
+	SUBQ  16(AX), BX
+	SHLQ  $0x02, BX
+	MOVQ  BX, 40(AX)
 	RET
 
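A note on the exhausted bookkeeping above: the old code materialized the comparison with SETLT and OR'd it into DL; the new code relies on CMPQ setting the carry flag exactly when the unsigned operand is below 4, and ADCB $+0, DL folds that carry straight into an accumulator. Any nonzero DL ends the main loop (TESTB DL, DL / JZ main_loop). A Go rendering of the logic, illustrative rather than the generated code:

```go
package main

import "fmt"

// exhausted counts how many of the four bit readers have fewer than
// 4 bytes left; decoding stops as soon as the count is nonzero.
func exhausted(offs [4]int) bool {
	n := 0
	for _, off := range offs {
		if off < 4 { // CMPQ off, $4 sets carry exactly when off < 4
			n++ // ADCB $+0, DL adds that carry into DL
		}
	}
	return n != 0
}

func main() {
	fmt.Println(exhausted([4]int{8, 8, 8, 8})) // false: keep looping
	fmt.Println(exhausted([4]int{8, 3, 8, 8})) // true: leave main_loop
}
```
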
 // func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
 TEXT ·decompress4x_8b_main_loop_amd64(SB), $0-8
-	XORQ DX, DX
-
 	// Preload values
 	MOVQ    ctx+0(FP), CX
 	MOVBQZX 8(CX), DI
 	MOVQ    16(CX), BX
 	MOVQ    48(CX), SI
-	MOVQ    24(CX), R9
-	MOVQ    32(CX), R10
-	MOVQ    (CX), R11
+	MOVQ    24(CX), R8
+	MOVQ    32(CX), R9
+	MOVQ    (CX), R10

 	// Main loop
main_loop:
-	MOVQ  BX, R8
-	CMPQ  R8, SI
+	XORL  DX, DX
+	CMPQ  BX, SI
 	SETGE DL

 	// br0.fillFast32()
-	MOVQ    32(R11), R12
-	MOVBQZX 40(R11), R13
-	CMPQ    R13, $0x20
+	MOVQ    32(R10), R11
+	MOVBQZX 40(R10), R12
+	CMPQ    R12, $0x20
 	JBE     skip_fill0
-	MOVQ    24(R11), R14
-	SUBQ    $0x20, R13
-	SUBQ    $0x04, R14
-	MOVQ    (R11), R15
+	MOVQ    24(R10), R13
+	SUBQ    $0x20, R12
+	SUBQ    $0x04, R13
+	MOVQ    (R10), R14

 	// b.value |= uint64(low) << (b.bitsRead & 63)
-	MOVL (R14)(R15*1), R15
-	MOVQ R13, CX
-	SHLQ CL, R15
-	MOVQ R14, 24(R11)
-	ORQ  R15, R12
+	MOVL (R13)(R14*1), R14
+	MOVQ R12, CX
+	SHLQ CL, R14
+	MOVQ R13, 24(R10)
+	ORQ  R14, R11

-	// exhausted = exhausted || (br0.off < 4)
-	CMPQ  R14, $0x04
-	SETLT AL
-	ORB   AL, DL
+	// exhausted += (br0.off < 4)
+	CMPQ R13, $0x04
+	ADCB $+0, DL

skip_fill0:
 	// val0 := br0.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13

 	// v0 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX

 	// br0.advance(uint8(v0.entry)
 	MOVB CH, AL
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12

 	// val1 := br0.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13

 	// v1 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX

 	// br0.advance(uint8(v1.entry)
 	MOVB   CH, AH
-	SHLQ   CL, R12
-	ADDB   CL, R13
+	SHLQ   CL, R11
+	ADDB   CL, R12
 	BSWAPL AX

 	// val2 := br0.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13

 	// v2 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX

 	// br0.advance(uint8(v2.entry)
 	MOVB CH, AH
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12

 	// val3 := br0.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13

 	// v3 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX

 	// br0.advance(uint8(v3.entry)
 	MOVB   CH, AL
-	SHLQ   CL, R12
-	ADDB   CL, R13
+	SHLQ   CL, R11
+	ADDB   CL, R12
 	BSWAPL AX

 	// these four writes get coalesced
@@ -365,88 +354,86 @@ skip_fill0:
 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
 	// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
 	// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
-	MOVL AX, (R8)
+	MOVL AX, (BX)

 	// update the bitreader structure
-	MOVQ R12, 32(R11)
-	MOVB R13, 40(R11)
-	ADDQ R9, R8
+	MOVQ R11, 32(R10)
+	MOVB R12, 40(R10)

 	// br1.fillFast32()
-	MOVQ    80(R11), R12
-	MOVBQZX 88(R11), R13
-	CMPQ    R13, $0x20
+	MOVQ    80(R10), R11
+	MOVBQZX 88(R10), R12
+	CMPQ    R12, $0x20
 	JBE     skip_fill1
-	MOVQ    72(R11), R14
-	SUBQ    $0x20, R13
-	SUBQ    $0x04, R14
-	MOVQ    48(R11), R15
+	MOVQ    72(R10), R13
+	SUBQ    $0x20, R12
+	SUBQ    $0x04, R13
+	MOVQ    48(R10), R14

 	// b.value |= uint64(low) << (b.bitsRead & 63)
-	MOVL (R14)(R15*1), R15
-	MOVQ R13, CX
-	SHLQ CL, R15
-	MOVQ R14, 72(R11)
-	ORQ  R15, R12
+	MOVL (R13)(R14*1), R14
+	MOVQ R12, CX
+	SHLQ CL, R14
+	MOVQ R13, 72(R10)
+	ORQ  R14, R11

-	// exhausted = exhausted || (br1.off < 4)
-	CMPQ  R14, $0x04
-	SETLT AL
-	ORB   AL, DL
+	// exhausted += (br1.off < 4)
+	CMPQ R13, $0x04
+	ADCB $+0, DL

skip_fill1:
 	// val0 := br1.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13

 	// v0 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX

 	// br1.advance(uint8(v0.entry)
 	MOVB CH, AL
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12

 	// val1 := br1.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13

 	// v1 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX

 	// br1.advance(uint8(v1.entry)
 	MOVB   CH, AH
-	SHLQ   CL, R12
-	ADDB   CL, R13
+	SHLQ   CL, R11
+	ADDB   CL, R12
 	BSWAPL AX

 	// val2 := br1.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13

 	// v2 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX

 	// br1.advance(uint8(v2.entry)
 	MOVB CH, AH
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12

 	// val3 := br1.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13

 	// v3 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX

 	// br1.advance(uint8(v3.entry)
 	MOVB   CH, AL
-	SHLQ   CL, R12
-	ADDB   CL, R13
+	SHLQ   CL, R11
+	ADDB   CL, R12
 	BSWAPL AX

 	// these four writes get coalesced
@@ -454,88 +441,86 @@ skip_fill1:
 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
 	// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
 	// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
-	MOVL AX, (R8)
+	MOVL AX, (BX)(R8*1)

 	// update the bitreader structure
-	MOVQ R12, 80(R11)
-	MOVB R13, 88(R11)
-	ADDQ R9, R8
+	MOVQ R11, 80(R10)
+	MOVB R12, 88(R10)

 	// br2.fillFast32()
-	MOVQ    128(R11), R12
-	MOVBQZX 136(R11), R13
-	CMPQ    R13, $0x20
+	MOVQ    128(R10), R11
+	MOVBQZX 136(R10), R12
+	CMPQ    R12, $0x20
 	JBE     skip_fill2
-	MOVQ    120(R11), R14
-	SUBQ    $0x20, R13
-	SUBQ    $0x04, R14
-	MOVQ    96(R11), R15
+	MOVQ    120(R10), R13
+	SUBQ    $0x20, R12
+	SUBQ    $0x04, R13
+	MOVQ    96(R10), R14

 	// b.value |= uint64(low) << (b.bitsRead & 63)
-	MOVL (R14)(R15*1), R15
-	MOVQ R13, CX
-	SHLQ CL, R15
-	MOVQ R14, 120(R11)
-	ORQ  R15, R12
+	MOVL (R13)(R14*1), R14
+	MOVQ R12, CX
+	SHLQ CL, R14
+	MOVQ R13, 120(R10)
+	ORQ  R14, R11

-	// exhausted = exhausted || (br2.off < 4)
-	CMPQ  R14, $0x04
-	SETLT AL
-	ORB   AL, DL
+	// exhausted += (br2.off < 4)
+	CMPQ R13, $0x04
+	ADCB $+0, DL

skip_fill2:
 	// val0 := br2.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13

 	// v0 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX

 	// br2.advance(uint8(v0.entry)
 	MOVB CH, AL
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12

 	// val1 := br2.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13

 	// v1 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX

 	// br2.advance(uint8(v1.entry)
 	MOVB   CH, AH
-	SHLQ   CL, R12
-	ADDB   CL, R13
+	SHLQ   CL, R11
+	ADDB   CL, R12
 	BSWAPL AX

 	// val2 := br2.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13

 	// v2 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX

 	// br2.advance(uint8(v2.entry)
 	MOVB CH, AH
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12

 	// val3 := br2.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13

 	// v3 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX

 	// br2.advance(uint8(v3.entry)
 	MOVB   CH, AL
-	SHLQ   CL, R12
-	ADDB   CL, R13
+	SHLQ   CL, R11
+	ADDB   CL, R12
 	BSWAPL AX

 	// these four writes get coalesced
@@ -543,88 +528,86 @@ skip_fill2:
 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
 	// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
 	// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
-	MOVL AX, (R8)
+	MOVL AX, (BX)(R8*2)

 	// update the bitreader structure
-	MOVQ R12, 128(R11)
-	MOVB R13, 136(R11)
-	ADDQ R9, R8
+	MOVQ R11, 128(R10)
+	MOVB R12, 136(R10)

 	// br3.fillFast32()
-	MOVQ    176(R11), R12
-	MOVBQZX 184(R11), R13
-	CMPQ    R13, $0x20
+	MOVQ    176(R10), R11
+	MOVBQZX 184(R10), R12
+	CMPQ    R12, $0x20
 	JBE     skip_fill3
-	MOVQ    168(R11), R14
-	SUBQ    $0x20, R13
-	SUBQ    $0x04, R14
-	MOVQ    144(R11), R15
+	MOVQ    168(R10), R13
+	SUBQ    $0x20, R12
+	SUBQ    $0x04, R13
+	MOVQ    144(R10), R14

 	// b.value |= uint64(low) << (b.bitsRead & 63)
-	MOVL (R14)(R15*1), R15
-	MOVQ R13, CX
-	SHLQ CL, R15
-	MOVQ R14, 168(R11)
-	ORQ  R15, R12
+	MOVL (R13)(R14*1), R14
+	MOVQ R12, CX
+	SHLQ CL, R14
+	MOVQ R13, 168(R10)
+	ORQ  R14, R11

-	// exhausted = exhausted || (br3.off < 4)
-	CMPQ  R14, $0x04
-	SETLT AL
-	ORB   AL, DL
+	// exhausted += (br3.off < 4)
+	CMPQ R13, $0x04
+	ADCB $+0, DL

skip_fill3:
 	// val0 := br3.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13

 	// v0 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX

 	// br3.advance(uint8(v0.entry)
 	MOVB CH, AL
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12

 	// val1 := br3.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13

 	// v1 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX

 	// br3.advance(uint8(v1.entry)
 	MOVB   CH, AH
-	SHLQ   CL, R12
-	ADDB   CL, R13
+	SHLQ   CL, R11
+	ADDB   CL, R12
 	BSWAPL AX

 	// val2 := br3.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13

 	// v2 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX

 	// br3.advance(uint8(v2.entry)
 	MOVB CH, AH
-	SHLQ CL, R12
-	ADDB CL, R13
+	SHLQ CL, R11
+	ADDB CL, R12

 	// val3 := br3.peekTopBits(peekBits)
-	MOVQ R12, R14
+	MOVQ R11, R13
 	MOVQ DI, CX
-	SHRQ CL, R14
+	SHRQ CL, R13

 	// v3 := table[val0&mask]
-	MOVW (R10)(R14*2), CX
+	MOVW (R9)(R13*2), CX

 	// br3.advance(uint8(v3.entry)
 	MOVB   CH, AL
-	SHLQ   CL, R12
-	ADDB   CL, R13
+	SHLQ   CL, R11
+	ADDB   CL, R12
 	BSWAPL AX

 	// these four writes get coalesced
@@ -632,11 +615,12 @@ skip_fill3:
 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
 	// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
 	// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
-	MOVL AX, (R8)
+	LEAQ (R8)(R8*2), CX
+	MOVL AX, (BX)(CX*1)

 	// update the bitreader structure
-	MOVQ  R12, 176(R11)
-	MOVB  R13, 184(R11)
+	MOVQ  R11, 176(R10)
+	MOVB  R12, 184(R10)
 	ADDQ  $0x04, BX
 	TESTB DL, DL
 	JZ    main_loop
@@ -652,7 +636,7 @@ TEXT ·decompress1x_main_loop_amd64(SB), $0-8
 	MOVQ    16(CX), DX
 	MOVQ    24(CX), BX
 	CMPQ    BX, $0x04
-	JB      error_max_decoded_size_exeeded
+	JB      error_max_decoded_size_exceeded
 	LEAQ    (DX)(BX*1), BX
 	MOVQ    (CX), SI
 	MOVQ    (SI), R8
@@ -667,7 +651,7 @@ main_loop:
 	// Check if we have room for 4 bytes in the output buffer
 	LEAQ 4(DX), CX
 	CMPQ CX, BX
-	JGE  error_max_decoded_size_exeeded
+	JGE  error_max_decoded_size_exceeded

 	// Decode 4 values
 	CMPQ R11, $0x20
@@ -744,7 +728,7 @@ loop_condition:
 	RET

 	// Report error
-error_max_decoded_size_exeeded:
+error_max_decoded_size_exceeded:
 	MOVQ ctx+0(FP), AX
 	MOVQ $-1, CX
 	MOVQ CX, 40(AX)
@@ -757,7 +741,7 @@ TEXT ·decompress1x_main_loop_bmi2(SB), $0-8
 	MOVQ    16(CX), DX
 	MOVQ    24(CX), BX
 	CMPQ    BX, $0x04
-	JB      error_max_decoded_size_exeeded
+	JB      error_max_decoded_size_exceeded
 	LEAQ    (DX)(BX*1), BX
 	MOVQ    (CX), SI
 	MOVQ    (SI), R8
@@ -772,7 +756,7 @@ main_loop:
 	// Check if we have room for 4 bytes in the output buffer
 	LEAQ 4(DX), CX
 	CMPQ CX, BX
-	JGE  error_max_decoded_size_exeeded
+	JGE  error_max_decoded_size_exceeded

 	// Decode 4 values
 	CMPQ  R11, $0x20
@@ -839,7 +823,7 @@ loop_condition:
 	RET

 	// Report error
-error_max_decoded_size_exeeded:
+error_max_decoded_size_exceeded:
 	MOVQ ctx+0(FP), AX
 	MOVQ $-1, CX
 	MOVQ CX, 40(AX)

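Note on the reworked exhaustion check above: CMPQ R13, $0x04 is an unsigned compare that sets the x86 carry flag exactly when the remaining offset is below 4, and ADCB $+0, DL folds that carry into DL, replacing the earlier three-instruction SETLT/ORB sequence. A minimal Go sketch of the equivalent logic (names are illustrative, not taken from the generated code):

package main

import "fmt"

// exhausted mirrors the CMPQ/ADCB idiom: each bit reader whose remaining
// offset drops below 4 bumps the counter, and a non-zero counter ends the
// decode loop (TESTB DL, DL / JZ main_loop in the assembly above).
func exhausted(offs [4]uint) (dl uint8) {
	for _, off := range offs {
		if off < 4 { // the carry flag from the unsigned compare
			dl++
		}
	}
	return dl
}

func main() {
	fmt.Println(exhausted([4]uint{8, 3, 12, 0})) // 2: two streams nearly empty
}
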
+ 22 - 12
vendor/github.com/klauspost/compress/internal/snapref/encode_other.go

@@ -87,22 +87,32 @@ func emitCopy(dst []byte, offset, length int) int {
 	return i + 2
 }
 
-// extendMatch returns the largest k such that k <= len(src) and that
-// src[i:i+k-j] and src[j:k] have the same contents.
-//
-// It assumes that:
-//
-//	0 <= i && i < j && j <= len(src)
-func extendMatch(src []byte, i, j int) int {
-	for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
-	}
-	return j
-}
-
 func hash(u, shift uint32) uint32 {
 	return (u * 0x1e35a7bd) >> shift
 }
 
+// EncodeBlockInto exposes encodeBlock but checks dst size.
+func EncodeBlockInto(dst, src []byte) (d int) {
+	if MaxEncodedLen(len(src)) > len(dst) {
+		return 0
+	}
+
+	// encodeBlock breaks on too big blocks, so split.
+	for len(src) > 0 {
+		p := src
+		src = nil
+		if len(p) > maxBlockSize {
+			p, src = p[:maxBlockSize], p[maxBlockSize:]
+		}
+		if len(p) < minNonLiteralBlockSize {
+			d += emitLiteral(dst[d:], p)
+		} else {
+			d += encodeBlock(dst[d:], p)
+		}
+	}
+	return d
+}
+
 // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
 // assumes that the varint-encoded length of the decompressed bytes has already
 // been written.

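Note: EncodeBlockInto is the new exported entry point into the block encoder. A sketch of how a caller inside the module might use it (snapref lives under internal/, so it is not importable from outside; the function name below is hypothetical):

// Sketch, written as if inside package snapref.
func compressBlockBody(src []byte) []byte {
	n := MaxEncodedLen(len(src))
	if n < 0 {
		return nil // src exceeds the format's size limit
	}
	dst := make([]byte, n)
	// EncodeBlockInto returns 0 if dst is under-sized; it encodes the block
	// body only - writing the varint decompressed length is the caller's job,
	// as the encodeBlock comment above notes.
	return dst[:EncodeBlockInto(dst, src)]
}
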
+ 1 - 1
vendor/github.com/klauspost/compress/zstd/README.md

@@ -304,7 +304,7 @@ import "github.com/klauspost/compress/zstd"
 
 // Create a reader that caches decompressors.
 // For this operation type we supply a nil Reader.
-var decoder, _ = zstd.NewReader(nil, WithDecoderConcurrency(0))
+var decoder, _ = zstd.NewReader(nil, zstd.WithDecoderConcurrency(0))
 
 // Decompress a buffer. We don't supply a destination buffer,
 // so it will be allocated by the decoder.

+ 15 - 19
vendor/github.com/klauspost/compress/zstd/bitreader.go

@@ -17,7 +17,6 @@ import (
 // for aligning the input.
 type bitReader struct {
 	in       []byte
-	off      uint   // next byte to read is at in[off - 1]
 	value    uint64 // Maybe use [16]byte, but shifting is awkward.
 	bitsRead uint8
 }
@@ -28,7 +27,6 @@ func (b *bitReader) init(in []byte) error {
 		return errors.New("corrupt stream: too short")
 	}
 	b.in = in
-	b.off = uint(len(in))
 	// The highest bit of the last byte indicates where to start
 	v := in[len(in)-1]
 	if v == 0 {
@@ -69,21 +67,19 @@ func (b *bitReader) fillFast() {
 	if b.bitsRead < 32 {
 		return
 	}
-	// 2 bounds checks.
-	v := b.in[b.off-4:]
-	v = v[:4]
+	v := b.in[len(b.in)-4:]
+	b.in = b.in[:len(b.in)-4]
 	low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 	b.value = (b.value << 32) | uint64(low)
 	b.bitsRead -= 32
-	b.off -= 4
 }
 
 // fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read.
 func (b *bitReader) fillFastStart() {
-	// Do single re-slice to avoid bounds checks.
-	b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+	v := b.in[len(b.in)-8:]
+	b.in = b.in[:len(b.in)-8]
+	b.value = binary.LittleEndian.Uint64(v)
 	b.bitsRead = 0
-	b.off -= 8
 }
 
 // fill() will make sure at least 32 bits are available.
@@ -91,25 +87,25 @@ func (b *bitReader) fill() {
 	if b.bitsRead < 32 {
 		return
 	}
-	if b.off >= 4 {
-		v := b.in[b.off-4:]
-		v = v[:4]
+	if len(b.in) >= 4 {
+		v := b.in[len(b.in)-4:]
+		b.in = b.in[:len(b.in)-4]
 		low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 		b.value = (b.value << 32) | uint64(low)
 		b.bitsRead -= 32
-		b.off -= 4
 		return
 	}
-	for b.off > 0 {
-		b.value = (b.value << 8) | uint64(b.in[b.off-1])
-		b.bitsRead -= 8
-		b.off--
+
+	b.bitsRead -= uint8(8 * len(b.in))
+	for len(b.in) > 0 {
+		b.value = (b.value << 8) | uint64(b.in[len(b.in)-1])
+		b.in = b.in[:len(b.in)-1]
 	}
 }
 
 // finished returns true if all bits have been read from the bit stream.
 func (b *bitReader) finished() bool {
-	return b.off == 0 && b.bitsRead >= 64
+	return len(b.in) == 0 && b.bitsRead >= 64
 }
 
 // overread returns true if more bits have been requested than is on the stream.
@@ -119,7 +115,7 @@ func (b *bitReader) overread() bool {
 
 // remain returns the number of bits remaining.
 func (b *bitReader) remain() uint {
-	return b.off*8 + 64 - uint(b.bitsRead)
+	return 8*uint(len(b.in)) + 64 - uint(b.bitsRead)
 }
 
 // close the bitstream and returns an error if out-of-buffer reads occurred.

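Note: the bitReader refactor replaces the separate off cursor with direct re-slicing of b.in from the tail, leaving len(b.in) as the single source of truth for fill(), finished(), and remain(). Reading the last four bytes and then shrinking the slice also gives the compiler an easy bounds-check elimination. A standalone sketch of the pattern (not the package's actual type):

// fill32 consumes 4 bytes from the end of in, little-endian, and shifts
// them into value, mirroring bitReader.fillFast after the refactor.
func fill32(in []byte, value uint64) ([]byte, uint64) {
	v := in[len(in)-4:] // one bounds check; the compiler knows len(v) == 4
	in = in[:len(in)-4]
	low := uint32(v[0]) | uint32(v[1])<<8 | uint32(v[2])<<16 | uint32(v[3])<<24
	return in, value<<32 | uint64(low)
}
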
+ 1 - 2
vendor/github.com/klauspost/compress/zstd/bitwriter.go

@@ -97,12 +97,11 @@ func (b *bitWriter) flushAlign() {
 
 // close will write the alignment bit and write the final byte(s)
 // to the output.
-func (b *bitWriter) close() error {
+func (b *bitWriter) close() {
 	// End mark
 	b.addBits16Clean(1, 1)
 	// flush until next byte.
 	b.flushAlign()
-	return nil
 }
 
 // reset and continue writing by appending to out.

+ 13 - 7
vendor/github.com/klauspost/compress/zstd/blockdec.go

@@ -9,6 +9,7 @@ import (
 	"encoding/binary"
 	"errors"
 	"fmt"
+	"hash/crc32"
 	"io"
 	"os"
 	"path/filepath"
@@ -82,8 +83,9 @@ type blockDec struct {
 
 	err error
 
-	// Check against this crc
-	checkCRC []byte
+	// Check against this crc, if hasCRC is true.
+	checkCRC uint32
+	hasCRC   bool
 
 	// Frame to use for singlethreaded decoding.
 	// Should not be used by the decoder itself since parent may be another frame.
@@ -191,16 +193,14 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
 	}
 
 	// Read block data.
-	if cap(b.dataStorage) < cSize {
+	if _, ok := br.(*byteBuf); !ok && cap(b.dataStorage) < cSize {
+		// byteBuf doesn't need a destination buffer.
 		if b.lowMem || cSize > maxCompressedBlockSize {
 			b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc)
 		} else {
 			b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc)
 		}
 	}
-	if cap(b.dst) <= maxSize {
-		b.dst = make([]byte, 0, maxSize+1)
-	}
 	b.data, err = br.readBig(cSize, b.dataStorage)
 	if err != nil {
 		if debugDecoder {
@@ -209,6 +209,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
 		}
 		return err
 	}
+	if cap(b.dst) <= maxSize {
+		b.dst = make([]byte, 0, maxSize+1)
+	}
 	return nil
 }
 
@@ -440,6 +443,9 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
 			}
 		}
 		var err error
+		if debugDecoder {
+			println("huff table input:", len(literals), "CRC:", crc32.ChecksumIEEE(literals))
+		}
 		huff, literals, err = huff0.ReadTable(literals, huff)
 		if err != nil {
 			println("reading huffman table:", err)
@@ -586,7 +592,7 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
 				}
 				seq.fse.setRLE(symb)
 				if debugDecoder {
-					printf("RLE set to %+v, code: %v", symb, v)
+					printf("RLE set to 0x%x, code: %v", symb, v)
 				}
 			case compModeFSE:
 				println("Reading table for", tableIndex(i))

+ 28 - 10
vendor/github.com/klauspost/compress/zstd/blockenc.go

@@ -361,14 +361,21 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
 	if len(lits) >= 1024 {
 		// Use 4 Streams.
 		out, reUsed, err = huff0.Compress4X(lits, b.litEnc)
-	} else if len(lits) > 32 {
+	} else if len(lits) > 16 {
 		// Use 1 stream
 		single = true
 		out, reUsed, err = huff0.Compress1X(lits, b.litEnc)
 	} else {
 		err = huff0.ErrIncompressible
 	}
-
+	if err == nil && len(out)+5 > len(lits) {
+		// If we are close, we may still be worse or equal to raw.
+		var lh literalsHeader
+		lh.setSizes(len(out), len(lits), single)
+		if len(out)+lh.size() >= len(lits) {
+			err = huff0.ErrIncompressible
+		}
+	}
 	switch err {
 	case huff0.ErrIncompressible:
 		if debugEncoder {
@@ -473,7 +480,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 		return b.encodeLits(b.literals, rawAllLits)
 	}
 	// We want some difference to at least account for the headers.
-	saved := b.size - len(b.literals) - (b.size >> 5)
+	saved := b.size - len(b.literals) - (b.size >> 6)
 	if saved < 16 {
 		if org == nil {
 			return errIncompressible
@@ -503,7 +510,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 	if len(b.literals) >= 1024 && !raw {
 		// Use 4 Streams.
 		out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
-	} else if len(b.literals) > 32 && !raw {
+	} else if len(b.literals) > 16 && !raw {
 		// Use 1 stream
 		single = true
 		out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
@@ -511,6 +518,17 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 		err = huff0.ErrIncompressible
 	}
 
+	if err == nil && len(out)+5 > len(b.literals) {
+		// If we are close, we may still be worse or equal to raw.
+		var lh literalsHeader
+		lh.setSize(len(b.literals))
+		szRaw := lh.size()
+		lh.setSizes(len(out), len(b.literals), single)
+		szComp := lh.size()
+		if len(out)+szComp >= len(b.literals)+szRaw {
+			err = huff0.ErrIncompressible
+		}
+	}
 	switch err {
 	case huff0.ErrIncompressible:
 		lh.setType(literalsBlockRaw)
@@ -773,16 +791,16 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 	ml.flush(mlEnc.actualTableLog)
 	of.flush(ofEnc.actualTableLog)
 	ll.flush(llEnc.actualTableLog)
-	err = wr.close()
-	if err != nil {
-		return err
-	}
+	wr.close()
 	b.output = wr.out
 
+	// Maybe even add a bigger margin.
 	if len(b.output)-3-bhOffset >= b.size {
-		// Maybe even add a bigger margin.
+		// Discard and encode as raw block.
+		b.output = b.encodeRawTo(b.output[:bhOffset], org)
+		b.popOffsets()
 		b.litEnc.Reuse = huff0.ReusePolicyNone
-		return errIncompressible
+		return nil
 	}
 
 	// Size is output minus block header.

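Note on the two new guards in blockenc.go: Huffman output that merely beats the raw literal bytes can still lose overall, because a compressed literals section needs a larger literalsHeader than a raw one. For instance, 100 literals compressed to 97 bytes are a net loss if the compressed header costs 4 bytes more than the raw header. The comparison in sketch form (illustrative names; literalsHeader itself is internal):

// preferRaw reports whether raw encoding is at least as small once each
// variant's literals-header size is included in the total.
func preferRaw(rawLen, rawHdr, compLen, compHdr int) bool {
	return compLen+compHdr >= rawLen+rawHdr
}
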
+ 2 - 2
vendor/github.com/klauspost/compress/zstd/bytebuf.go

@@ -54,7 +54,7 @@ func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) {
 func (b *byteBuf) readByte() (byte, error) {
 	bb := *b
 	if len(bb) < 1 {
-		return 0, nil
+		return 0, io.ErrUnexpectedEOF
 	}
 	r := bb[0]
 	*b = bb[1:]
@@ -109,7 +109,7 @@ func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) {
 }
 
 func (r *readerWrapper) readByte() (byte, error) {
-	n2, err := r.r.Read(r.tmp[:1])
+	n2, err := io.ReadFull(r.r, r.tmp[:1])
 	if err != nil {
 		if err == io.EOF {
 			err = io.ErrUnexpectedEOF

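Note: the readerWrapper fix matters because io.Reader.Read may legally return (0, nil), which the old single Read would have treated as a successful read of a stale tmp[0]. io.ReadFull keeps reading until the byte arrives or an error occurs. A self-contained sketch of the corrected shape (function name is illustrative):

import "io"

func readOneByte(r io.Reader) (byte, error) {
	var tmp [1]byte
	// io.ReadFull retries short reads, so (0, nil) from the underlying
	// reader can no longer be mistaken for a successful one-byte read.
	if _, err := io.ReadFull(r, tmp[:]); err != nil {
		if err == io.EOF {
			err = io.ErrUnexpectedEOF // mid-stream EOF is always unexpected here
		}
		return 0, err
	}
	return tmp[0], nil
}
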
+ 4 - 5
vendor/github.com/klauspost/compress/zstd/decodeheader.go

@@ -4,7 +4,6 @@
 package zstd
 
 import (
-	"bytes"
 	"encoding/binary"
 	"errors"
 	"io"
@@ -102,8 +101,8 @@ func (h *Header) Decode(in []byte) error {
 	}
 	h.HeaderSize += 4
 	b, in := in[:4], in[4:]
-	if !bytes.Equal(b, frameMagic) {
-		if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 {
+	if string(b) != frameMagic {
+		if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 {
 			return ErrMagicMismatch
 		}
 		if len(in) < 4 {
@@ -153,7 +152,7 @@ func (h *Header) Decode(in []byte) error {
 		}
 		b, in = in[:size], in[size:]
 		h.HeaderSize += int(size)
-		switch size {
+		switch len(b) {
 		case 1:
 			h.DictionaryID = uint32(b[0])
 		case 2:
@@ -183,7 +182,7 @@ func (h *Header) Decode(in []byte) error {
 		}
 		b, in = in[:fcsSize], in[fcsSize:]
 		h.HeaderSize += int(fcsSize)
-		switch fcsSize {
+		switch len(b) {
 		case 1:
 			h.FrameContentSize = uint64(b[0])
 		case 2:

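Note: dropping bytes.Equal works because frameMagic and skippableFrameMagic become string constants in this change (the same move dictMagic makes in dict.go below), and Go compiles a string(b) comparison against a constant without allocating. A sketch, with the well-known Zstandard magic filled in as an assumption since its definition is outside this diff:

const frameMagic = "\x28\xb5\x2f\xfd" // 0xFD2FB528 little-endian (assumed; defined elsewhere)

func isZstdFrame(b []byte) bool {
	// string(b[:4]) does not allocate here: the compiler recognizes a
	// conversion used only for comparison against a constant string.
	return len(b) >= 4 && string(b[:4]) == frameMagic
}
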
+ 49 - 51
vendor/github.com/klauspost/compress/zstd/decoder.go

@@ -5,7 +5,6 @@
 package zstd
 
 import (
-	"bytes"
 	"context"
 	"encoding/binary"
 	"io"
@@ -41,8 +40,7 @@ type Decoder struct {
 	frame *frameDec
 
 	// Custom dictionaries.
-	// Always uses copies.
-	dicts map[uint32]dict
+	dicts map[uint32]*dict
 
 	// streamWg is the waitgroup for all streams
 	streamWg sync.WaitGroup
@@ -104,7 +102,7 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
 	}
 
 	// Transfer option dicts.
-	d.dicts = make(map[uint32]dict, len(d.o.dicts))
+	d.dicts = make(map[uint32]*dict, len(d.o.dicts))
 	for _, dc := range d.o.dicts {
 		d.dicts[dc.id] = dc
 	}
@@ -342,15 +340,8 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
 			}
 			return dst, err
 		}
-		if frame.DictionaryID != nil {
-			dict, ok := d.dicts[*frame.DictionaryID]
-			if !ok {
-				return nil, ErrUnknownDictionary
-			}
-			if debugDecoder {
-				println("setting dict", frame.DictionaryID)
-			}
-			frame.history.setDict(&dict)
+		if err = d.setDict(frame); err != nil {
+			return nil, err
 		}
 		if frame.WindowSize > d.o.maxWindowSize {
 			if debugDecoder {
@@ -459,26 +450,23 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
 		println("got", len(d.current.b), "bytes, error:", d.current.err, "data crc:", tmp)
 	}
 
-	if !d.o.ignoreChecksum && len(next.b) > 0 {
-		n, err := d.current.crc.Write(next.b)
-		if err == nil {
-			if n != len(next.b) {
-				d.current.err = io.ErrShortWrite
-			}
-		}
+	if d.o.ignoreChecksum {
+		return true
 	}
-	if next.err == nil && next.d != nil && len(next.d.checkCRC) != 0 {
-		got := d.current.crc.Sum64()
-		var tmp [4]byte
-		binary.LittleEndian.PutUint32(tmp[:], uint32(got))
-		if !d.o.ignoreChecksum && !bytes.Equal(tmp[:], next.d.checkCRC) {
+
+	if len(next.b) > 0 {
+		d.current.crc.Write(next.b)
+	}
+	if next.err == nil && next.d != nil && next.d.hasCRC {
+		got := uint32(d.current.crc.Sum64())
+		if got != next.d.checkCRC {
 			if debugDecoder {
-				println("CRC Check Failed:", tmp[:], " (got) !=", next.d.checkCRC, "(on stream)")
+				printf("CRC Check Failed: %08x (got) != %08x (on stream)\n", got, next.d.checkCRC)
 			}
 			d.current.err = ErrCRCMismatch
 		} else {
 			if debugDecoder {
-				println("CRC ok", tmp[:])
+				printf("CRC ok %08x\n", got)
 			}
 		}
 	}
@@ -494,18 +482,12 @@ func (d *Decoder) nextBlockSync() (ok bool) {
 		if !d.syncStream.inFrame {
 			d.frame.history.reset()
 			d.current.err = d.frame.reset(&d.syncStream.br)
+			if d.current.err == nil {
+				d.current.err = d.setDict(d.frame)
+			}
 			if d.current.err != nil {
 				return false
 			}
-			if d.frame.DictionaryID != nil {
-				dict, ok := d.dicts[*d.frame.DictionaryID]
-				if !ok {
-					d.current.err = ErrUnknownDictionary
-					return false
-				} else {
-					d.frame.history.setDict(&dict)
-				}
-			}
 			if d.frame.WindowSize > d.o.maxDecodedSize || d.frame.WindowSize > d.o.maxWindowSize {
 				d.current.err = ErrDecoderSizeExceeded
 				return false
@@ -864,13 +846,8 @@ decodeStream:
 		if debugDecoder && err != nil {
 			println("Frame decoder returned", err)
 		}
-		if err == nil && frame.DictionaryID != nil {
-			dict, ok := d.dicts[*frame.DictionaryID]
-			if !ok {
-				err = ErrUnknownDictionary
-			} else {
-				frame.history.setDict(&dict)
-			}
+		if err == nil {
+			err = d.setDict(frame)
 		}
 		if err == nil && d.frame.WindowSize > d.o.maxWindowSize {
 			if debugDecoder {
@@ -918,18 +895,22 @@ decodeStream:
 				println("next block returned error:", err)
 			}
 			dec.err = err
-			dec.checkCRC = nil
+			dec.hasCRC = false
 			if dec.Last && frame.HasCheckSum && err == nil {
 				crc, err := frame.rawInput.readSmall(4)
-				if err != nil {
+				if len(crc) < 4 {
+					if err == nil {
+						err = io.ErrUnexpectedEOF
+
+					}
 					println("CRC missing?", err)
 					dec.err = err
-				}
-				var tmp [4]byte
-				copy(tmp[:], crc)
-				dec.checkCRC = tmp[:]
-				if debugDecoder {
-					println("found crc to check:", dec.checkCRC)
+				} else {
+					dec.checkCRC = binary.LittleEndian.Uint32(crc)
+					dec.hasCRC = true
+					if debugDecoder {
+						printf("found crc to check: %08x\n", dec.checkCRC)
+					}
 				}
 			}
 			err = dec.err
@@ -948,3 +929,20 @@ decodeStream:
 	hist.reset()
 	d.frame.history.b = frameHistCache
 }
+
+func (d *Decoder) setDict(frame *frameDec) (err error) {
+	dict, ok := d.dicts[frame.DictionaryID]
+	if ok {
+		if debugDecoder {
+			println("setting dict", frame.DictionaryID)
+		}
+		frame.history.setDict(dict)
+	} else if frame.DictionaryID != 0 {
+		// A zero or missing dictionary id is ambiguous:
+		// either dictionary zero, or no dictionary. In particular,
+		// zstd --patch-from uses this id for the source file,
+		// so only return an error if the dictionary id is not zero.
+		err = ErrUnknownDictionary
+	}
+	return err
+}

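Note: the checksum handling collapses to plain integers because a Zstandard frame checksum is the low 32 bits of the XXH64 of the decoded content; comparing two uint32 values replaces the old PutUint32/bytes.Equal round trip. Schematically (using the vendored xxhash package purely for illustration; it is internal to this module):

import "github.com/klauspost/compress/zstd/internal/xxhash"

// contentChecksum computes what the encoder writes after the last block
// and what nextBlock now compares against blockDec.checkCRC.
func contentChecksum(decoded []byte) uint32 {
	return uint32(xxhash.Sum64(decoded))
}
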
+ 23 - 3
vendor/github.com/klauspost/compress/zstd/decoder_options.go

@@ -6,6 +6,8 @@ package zstd
 
 import (
 	"errors"
+	"fmt"
+	"math/bits"
 	"runtime"
 )
 
@@ -18,7 +20,7 @@ type decoderOptions struct {
 	concurrent      int
 	maxDecodedSize  uint64
 	maxWindowSize   uint64
-	dicts           []dict
+	dicts           []*dict
 	ignoreChecksum  bool
 	limitToCap      bool
 	decodeBufsBelow int
@@ -85,7 +87,13 @@ func WithDecoderMaxMemory(n uint64) DOption {
 }
 
 // WithDecoderDicts allows to register one or more dictionaries for the decoder.
-// If several dictionaries with the same ID is provided the last one will be used.
+//
+// Each slice in dict must be in the [dictionary format] produced by
+// "zstd --train" from the Zstandard reference implementation.
+//
+// If several dictionaries with the same ID are provided, the last one will be used.
+//
+// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
 func WithDecoderDicts(dicts ...[]byte) DOption {
 	return func(o *decoderOptions) error {
 		for _, b := range dicts {
@@ -93,12 +101,24 @@ func WithDecoderDicts(dicts ...[]byte) DOption {
 			if err != nil {
 				return err
 			}
-			o.dicts = append(o.dicts, *d)
+			o.dicts = append(o.dicts, d)
 		}
 		return nil
 	}
}
 
+// WithDecoderDictRaw registers a dictionary that may be used by the decoder.
+// The slice content can be arbitrary data.
+func WithDecoderDictRaw(id uint32, content []byte) DOption {
+	return func(o *decoderOptions) error {
+		if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
+			return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content))
+		}
+		o.dicts = append(o.dicts, &dict{id: id, content: content, offsets: [3]int{1, 4, 8}})
+		return nil
+	}
+}
+
 // WithDecoderMaxWindow allows to set a maximum window size for decodes.
 // This allows rejecting packets that will cause big memory usage.
 // The Decoder will likely allocate more memory based on the WithDecoderLowmem setting.

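Note: WithDecoderDictRaw is the decoder-side half of raw (content-only) dictionaries. One concrete use, per the setDict comment in decoder.go above, is decoding frames produced by zstd --patch-from, which reference the source file under dictionary id 0. A hedged sketch (function and input names are placeholders):

import "github.com/klauspost/compress/zstd"

func applyPatch(patch, oldFile []byte) ([]byte, error) {
	// zstd --patch-from emits frames that reference the source file as
	// dictionary id 0, which is exactly what this option registers.
	dec, err := zstd.NewReader(nil,
		zstd.WithDecoderDictRaw(0, oldFile),
		zstd.WithDecoderMaxWindow(1<<30)) // patch frames may use large windows
	if err != nil {
		return nil, err
	}
	defer dec.Close()
	return dec.DecodeAll(patch, nil)
}
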
+ 420 - 8
vendor/github.com/klauspost/compress/zstd/dict.go

@@ -6,6 +6,8 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"math"
+	"sort"
 
 	"github.com/klauspost/compress/huff0"
 )
@@ -15,12 +17,14 @@ type dict struct {
 
 	litEnc              *huff0.Scratch
 	llDec, ofDec, mlDec sequenceDec
-	//llEnc, ofEnc, mlEnc []*fseEncoder
-	offsets [3]int
-	content []byte
+	offsets             [3]int
+	content             []byte
 }
 
-var dictMagic = [4]byte{0x37, 0xa4, 0x30, 0xec}
+const dictMagic = "\x37\xa4\x30\xec"
+
+// Maximum dictionary size for the reference implementation (1.5.3) is 2 GiB.
+const dictMaxLength = 1 << 31
 
 // ID returns the dictionary id or 0 if d is nil.
 func (d *dict) ID() uint32 {
@@ -30,14 +34,38 @@ func (d *dict) ID() uint32 {
 	return d.id
 }
 
-// DictContentSize returns the dictionary content size or 0 if d is nil.
-func (d *dict) DictContentSize() int {
+// ContentSize returns the dictionary content size or 0 if d is nil.
+func (d *dict) ContentSize() int {
 	if d == nil {
 		return 0
 	}
 	return len(d.content)
 }
 
+// Content returns the dictionary content.
+func (d *dict) Content() []byte {
+	if d == nil {
+		return nil
+	}
+	return d.content
+}
+
+// Offsets returns the initial offsets.
+func (d *dict) Offsets() [3]int {
+	if d == nil {
+		return [3]int{}
+	}
+	return d.offsets
+}
+
+// LitEncoder returns the literal encoder.
+func (d *dict) LitEncoder() *huff0.Scratch {
+	if d == nil {
+		return nil
+	}
+	return d.litEnc
+}
+
 // Load a dictionary as described in
 // https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
 func loadDict(b []byte) (*dict, error) {
@@ -50,7 +78,7 @@ func loadDict(b []byte) (*dict, error) {
 		ofDec: sequenceDec{fse: &fseDecoder{}},
 		mlDec: sequenceDec{fse: &fseDecoder{}},
 	}
-	if !bytes.Equal(b[:4], dictMagic[:]) {
+	if string(b[:4]) != dictMagic {
 		return nil, ErrMagicMismatch
 	}
 	d.id = binary.LittleEndian.Uint32(b[4:8])
@@ -62,7 +90,7 @@ func loadDict(b []byte) (*dict, error) {
 	var err error
 	d.litEnc, b, err = huff0.ReadTable(b[8:], nil)
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("loading literal table: %w", err)
 	}
 	d.litEnc.Reuse = huff0.ReusePolicyMust
 
@@ -120,3 +148,387 @@ func loadDict(b []byte) (*dict, error) {
 
 	return &d, nil
 }
+
+// InspectDictionary loads a zstd dictionary and provides functions to inspect the content.
+func InspectDictionary(b []byte) (interface {
+	ID() uint32
+	ContentSize() int
+	Content() []byte
+	Offsets() [3]int
+	LitEncoder() *huff0.Scratch
+}, error) {
+	initPredefined()
+	d, err := loadDict(b)
+	return d, err
+}
+
+type BuildDictOptions struct {
+	// Dictionary ID.
+	ID uint32
+
+	// Content to use to create dictionary tables.
+	Contents [][]byte
+
+	// History to use for all blocks.
+	History []byte
+
+	// Offsets to use.
+	Offsets [3]int
+
+	// CompatV155 will make the dictionary compatible with Zstd v1.5.5 and earlier.
+	// See https://github.com/facebook/zstd/issues/3724
+	CompatV155 bool
+
+	// Use the specified encoder level.
+	// The dictionary will be built using the specified encoder level,
+	// which will reflect speed and make the dictionary tailored for that level.
+	// If not set SpeedBestCompression will be used.
+	Level EncoderLevel
+
+	// DebugOut will write stats and other details here if set.
+	DebugOut io.Writer
+}
+
+func BuildDict(o BuildDictOptions) ([]byte, error) {
+	initPredefined()
+	hist := o.History
+	contents := o.Contents
+	debug := o.DebugOut != nil
+	println := func(args ...interface{}) {
+		if o.DebugOut != nil {
+			fmt.Fprintln(o.DebugOut, args...)
+		}
+	}
+	printf := func(s string, args ...interface{}) {
+		if o.DebugOut != nil {
+			fmt.Fprintf(o.DebugOut, s, args...)
+		}
+	}
+	print := func(args ...interface{}) {
+		if o.DebugOut != nil {
+			fmt.Fprint(o.DebugOut, args...)
+		}
+	}
+
+	if int64(len(hist)) > dictMaxLength {
+		return nil, fmt.Errorf("dictionary of size %d > %d", len(hist), int64(dictMaxLength))
+	}
+	if len(hist) < 8 {
+		return nil, fmt.Errorf("dictionary of size %d < %d", len(hist), 8)
+	}
+	if len(contents) == 0 {
+		return nil, errors.New("no content provided")
+	}
+	d := dict{
+		id:      o.ID,
+		litEnc:  nil,
+		llDec:   sequenceDec{},
+		ofDec:   sequenceDec{},
+		mlDec:   sequenceDec{},
+		offsets: o.Offsets,
+		content: hist,
+	}
+	block := blockEnc{lowMem: false}
+	block.init()
+	enc := encoder(&bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(maxMatchLen), bufferReset: math.MaxInt32 - int32(maxMatchLen*2), lowMem: false}})
+	if o.Level != 0 {
+		eOpts := encoderOptions{
+			level:      o.Level,
+			blockSize:  maxMatchLen,
+			windowSize: maxMatchLen,
+			dict:       &d,
+			lowMem:     false,
+		}
+		enc = eOpts.encoder()
+	} else {
+		o.Level = SpeedBestCompression
+	}
+	var (
+		remain [256]int
+		ll     [256]int
+		ml     [256]int
+		of     [256]int
+	)
+	addValues := func(dst *[256]int, src []byte) {
+		for _, v := range src {
+			dst[v]++
+		}
+	}
+	addHist := func(dst *[256]int, src *[256]uint32) {
+		for i, v := range src {
+			dst[i] += int(v)
+		}
+	}
+	seqs := 0
+	nUsed := 0
+	litTotal := 0
+	newOffsets := make(map[uint32]int, 1000)
+	for _, b := range contents {
+		block.reset(nil)
+		if len(b) < 8 {
+			continue
+		}
+		nUsed++
+		enc.Reset(&d, true)
+		enc.Encode(&block, b)
+		addValues(&remain, block.literals)
+		litTotal += len(block.literals)
+		seqs += len(block.sequences)
+		block.genCodes()
+		addHist(&ll, block.coders.llEnc.Histogram())
+		addHist(&ml, block.coders.mlEnc.Histogram())
+		addHist(&of, block.coders.ofEnc.Histogram())
+		for i, seq := range block.sequences {
+			if i > 3 {
+				break
+			}
+			offset := seq.offset
+			if offset == 0 {
+				continue
+			}
+			if offset > 3 {
+				newOffsets[offset-3]++
+			} else {
+				newOffsets[uint32(o.Offsets[offset-1])]++
+			}
+		}
+	}
+	// Find most used offsets.
+	var sortedOffsets []uint32
+	for k := range newOffsets {
+		sortedOffsets = append(sortedOffsets, k)
+	}
+	sort.Slice(sortedOffsets, func(i, j int) bool {
+		a, b := sortedOffsets[i], sortedOffsets[j]
+		if a == b {
+			// Prefer the longer offset
+			return sortedOffsets[i] > sortedOffsets[j]
+		}
+		return newOffsets[sortedOffsets[i]] > newOffsets[sortedOffsets[j]]
+	})
+	if len(sortedOffsets) > 3 {
+		if debug {
+			print("Offsets:")
+			for i, v := range sortedOffsets {
+				if i > 20 {
+					break
+				}
+				printf("[%d: %d],", v, newOffsets[v])
+			}
+			println("")
+		}
+
+		sortedOffsets = sortedOffsets[:3]
+	}
+	for i, v := range sortedOffsets {
+		o.Offsets[i] = int(v)
+	}
+	if debug {
+		println("New repeat offsets", o.Offsets)
+	}
+
+	if nUsed == 0 || seqs == 0 {
+		return nil, fmt.Errorf("%d blocks, %d sequences found", nUsed, seqs)
+	}
+	if debug {
+		println("Sequences:", seqs, "Blocks:", nUsed, "Literals:", litTotal)
+	}
+	if seqs/nUsed < 512 {
+		// Use 512 as minimum.
+		nUsed = seqs / 512
+	}
+	copyHist := func(dst *fseEncoder, src *[256]int) ([]byte, error) {
+		hist := dst.Histogram()
+		var maxSym uint8
+		var maxCount int
+		var fakeLength int
+		for i, v := range src {
+			if v > 0 {
+				v = v / nUsed
+				if v == 0 {
+					v = 1
+				}
+			}
+			if v > maxCount {
+				maxCount = v
+			}
+			if v != 0 {
+				maxSym = uint8(i)
+			}
+			fakeLength += v
+			hist[i] = uint32(v)
+		}
+		dst.HistogramFinished(maxSym, maxCount)
+		dst.reUsed = false
+		dst.useRLE = false
+		err := dst.normalizeCount(fakeLength)
+		if err != nil {
+			return nil, err
+		}
+		if debug {
+			println("RAW:", dst.count[:maxSym+1], "NORM:", dst.norm[:maxSym+1], "LEN:", fakeLength)
+		}
+		return dst.writeCount(nil)
+	}
+	if debug {
+		print("Literal lengths: ")
+	}
+	llTable, err := copyHist(block.coders.llEnc, &ll)
+	if err != nil {
+		return nil, err
+	}
+	if debug {
+		print("Match lengths: ")
+	}
+	mlTable, err := copyHist(block.coders.mlEnc, &ml)
+	if err != nil {
+		return nil, err
+	}
+	if debug {
+		print("Offsets: ")
+	}
+	ofTable, err := copyHist(block.coders.ofEnc, &of)
+	if err != nil {
+		return nil, err
+	}
+
+	// Literal table
+	avgSize := litTotal
+	if avgSize > huff0.BlockSizeMax/2 {
+		avgSize = huff0.BlockSizeMax / 2
+	}
+	huffBuff := make([]byte, 0, avgSize)
+	// Target size
+	div := litTotal / avgSize
+	if div < 1 {
+		div = 1
+	}
+	if debug {
+		println("Huffman weights:")
+	}
+	for i, n := range remain[:] {
+		if n > 0 {
+			n = n / div
+			// Allow all entries to be represented.
+			if n == 0 {
+				n = 1
+			}
+			huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
+			if debug {
+				printf("[%d: %d], ", i, n)
+			}
+		}
+	}
+	if o.CompatV155 && remain[255]/div == 0 {
+		huffBuff = append(huffBuff, 255)
+	}
+	scratch := &huff0.Scratch{TableLog: 11}
+	for tries := 0; tries < 255; tries++ {
+		scratch = &huff0.Scratch{TableLog: 11}
+		_, _, err = huff0.Compress1X(huffBuff, scratch)
+		if err == nil {
+			break
+		}
+		if debug {
+			printf("Try %d: Huffman error: %v\n", tries+1, err)
+		}
+		huffBuff = huffBuff[:0]
+		if tries == 250 {
+			if debug {
+				println("Huffman: Bailing out with predefined table")
+			}
+
+			// Bail out.... Just generate something
+			huffBuff = append(huffBuff, bytes.Repeat([]byte{255}, 10000)...)
+			for i := 0; i < 128; i++ {
+				huffBuff = append(huffBuff, byte(i))
+			}
+			continue
+		}
+		if errors.Is(err, huff0.ErrIncompressible) {
+			// Try truncating least common.
+			for i, n := range remain[:] {
+				if n > 0 {
+					n = n / (div * (i + 1))
+					if n > 0 {
+						huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
+					}
+				}
+			}
+			if o.CompatV155 && len(huffBuff) > 0 && huffBuff[len(huffBuff)-1] != 255 {
+				huffBuff = append(huffBuff, 255)
+			}
+			if len(huffBuff) == 0 {
+				huffBuff = append(huffBuff, 0, 255)
+			}
+		}
+		if errors.Is(err, huff0.ErrUseRLE) {
+			for i, n := range remain[:] {
+				n = n / (div * (i + 1))
+				// Allow all entries to be represented.
+				if n == 0 {
+					n = 1
+				}
+				huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
+			}
+		}
+	}
+
+	var out bytes.Buffer
+	out.Write([]byte(dictMagic))
+	out.Write(binary.LittleEndian.AppendUint32(nil, o.ID))
+	out.Write(scratch.OutTable)
+	if debug {
+		println("huff table:", len(scratch.OutTable), "bytes")
+		println("of table:", len(ofTable), "bytes")
+		println("ml table:", len(mlTable), "bytes")
+		println("ll table:", len(llTable), "bytes")
+	}
+	out.Write(ofTable)
+	out.Write(mlTable)
+	out.Write(llTable)
+	out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[0])))
+	out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[1])))
+	out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[2])))
+	out.Write(hist)
+	if debug {
+		_, err := loadDict(out.Bytes())
+		if err != nil {
+			panic(err)
+		}
+		i, err := InspectDictionary(out.Bytes())
+		if err != nil {
+			panic(err)
+		}
+		println("ID:", i.ID())
+		println("Content size:", i.ContentSize())
+		println("Encoder:", i.LitEncoder() != nil)
+		println("Offsets:", i.Offsets())
+		var totalSize int
+		for _, b := range contents {
+			totalSize += len(b)
+		}
+
+		encWith := func(opts ...EOption) int {
+			enc, err := NewWriter(nil, opts...)
+			if err != nil {
+				panic(err)
+			}
+			defer enc.Close()
+			var dst []byte
+			var totalSize int
+			for _, b := range contents {
+				dst = enc.EncodeAll(b, dst[:0])
+				totalSize += len(dst)
+			}
+			return totalSize
+		}
+		plain := encWith(WithEncoderLevel(o.Level))
+		withDict := encWith(WithEncoderLevel(o.Level), WithEncoderDict(out.Bytes()))
+		println("Input size:", totalSize)
+		println("Plain Compressed:", plain)
+		println("Dict Compressed:", withDict)
+		println("Saved:", plain-withDict, (plain-withDict)/len(contents), "bytes per input (rounded down)")
+	}
+	return out.Bytes(), nil
+}

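Note: BuildDict and InspectDictionary are new public entry points. BuildDict assembles a loadable dictionary from sample blocks plus a shared history, and its debug path round-trips the result through loadDict/InspectDictionary. A minimal sketch of driving it (the ID and starting offsets below are arbitrary placeholder values; BuildDict replaces the offsets with the most used ones it measures):

import "github.com/klauspost/compress/zstd"

func trainDict(samples [][]byte, history []byte) ([]byte, error) {
	return zstd.BuildDict(zstd.BuildDictOptions{
		ID:       0xBEEF0001,        // placeholder dictionary id
		Contents: samples,           // blocks to gather statistics from
		History:  history,           // becomes the dictionary content (>= 8 bytes)
		Offsets:  [3]int{8, 16, 32}, // illustrative starting repeat offsets
		Level:    zstd.SpeedBestCompression,
	})
}
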
+ 7 - 22
vendor/github.com/klauspost/compress/zstd/enc_base.go

@@ -16,6 +16,7 @@ type fastBase struct {
 	cur int32
 	// maximum offset. Should be at least 2x block size.
 	maxMatchOff int32
+	bufferReset int32
 	hist        []byte
 	crc         *xxhash.Digest
 	tmp         [8]byte
@@ -56,8 +57,8 @@ func (e *fastBase) Block() *blockEnc {
 }
 
 func (e *fastBase) addBlock(src []byte) int32 {
-	if debugAsserts && e.cur > bufferReset {
-		panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset))
+	if debugAsserts && e.cur > e.bufferReset {
+		panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, e.bufferReset))
 	}
 	// check if we have space already
 	if len(e.hist)+len(src) > cap(e.hist) {
@@ -126,24 +127,7 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
 			panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
 		}
 	}
-	a := src[s:]
-	b := src[t:]
-	b = b[:len(a)]
-	end := int32((len(a) >> 3) << 3)
-	for i := int32(0); i < end; i += 8 {
-		if diff := load6432(a, i) ^ load6432(b, i); diff != 0 {
-			return i + int32(bits.TrailingZeros64(diff)>>3)
-		}
-	}
-
-	a = a[end:]
-	b = b[end:]
-	for i := range a {
-		if a[i] != b[i] {
-			return int32(i) + end
-		}
-	}
-	return int32(len(a)) + end
+	return int32(matchLen(src[s:], src[t:]))
 }
 
 // Reset the encoding table.
@@ -160,18 +144,19 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) {
 	} else {
 		e.crc.Reset()
 	}
+	e.blk.dictLitEnc = nil
 	if d != nil {
 		low := e.lowMem
 		if singleBlock {
 			e.lowMem = true
 		}
-		e.ensureHist(d.DictContentSize() + maxCompressedBlockSize)
+		e.ensureHist(d.ContentSize() + maxCompressedBlockSize)
 		e.lowMem = low
 	}
 
 	// We offset current position so everything will be out of reach.
 	// If above reset line, history will be purged.
-	if e.cur < bufferReset {
+	if e.cur < e.bufferReset {
 		e.cur += e.maxMatchOff + int32(len(e.hist))
 	}
 	e.hist = e.hist[:0]

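Note: the inlined loop removed above moves to the new matchlen_* files (an assembly version for amd64 plus a portable fallback). The technique is unchanged: XOR eight bytes at a time and count trailing zero bits to locate the first differing byte. A portable sketch consistent with the removed code (guarded for unequal lengths; the package's callers guarantee len(a) <= len(b)):

import (
	"encoding/binary"
	"math/bits"
)

// matchLen returns the number of leading bytes that a and b share.
func matchLen(a, b []byte) (n int) {
	for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
		diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
		if diff != 0 {
			// The lowest set bit marks the first mismatching byte.
			return n + bits.TrailingZeros64(diff)>>3
		}
		n += 8
	}
	for i := range a {
		if i >= len(b) || a[i] != b[i] {
			break
		}
		n++
	}
	return n
}
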
+ 120 - 148
vendor/github.com/klauspost/compress/zstd/enc_best.go

@@ -32,10 +32,9 @@ type match struct {
 	length int32
 	rep    int32
 	est    int32
-	_      [12]byte // Aligned size to cache line: 4+4+4+4+4 bytes + 12 bytes padding = 32 bytes
 }
 
-const highScore = 25000
+const highScore = maxMatchLen * 8
 
 // estBits will estimate output bits from predefined tables.
 func (m *match) estBits(bitsPerByte int32) {
@@ -85,14 +84,10 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
-			for i := range e.table[:] {
-				e.table[i] = prevEntry{}
-			}
-			for i := range e.longTable[:] {
-				e.longTable[i] = prevEntry{}
-			}
+			e.table = [bestShortTableSize]prevEntry{}
+			e.longTable = [bestLongTableSize]prevEntry{}
 			e.cur = e.maxMatchOff
 			break
 		}
@@ -164,7 +159,6 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
 
 	// nextEmit is where in src the next emitLiteral should start from.
 	nextEmit := s
-	cv := load6432(src, s)
 
 	// Relative offsets
 	offset1 := int32(blk.recentOffsets[0])
@@ -178,7 +172,6 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
 		blk.literals = append(blk.literals, src[nextEmit:until]...)
 		s.litLen = uint32(until - nextEmit)
 	}
-	_ = addLiterals
 
 	if debugEncoder {
 		println("recent offsets:", blk.recentOffsets)
@@ -193,49 +186,97 @@ encodeLoop:
 			panic("offset0 was 0")
 		}
 
-		bestOf := func(a, b match) match {
-			if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 {
-				return a
-			}
-			return b
-		}
-		const goodEnough = 100
+		const goodEnough = 250
+
+		cv := load6432(src, s)
 
 		nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
 		nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
 		candidateL := e.longTable[nextHashL]
 		candidateS := e.table[nextHashS]
 
-		matchAt := func(offset int32, s int32, first uint32, rep int32) match {
-			if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
-				return match{s: s, est: highScore}
+		// Set m to a match at offset if it looks like that will improve compression.
+		improve := func(m *match, offset int32, s int32, first uint32, rep int32) {
+			delta := s - offset
+			if delta >= e.maxMatchOff || delta <= 0 || load3232(src, offset) != first {
+				return
 			}
 			if debugAsserts {
+				if offset >= s {
+					panic(fmt.Sprintf("offset: %d - s:%d - rep: %d - cur :%d - max: %d", offset, s, rep, e.cur, e.maxMatchOff))
+				}
 				if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
 					panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
 				}
 			}
-			m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
-			m.estBits(bitsPerByte)
-			return m
+			// Try to quick reject if we already have a long match.
+			if m.length > 16 {
+				left := len(src) - int(m.s+m.length)
+				// If we are too close to the end, keep as is.
+				if left <= 0 {
+					return
+				}
+				checkLen := m.length - (s - m.s) - 8
+				if left > 2 && checkLen > 4 {
+					// Check 4 bytes, 4 bytes from the end of the current match.
+					a := load3232(src, offset+checkLen)
+					b := load3232(src, s+checkLen)
+					if a != b {
+						return
+					}
+				}
+			}
+			l := 4 + e.matchlen(s+4, offset+4, src)
+			if rep < 0 {
+				// Extend candidate match backwards as far as possible.
+				tMin := s - e.maxMatchOff
+				if tMin < 0 {
+					tMin = 0
+				}
+				for offset > tMin && s > nextEmit && src[offset-1] == src[s-1] && l < maxMatchLength {
+					s--
+					offset--
+					l++
+				}
+			}
+
+			cand := match{offset: offset, s: s, length: l, rep: rep}
+			cand.estBits(bitsPerByte)
+			if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
+				*m = cand
+			}
 		}
 		}
 
 
-		best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
-		best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
-		best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
+		best := match{s: s, est: highScore}
+		improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1)
+		improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1)
+		improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1)
+		improve(&best, candidateS.prev-e.cur, s, uint32(cv), -1)
 
 
 		if canRepeat && best.length < goodEnough {
 		if canRepeat && best.length < goodEnough {
-			cv32 := uint32(cv >> 8)
-			spp := s + 1
-			best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
-			best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
-			best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
-			if best.length > 0 {
-				cv32 = uint32(cv >> 24)
-				spp += 2
-				best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
-				best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
-				best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
+			if s == nextEmit {
+				// Check repeats straight after a match.
+				improve(&best, s-offset2, s, uint32(cv), 1|4)
+				improve(&best, s-offset3, s, uint32(cv), 2|4)
+				if offset1 > 1 {
+					improve(&best, s-(offset1-1), s, uint32(cv), 3|4)
+				}
+			}
+
+			// If either no match or a non-repeat match, check at + 1
+			if best.rep <= 0 {
+				cv32 := uint32(cv >> 8)
+				spp := s + 1
+				improve(&best, spp-offset1, spp, cv32, 1)
+				improve(&best, spp-offset2, spp, cv32, 2)
+				improve(&best, spp-offset3, spp, cv32, 3)
+				if best.rep < 0 {
+					cv32 = uint32(cv >> 24)
+					spp += 2
+					improve(&best, spp-offset1, spp, cv32, 1)
+					improve(&best, spp-offset2, spp, cv32, 2)
+					improve(&best, spp-offset3, spp, cv32, 3)
+				}
 			}
 			}
 		}
 		}
 		// Load next and check...
 		// Load next and check...
@@ -250,40 +291,45 @@ encodeLoop:
 				if s >= sLimit {
 					break encodeLoop
 				}
-				cv = load6432(src, s)
 				continue
 			}
 
-			s++
 			candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)]
-			cv = load6432(src, s)
-			cv2 := load6432(src, s+1)
+			cv = load6432(src, s+1)
+			cv2 := load6432(src, s+2)
 			candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)]
 			candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]
 
 			// Short at s+1
-			best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
+			improve(&best, candidateS.offset-e.cur, s+1, uint32(cv), -1)
 			// Long at s+1, s+2
-			best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
-			best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
-			best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
-			best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
+			improve(&best, candidateL.offset-e.cur, s+1, uint32(cv), -1)
+			improve(&best, candidateL.prev-e.cur, s+1, uint32(cv), -1)
+			improve(&best, candidateL2.offset-e.cur, s+2, uint32(cv2), -1)
+			improve(&best, candidateL2.prev-e.cur, s+2, uint32(cv2), -1)
 			if false {
 				// Short at s+3.
 				// Too often worse...
-				best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1))
+				improve(&best, e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+3, uint32(cv2>>8), -1)
 			}
-			// See if we can find a better match by checking where the current best ends.
-			// Use that offset to see if we can find a better full match.
-			if sAt := best.s + best.length; sAt < sLimit {
-				nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
-				candidateEnd := e.longTable[nextHashL]
-				if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
-					bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
-					if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 {
-						bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1))
+
+			// Start check at a fixed offset to allow for a few mismatches.
+			// For this compression level 2 yields the best results.
+			// We cannot do this if we have already indexed this position.
+			const skipBeginning = 2
+			if best.s > s-skipBeginning {
+				// See if we can find a better match by checking where the current best ends.
+				// Use that offset to see if we can find a better full match.
+				if sAt := best.s + best.length; sAt < sLimit {
+					nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
+					candidateEnd := e.longTable[nextHashL]
+
+					if off := candidateEnd.offset - e.cur - best.length + skipBeginning; off >= 0 {
+						improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
+						if off := candidateEnd.prev - e.cur - best.length + skipBeginning; off >= 0 {
+							improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
+						}
 					}
-					best = bestEnd
 				}
 			}
 		}
@@ -296,51 +342,34 @@ encodeLoop:
 
 		// We have a match, we can store the forward value
 		if best.rep > 0 {
-			s = best.s
 			var seq seq
 			seq.matchLen = uint32(best.length - zstdMinMatch)
-
-			// We might be able to match backwards.
-			// Extend as long as we can.
-			start := best.s
-			// We end the search early, so we don't risk 0 literals
-			// and have to do special offset treatment.
-			startLimit := nextEmit + 1
-
-			tMin := s - e.maxMatchOff
-			if tMin < 0 {
-				tMin = 0
-			}
-			repIndex := best.offset
-			for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
-				repIndex--
-				start--
-				seq.matchLen++
+			if debugAsserts && s < nextEmit {
+				panic("s < nextEmit")
 			}
-			addLiterals(&seq, start)
+			addLiterals(&seq, best.s)
 
-			// rep 0
-			seq.offset = uint32(best.rep)
+			// Repeat. If bit 4 is set, this is a non-lit repeat.
+			seq.offset = uint32(best.rep & 3)
 			if debugSequences {
 				println("repeat sequence", seq, "next s:", s)
 			}
 			blk.sequences = append(blk.sequences, seq)
 
-			// Index match start+1 (long) -> s - 1
-			index0 := s
+			// Index old s + 1 -> s - 1
+			index0 := s + 1
 			s = best.s + best.length
 
 			nextEmit = s
 			if s >= sLimit {
 				if debugEncoder {
 					println("repeat ended", s, best.length)
-
 				}
 				break encodeLoop
 			}
 			// Index skipped...
 			off := index0 + e.cur
-			for index0 < s-1 {
+			for index0 < s {
 				cv0 := load6432(src, index0)
 				h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
 				h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
@@ -350,17 +379,19 @@ encodeLoop:
 				index0++
 			}
 			switch best.rep {
-			case 2:
+			case 2, 4 | 1:
 				offset1, offset2 = offset2, offset1
-			case 3:
+			case 3, 4 | 2:
 				offset1, offset2, offset3 = offset3, offset1, offset2
+			case 4 | 3:
+				offset1, offset2, offset3 = offset1-1, offset1, offset2
 			}
-			cv = load6432(src, s)
 			continue
 		}
 
 		// A 4-byte match has been found. Update recent offsets.
 		// We'll later see if more than 4 bytes.
+		index0 := s + 1
 		s = best.s
 		t := best.offset
 		offset1, offset2, offset3 = s-t, offset1, offset2
@@ -373,22 +404,9 @@ encodeLoop:
 			panic("invalid offset")
 			panic("invalid offset")
 		}
 		}
 
 
-		// Extend the n-byte match as long as possible.
-		l := best.length
-
-		// Extend backwards
-		tMin := s - e.maxMatchOff
-		if tMin < 0 {
-			tMin = 0
-		}
-		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
-			s--
-			t--
-			l++
-		}
-
 		// Write our sequence
 		// Write our sequence
 		var seq seq
 		var seq seq
+		l := best.length
 		seq.litLen = uint32(s - nextEmit)
 		seq.litLen = uint32(s - nextEmit)
 		seq.matchLen = uint32(l - zstdMinMatch)
 		seq.matchLen = uint32(l - zstdMinMatch)
 		if seq.litLen > 0 {
 		if seq.litLen > 0 {
@@ -405,10 +423,8 @@ encodeLoop:
 			break encodeLoop
 		}
 
-		// Index match start+1 (long) -> s - 1
-		index0 := s - l + 1
-		// every entry
-		for index0 < s-1 {
+		// Index old s + 1 -> s - 1
+		for index0 < s {
 			cv0 := load6432(src, index0)
 			h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
 			h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
@@ -417,50 +433,6 @@ encodeLoop:
 			e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
 			index0++
 		}
-
-		cv = load6432(src, s)
-		if !canRepeat {
-			continue
-		}
-
-		// Check offset 2
-		for {
-			o2 := s - offset2
-			if load3232(src, o2) != uint32(cv) {
-				// Do regular search
-				break
-			}
-
-			// Store this, since we have it.
-			nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
-			nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
-
-			// We have at least 4 byte match.
-			// No need to check backwards. We come straight from a match
-			l := 4 + e.matchlen(s+4, o2+4, src)
-
-			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
-			e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: e.table[nextHashS].offset}
-			seq.matchLen = uint32(l) - zstdMinMatch
-			seq.litLen = 0
-
-			// Since litlen is always 0, this is offset 1.
-			seq.offset = 1
-			s += l
-			nextEmit = s
-			if debugSequences {
-				println("sequence", seq, "next s:", s)
-			}
-			blk.sequences = append(blk.sequences, seq)
-
-			// Swap offset 1 and 2.
-			offset1, offset2 = offset2, offset1
-			if s >= sLimit {
-				// Finished
-				break encodeLoop
-			}
-			cv = load6432(src, s)
-		}
 	}
 
 	if int(nextEmit) < len(src) {

+ 4 - 8
vendor/github.com/klauspost/compress/zstd/enc_better.go

@@ -62,14 +62,10 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
-			for i := range e.table[:] {
-				e.table[i] = tableEntry{}
-			}
-			for i := range e.longTable[:] {
-				e.longTable[i] = prevEntry{}
-			}
+			e.table = [betterShortTableSize]tableEntry{}
+			e.longTable = [betterLongTableSize]prevEntry{}
 			e.cur = e.maxMatchOff
 			break
 		}
@@ -587,7 +583,7 @@ func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
 			for i := range e.table[:] {
 				e.table[i] = tableEntry{}

+ 7 - 11
vendor/github.com/klauspost/compress/zstd/enc_dfast.go

@@ -44,14 +44,10 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
-			for i := range e.table[:] {
-				e.table[i] = tableEntry{}
-			}
-			for i := range e.longTable[:] {
-				e.longTable[i] = tableEntry{}
-			}
+			e.table = [dFastShortTableSize]tableEntry{}
+			e.longTable = [dFastLongTableSize]tableEntry{}
 			e.cur = e.maxMatchOff
 			break
 		}
@@ -388,7 +384,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	if e.cur >= bufferReset {
+	if e.cur >= e.bufferReset {
 		for i := range e.table[:] {
 			e.table[i] = tableEntry{}
 		}
@@ -685,7 +681,7 @@ encodeLoop:
 	}
 
 	// We do not store history, so we must offset e.cur to avoid false matches for next user.
-	if e.cur < bufferReset {
+	if e.cur < e.bufferReset {
 		e.cur += int32(len(src))
 	}
 }
@@ -700,7 +696,7 @@ func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
 			for i := range e.table[:] {
 				e.table[i] = tableEntry{}
@@ -1088,7 +1084,7 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
 			}
 		}
 		e.lastDictID = d.id
-		e.allDirty = true
+		allDirty = true
 	}
 	// Reset table to initial state
 	e.cur = e.maxMatchOff

+ 10 - 19
vendor/github.com/klauspost/compress/zstd/enc_fast.go

@@ -43,7 +43,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
 			for i := range e.table[:] {
 				e.table[i] = tableEntry{}
@@ -133,8 +133,7 @@ encodeLoop:
 			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
 				// Consider history as well.
 				var seq seq
-				var length int32
-				length = 4 + e.matchlen(s+6, repIndex+4, src)
+				length := 4 + e.matchlen(s+6, repIndex+4, src)
 				seq.matchLen = uint32(length - zstdMinMatch)
 
 				// We might be able to match backwards.
@@ -310,7 +309,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
 	}
 
 	// Protect against e.cur wraparound.
-	if e.cur >= bufferReset {
+	if e.cur >= e.bufferReset {
 		for i := range e.table[:] {
 			e.table[i] = tableEntry{}
 		}
@@ -538,7 +537,7 @@ encodeLoop:
 		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
 		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
 	}
 	}
 	// We do not store history, so we must offset e.cur to avoid false matches for next user.
 	// We do not store history, so we must offset e.cur to avoid false matches for next user.
-	if e.cur < bufferReset {
+	if e.cur < e.bufferReset {
 		e.cur += int32(len(src))
 		e.cur += int32(len(src))
 	}
 	}
 }
 }
@@ -555,11 +554,9 @@ func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
 		return
 	}
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
-			for i := range e.table[:] {
-				e.table[i] = tableEntry{}
-			}
+			e.table = [tableSize]tableEntry{}
 			e.cur = e.maxMatchOff
 			break
 		}
@@ -647,8 +644,7 @@ encodeLoop:
 			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
 				// Consider history as well.
 				var seq seq
-				var length int32
-				length = 4 + e.matchlen(s+6, repIndex+4, src)
+				length := 4 + e.matchlen(s+6, repIndex+4, src)
 
 				seq.matchLen = uint32(length - zstdMinMatch)
 
@@ -833,13 +829,12 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
 		}
 		if true {
 			end := e.maxMatchOff + int32(len(d.content)) - 8
-			for i := e.maxMatchOff; i < end; i += 3 {
+			for i := e.maxMatchOff; i < end; i += 2 {
 				const hashLog = tableBits
 
 				cv := load6432(d.content, i-e.maxMatchOff)
-				nextHash := hashLen(cv, hashLog, tableFastHashLen)      // 0 -> 5
-				nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen)  // 1 -> 6
-				nextHash2 := hashLen(cv>>16, hashLog, tableFastHashLen) // 2 -> 7
+				nextHash := hashLen(cv, hashLog, tableFastHashLen)     // 0 -> 6
+				nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen) // 1 -> 7
 				e.dictTable[nextHash] = tableEntry{
 					val:    uint32(cv),
 					offset: i,
@@ -848,10 +843,6 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
 					val:    uint32(cv >> 8),
 					offset: i + 1,
 				}
-				e.dictTable[nextHash2] = tableEntry{
-					val:    uint32(cv >> 16),
-					offset: i + 2,
-				}
 			}
 		}
 		e.lastDictID = d.id

+ 52 - 74
vendor/github.com/klauspost/compress/zstd/encoder.go

@@ -8,6 +8,7 @@ import (
 	"crypto/rand"
 	"crypto/rand"
 	"fmt"
 	"fmt"
 	"io"
 	"io"
+	"math"
 	rdebug "runtime/debug"
 	rdebug "runtime/debug"
 	"sync"
 	"sync"
 
 
@@ -226,10 +227,7 @@ func (e *Encoder) nextBlock(final bool) error {
 			DictID:        e.o.dict.ID(),
 		}
 
-		dst, err := fh.appendTo(tmp[:0])
-		if err != nil {
-			return err
-		}
+		dst := fh.appendTo(tmp[:0])
 		s.headerWritten = true
 		s.wWg.Wait()
 		var n2 int
@@ -276,23 +274,9 @@ func (e *Encoder) nextBlock(final bool) error {
 			s.eofWritten = true
 		}
 
-		err := errIncompressible
-		// If we got the exact same number of literals as input,
-		// assume the literals cannot be compressed.
-		if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
-			err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
-		}
-		switch err {
-		case errIncompressible:
-			if debugEncoder {
-				println("Storing incompressible block as raw")
-			}
-			blk.encodeRaw(src)
-			// In fast mode, we do not transfer offsets, so we don't have to deal with changing the.
-		case nil:
-		default:
-			s.err = err
-			return err
+		s.err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
+		if s.err != nil {
+			return s.err
 		}
 		_, s.err = s.w.Write(blk.output)
 		s.nWritten += int64(len(blk.output))
@@ -342,22 +326,8 @@ func (e *Encoder) nextBlock(final bool) error {
 				}
 				s.wWg.Done()
 			}()
-			err := errIncompressible
-			// If we got the exact same number of literals as input,
-			// assume the literals cannot be compressed.
-			if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
-				err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
-			}
-			switch err {
-			case errIncompressible:
-				if debugEncoder {
-					println("Storing incompressible block as raw")
-				}
-				blk.encodeRaw(src)
-				// In fast mode, we do not transfer offsets, so we don't have to deal with changing the.
-			case nil:
-			default:
-				s.writeErr = err
+			s.writeErr = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
+			if s.writeErr != nil {
 				return
 			}
 			_, s.writeErr = s.w.Write(blk.output)
@@ -510,7 +480,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 				Checksum: false,
 				DictID:   0,
 			}
-			dst, _ = fh.appendTo(dst)
+			dst = fh.appendTo(dst)
 
 			// Write raw block as last one only.
 			var blk blockHeader
@@ -545,10 +515,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 	if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem {
 		dst = make([]byte, 0, len(src))
 	}
-	dst, err := fh.appendTo(dst)
-	if err != nil {
-		panic(err)
-	}
+	dst = fh.appendTo(dst)
 
 	// If we can do everything in one block, prefer that.
 	if len(src) <= e.o.blockSize {
@@ -567,25 +534,15 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 
 		// If we got the exact same number of literals as input,
 		// assume the literals cannot be compressed.
-		err := errIncompressible
 		oldout := blk.output
-		if len(blk.literals) != len(src) || len(src) != e.o.blockSize {
-			// Output directly to dst
-			blk.output = dst
-			err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
-		}
+		// Output directly to dst
+		blk.output = dst
 
-		switch err {
-		case errIncompressible:
-			if debugEncoder {
-				println("Storing incompressible block as raw")
-			}
-			dst = blk.encodeRawTo(dst, src)
-		case nil:
-			dst = blk.output
-		default:
+		err := blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
+		if err != nil {
 			panic(err)
 		}
+		dst = blk.output
 		blk.output = oldout
 	} else {
 		enc.Reset(e.o.dict, false)
@@ -604,25 +561,11 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 			if len(src) == 0 {
 				blk.last = true
 			}
-			err := errIncompressible
-			// If we got the exact same number of literals as input,
-			// assume the literals cannot be compressed.
-			if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize {
-				err = blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy)
-			}
-
-			switch err {
-			case errIncompressible:
-				if debugEncoder {
-					println("Storing incompressible block as raw")
-				}
-				dst = blk.encodeRawTo(dst, todo)
-				blk.popOffsets()
-			case nil:
-				dst = append(dst, blk.output...)
-			default:
+			err := blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy)
+			if err != nil {
 				panic(err)
 			}
+			dst = append(dst, blk.output...)
 			blk.reset(nil)
 		}
 	}
@@ -632,6 +575,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 	// Add padding with content from crypto/rand.Reader
 	if e.o.pad > 0 {
 		add := calcSkippableFrame(int64(len(dst)), int64(e.o.pad))
+		var err error
 		dst, err = skippableFrame(dst, add, rand.Reader)
 		if err != nil {
 			panic(err)
@@ -639,3 +583,37 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 	}
 	return dst
 }
+
+// MaxEncodedSize returns the expected maximum
+// size of an encoded block or stream.
+func (e *Encoder) MaxEncodedSize(size int) int {
+	frameHeader := 4 + 2 // magic + frame header & window descriptor
+	if e.o.dict != nil {
+		frameHeader += 4
+	}
+	// Frame content size:
+	if size < 256 {
+		frameHeader++
+	} else if size < 65536+256 {
+		frameHeader += 2
+	} else if size < math.MaxInt32 {
+		frameHeader += 4
+	} else {
+		frameHeader += 8
+	}
+	// Final crc
+	if e.o.crc {
+		frameHeader += 4
+	}
+
+	// Max overhead is 3 bytes/block.
+	// There cannot be 0 blocks.
+	blocks := (size + e.o.blockSize) / e.o.blockSize
+
+	// Combine, add padding.
+	maxSz := frameHeader + 3*blocks + size
+	if e.o.pad > 1 {
+		maxSz += calcSkippableFrame(int64(maxSz), int64(e.o.pad))
+	}
+	return maxSz
+}
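
As an aside, the new MaxEncodedSize API pairs naturally with EncodeAll. A minimal usage sketch, not part of the diff (the payload is hypothetical):

```go
package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	src := []byte("hypothetical payload")

	// An Encoder used only via EncodeAll needs no io.Writer.
	enc, err := zstd.NewWriter(nil)
	if err != nil {
		panic(err)
	}
	defer enc.Close()

	// Pre-size dst using the worst-case bound so EncodeAll never reallocates.
	dst := make([]byte, 0, enc.MaxEncodedSize(len(src)))
	dst = enc.EncodeAll(src, dst)
	fmt.Println("encoded size:", len(dst))
}
```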

+ 32 - 10
vendor/github.com/klauspost/compress/zstd/encoder_options.go

@@ -3,6 +3,8 @@ package zstd
 import (
 	"errors"
 	"fmt"
+	"math"
+	"math/bits"
 	"runtime"
 	"strings"
 )
@@ -37,7 +39,7 @@ func (o *encoderOptions) setDefault() {
 		blockSize:     maxCompressedBlockSize,
 		windowSize:    8 << 20,
 		level:         SpeedDefault,
-		allLitEntropy: true,
+		allLitEntropy: false,
 		lowMem:        false,
 	}
 }
@@ -47,22 +49,22 @@ func (o encoderOptions) encoder() encoder {
 	switch o.level {
 	case SpeedFastest:
 		if o.dict != nil {
-			return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+			return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
 		}
-		return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+		return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
 
 	case SpeedDefault:
 		if o.dict != nil {
-			return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}}
+			return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}}
 		}
-		return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+		return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
 	case SpeedBetterCompression:
 		if o.dict != nil {
-			return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+			return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
 		}
-		return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+		return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
 	case SpeedBestCompression:
-		return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+		return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
 	}
 	panic("unknown compression level")
 }
@@ -127,7 +129,7 @@ func WithEncoderPadding(n int) EOption {
 		}
 		// No need to waste our time.
 		if n == 1 {
-			o.pad = 0
+			n = 0
 		}
 		if n > 1<<30 {
 			return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ")
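
For context, a sketch of how the padding option being fixed here is used (the output buffer and payload are assumptions, not part of the diff):

```go
package main

import (
	"bytes"

	"github.com/klauspost/compress/zstd"
)

func main() {
	var out bytes.Buffer

	// Pad output to a multiple of 4 KiB; per the encoder.go change above,
	// the padding is appended as a skippable frame filled from crypto/rand.
	w, err := zstd.NewWriter(&out, zstd.WithEncoderPadding(4096))
	if err != nil {
		panic(err)
	}
	w.Write([]byte("assumed payload"))
	w.Close() // out.Len() is now a multiple of 4096
}
```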
@@ -236,7 +238,7 @@ func WithEncoderLevel(l EncoderLevel) EOption {
 			}
 		}
 		if !o.customALEntropy {
-			o.allLitEntropy = l > SpeedFastest
+			o.allLitEntropy = l > SpeedDefault
 		}
 
 		return nil
@@ -304,7 +306,13 @@ func WithLowerEncoderMem(b bool) EOption {
 }
 
 // WithEncoderDict allows to register a dictionary that will be used for the encode.
+//
+// The slice dict must be in the [dictionary format] produced by
+// "zstd --train" from the Zstandard reference implementation.
+//
 // The encoder *may* choose to use no dictionary instead for certain payloads.
+//
+// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
 func WithEncoderDict(dict []byte) EOption {
 	return func(o *encoderOptions) error {
 		d, err := loadDict(dict)
@@ -315,3 +323,17 @@ func WithEncoderDict(dict []byte) EOption {
 		return nil
 	}
 }
+
+// WithEncoderDictRaw registers a dictionary that may be used by the encoder.
+//
+// The slice content may contain arbitrary data. It will be used as an initial
+// history.
+func WithEncoderDictRaw(id uint32, content []byte) EOption {
+	return func(o *encoderOptions) error {
+		if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
+			return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content))
+		}
+		o.dict = &dict{id: id, content: content, offsets: [3]int{1, 4, 8}}
+		return nil
+	}
+}
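
A minimal usage sketch of the new raw-dictionary option; the ID and content here are hypothetical:

```go
package main

import (
	"github.com/klauspost/compress/zstd"
)

func main() {
	// Arbitrary bytes used as initial history, not a structured
	// "zstd --train" dictionary.
	raw := []byte("shared boilerplate that many small payloads start with")

	enc, err := zstd.NewWriter(nil, zstd.WithEncoderDictRaw(42, raw))
	if err != nil {
		panic(err)
	}
	defer enc.Close()

	out := enc.EncodeAll([]byte("shared boilerplate that ..."), nil)
	_ = out // the decode side must register the same raw dict for ID 42
}
```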

+ 31 - 59
vendor/github.com/klauspost/compress/zstd/framedec.go

@@ -5,7 +5,7 @@
 package zstd
 
 import (
-	"bytes"
+	"encoding/binary"
 	"encoding/hex"
 	"errors"
 	"io"
@@ -29,7 +29,7 @@ type frameDec struct {
 
 	FrameContentSize uint64
 
-	DictionaryID  *uint32
+	DictionaryID  uint32
 	HasCheckSum   bool
 	SingleSegment bool
 }
@@ -43,9 +43,9 @@ const (
 	MaxWindowSize = 1 << 29
 )
 
-var (
-	frameMagic          = []byte{0x28, 0xb5, 0x2f, 0xfd}
-	skippableFrameMagic = []byte{0x2a, 0x4d, 0x18}
+const (
+	frameMagic          = "\x28\xb5\x2f\xfd"
+	skippableFrameMagic = "\x2a\x4d\x18"
 )
 
 func newFrameDec(o decoderOptions) *frameDec {
@@ -73,25 +73,25 @@ func (d *frameDec) reset(br byteBuffer) error {
 		switch err {
 		case io.EOF, io.ErrUnexpectedEOF:
 			return io.EOF
-		default:
-			return err
 		case nil:
 			signature[0] = b[0]
+		default:
+			return err
 		}
 		// Read the rest, don't allow io.ErrUnexpectedEOF
 		b, err = br.readSmall(3)
 		switch err {
 		case io.EOF:
 			return io.EOF
-		default:
-			return err
 		case nil:
 			copy(signature[1:], b)
+		default:
+			return err
 		}
 
-		if !bytes.Equal(signature[1:4], skippableFrameMagic) || signature[0]&0xf0 != 0x50 {
+		if string(signature[1:4]) != skippableFrameMagic || signature[0]&0xf0 != 0x50 {
 			if debugDecoder {
-				println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString(skippableFrameMagic))
+				println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString([]byte(skippableFrameMagic)))
 			}
 			// Break if not skippable frame.
 			break
@@ -114,9 +114,9 @@ func (d *frameDec) reset(br byteBuffer) error {
 			return err
 		}
 	}
-	if !bytes.Equal(signature[:], frameMagic) {
+	if string(signature[:]) != frameMagic {
 		if debugDecoder {
-			println("Got magic numbers: ", signature, "want:", frameMagic)
+			println("Got magic numbers: ", signature, "want:", []byte(frameMagic))
 		}
 		return ErrMagicMismatch
 	}
@@ -155,7 +155,7 @@ func (d *frameDec) reset(br byteBuffer) error {
 
 	// Read Dictionary_ID
 	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
-	d.DictionaryID = nil
+	d.DictionaryID = 0
 	if size := fhd & 3; size != 0 {
 		if size == 3 {
 			size = 4
@@ -167,7 +167,7 @@ func (d *frameDec) reset(br byteBuffer) error {
 			return err
 		}
 		var id uint32
-		switch size {
+		switch len(b) {
 		case 1:
 			id = uint32(b[0])
 		case 2:
@@ -178,11 +178,7 @@ func (d *frameDec) reset(br byteBuffer) error {
 		if debugDecoder {
 			println("Dict size", size, "ID:", id)
 		}
-		if id > 0 {
-			// ID 0 means "sorry, no dictionary anyway".
-			// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
-			d.DictionaryID = &id
-		}
+		d.DictionaryID = id
 	}
 
 	// Read Frame_Content_Size
@@ -204,7 +200,7 @@ func (d *frameDec) reset(br byteBuffer) error {
 			println("Reading Frame content", err)
 			println("Reading Frame content", err)
 			return err
 			return err
 		}
 		}
-		switch fcsSize {
+		switch len(b) {
 		case 1:
 		case 1:
 			d.FrameContentSize = uint64(b[0])
 			d.FrameContentSize = uint64(b[0])
 		case 2:
 		case 2:
@@ -297,55 +293,38 @@ func (d *frameDec) next(block *blockDec) error {
 	return nil
 }
 
-// checkCRC will check the checksum if the frame has one.
+// checkCRC will check the checksum, assuming the frame has one.
 // Will return ErrCRCMismatch if crc check failed, otherwise nil.
 func (d *frameDec) checkCRC() error {
-	if !d.HasCheckSum {
-		return nil
-	}
-
 	// We can overwrite upper tmp now
-	want, err := d.rawInput.readSmall(4)
+	buf, err := d.rawInput.readSmall(4)
 	if err != nil {
 		println("CRC missing?", err)
 		return err
 	}
 
-	if d.o.ignoreChecksum {
-		return nil
-	}
-
-	var tmp [4]byte
-	got := d.crc.Sum64()
-	// Flip to match file order.
-	tmp[0] = byte(got >> 0)
-	tmp[1] = byte(got >> 8)
-	tmp[2] = byte(got >> 16)
-	tmp[3] = byte(got >> 24)
+	want := binary.LittleEndian.Uint32(buf[:4])
+	got := uint32(d.crc.Sum64())
 
-	if !bytes.Equal(tmp[:], want) {
+	if got != want {
 		if debugDecoder {
-			println("CRC Check Failed:", tmp[:], "!=", want)
+			printf("CRC check failed: got %08x, want %08x\n", got, want)
 		}
 		return ErrCRCMismatch
 	}
 	if debugDecoder {
-		println("CRC ok", tmp[:])
+		printf("CRC ok %08x\n", got)
 	}
 	return nil
 }
 
-// consumeCRC reads the checksum data if the frame has one.
+// consumeCRC skips over the checksum, assuming the frame has one.
 func (d *frameDec) consumeCRC() error {
-	if d.HasCheckSum {
-		_, err := d.rawInput.readSmall(4)
-		if err != nil {
-			println("CRC missing?", err)
-			return err
-		}
+	_, err := d.rawInput.readSmall(4)
+	if err != nil {
+		println("CRC missing?", err)
 	}
-
-	return nil
+	return err
 }
 
 // runDecoder will run the decoder for the remainder of the frame.
@@ -424,15 +403,8 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
 			if d.o.ignoreChecksum {
 				err = d.consumeCRC()
 			} else {
-				var n int
-				n, err = d.crc.Write(dst[crcStart:])
-				if err == nil {
-					if n != len(dst)-crcStart {
-						err = io.ErrShortWrite
-					} else {
-						err = d.checkCRC()
-					}
-				}
+				d.crc.Write(dst[crcStart:])
+				err = d.checkCRC()
 			}
 		}
 	}
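
As context for the checksum rework above: a zstd frame checksum is the low 32 bits of the XXH64 of the decoded content, stored little-endian after the last block. A self-contained sketch of the comparison, using the upstream xxhash module since the vendored copy above is an internal package (the helper name is hypothetical):

```go
package main

import (
	"encoding/binary"
	"fmt"

	"github.com/cespare/xxhash/v2"
)

// crcMatches mirrors checkCRC: hash the decoded content with XXH64,
// truncate to 32 bits, and compare against the little-endian stored value.
func crcMatches(decoded, stored []byte) bool {
	want := binary.LittleEndian.Uint32(stored[:4])
	got := uint32(xxhash.Sum64(decoded))
	return got == want
}

func main() {
	data := []byte("example frame content")
	var stored [4]byte
	binary.LittleEndian.PutUint32(stored[:], uint32(xxhash.Sum64(data)))
	fmt.Println(crcMatches(data, stored[:])) // true
}
```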

+ 2 - 2
vendor/github.com/klauspost/compress/zstd/frameenc.go

@@ -22,7 +22,7 @@ type frameHeader struct {
 
 const maxHeaderSize = 14
 
-func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
+func (f frameHeader) appendTo(dst []byte) []byte {
 	dst = append(dst, frameMagic...)
 	var fhd uint8
 	if f.Checksum {
@@ -88,7 +88,7 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
 	default:
 		panic("invalid fcs")
 	}
-	return dst, nil
+	return dst
 }
 
 const skippableFrameHeader = 4 + 4

+ 31 - 18
vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md

@@ -2,12 +2,7 @@
 
 VENDORED: Go to [github.com/cespare/xxhash](https://github.com/cespare/xxhash) for original package.
 
-
-[![GoDoc](https://godoc.org/github.com/cespare/xxhash?status.svg)](https://godoc.org/github.com/cespare/xxhash)
-[![Build Status](https://travis-ci.org/cespare/xxhash.svg?branch=master)](https://travis-ci.org/cespare/xxhash)
-
-xxhash is a Go implementation of the 64-bit
-[xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a
+xxhash is a Go implementation of the 64-bit [xxHash] algorithm, XXH64. This is a
 high-quality hashing algorithm that is much faster than anything in the Go
 standard library.
 
@@ -28,31 +23,49 @@ func (*Digest) WriteString(string) (int, error)
 func (*Digest) Sum64() uint64
 ```
 
-This implementation provides a fast pure-Go implementation and an even faster
-assembly implementation for amd64.
+The package is written with optimized pure Go and also contains even faster
+assembly implementations for amd64 and arm64. If desired, the `purego` build tag
+opts into using the Go code even on those architectures.
+
+[xxHash]: http://cyan4973.github.io/xxHash/
+
+## Compatibility
+
+This package is in a module and the latest code is in version 2 of the module.
+You need a version of Go with at least "minimal module compatibility" to use
+github.com/cespare/xxhash/v2:
+
+* 1.9.7+ for Go 1.9
+* 1.10.3+ for Go 1.10
+* Go 1.11 or later
+
+I recommend using the latest release of Go.
 
 ## Benchmarks
 
 Here are some quick benchmarks comparing the pure-Go and assembly
 implementations of Sum64.
 
-| input size | purego | asm |
-| --- | --- | --- |
-| 5 B   |  979.66 MB/s |  1291.17 MB/s  |
-| 100 B | 7475.26 MB/s | 7973.40 MB/s  |
-| 4 KB  | 17573.46 MB/s | 17602.65 MB/s |
-| 10 MB | 17131.46 MB/s | 17142.16 MB/s |
+| input size | purego    | asm       |
+| ---------- | --------- | --------- |
+| 4 B        |  1.3 GB/s |  1.2 GB/s |
+| 16 B       |  2.9 GB/s |  3.5 GB/s |
+| 100 B      |  6.9 GB/s |  8.1 GB/s |
+| 4 KB       | 11.7 GB/s | 16.7 GB/s |
+| 10 MB      | 12.0 GB/s | 17.3 GB/s |
 
-These numbers were generated on Ubuntu 18.04 with an Intel i7-8700K CPU using
-the following commands under Go 1.11.2:
+These numbers were generated on Ubuntu 20.04 with an Intel Xeon Platinum 8252C
+CPU using the following commands under Go 1.19.2:
 
 ```
-$ go test -tags purego -benchtime 10s -bench '/xxhash,direct,bytes'
-$ go test -benchtime 10s -bench '/xxhash,direct,bytes'
+benchstat <(go test -tags purego -benchtime 500ms -count 15 -bench 'Sum64$')
+benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$')
 ```
 
 ## Projects using this package
 
 - [InfluxDB](https://github.com/influxdata/influxdb)
 - [Prometheus](https://github.com/prometheus/prometheus)
+- [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
 - [FreeCache](https://github.com/coocood/freecache)
+- [FastCache](https://github.com/VictoriaMetrics/fastcache)
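
A quick usage sketch of the Digest API listed in this README, importing the upstream module rather than the vendored internal copy:

```go
package main

import (
	"fmt"

	"github.com/cespare/xxhash/v2"
)

func main() {
	// One-shot hashing.
	fmt.Printf("%016x\n", xxhash.Sum64String("hello"))

	// Streaming via the Digest type.
	d := xxhash.New()
	d.WriteString("hel")
	d.WriteString("lo")
	fmt.Println(d.Sum64() == xxhash.Sum64String("hello")) // true
}
```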

+ 20 - 27
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go

@@ -18,19 +18,11 @@ const (
 	prime5 uint64 = 2870177450012600261
 )
 
-// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where
-// possible in the Go code is worth a small (but measurable) performance boost
-// by avoiding some MOVQs. Vars are needed for the asm and also are useful for
-// convenience in the Go code in a few places where we need to intentionally
-// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the
-// result overflows a uint64).
-var (
-	prime1v = prime1
-	prime2v = prime2
-	prime3v = prime3
-	prime4v = prime4
-	prime5v = prime5
-)
+// Store the primes in an array as well.
+//
+// The consts are used when possible in Go code to avoid MOVs but we need a
+// contiguous array of the assembly code.
+var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5}
 
 // Digest implements hash.Hash64.
 type Digest struct {
@@ -52,10 +44,10 @@ func New() *Digest {
 
 // Reset clears the Digest's state so that it can be reused.
 func (d *Digest) Reset() {
-	d.v1 = prime1v + prime2
+	d.v1 = primes[0] + prime2
 	d.v2 = prime2
 	d.v3 = 0
-	d.v4 = -prime1v
+	d.v4 = -primes[0]
 	d.total = 0
 	d.n = 0
 }
@@ -71,21 +63,23 @@ func (d *Digest) Write(b []byte) (n int, err error) {
 	n = len(b)
 	d.total += uint64(n)
 
+	memleft := d.mem[d.n&(len(d.mem)-1):]
+
 	if d.n+n < 32 {
 		// This new data doesn't even fill the current block.
-		copy(d.mem[d.n:], b)
+		copy(memleft, b)
 		d.n += n
 		return
 	}
 
 	if d.n > 0 {
 		// Finish off the partial block.
-		copy(d.mem[d.n:], b)
+		c := copy(memleft, b)
 		d.v1 = round(d.v1, u64(d.mem[0:8]))
 		d.v2 = round(d.v2, u64(d.mem[8:16]))
 		d.v3 = round(d.v3, u64(d.mem[16:24]))
 		d.v4 = round(d.v4, u64(d.mem[24:32]))
-		b = b[32-d.n:]
+		b = b[c:]
 		d.n = 0
 	}
 
@@ -135,21 +129,20 @@ func (d *Digest) Sum64() uint64 {
 
 	h += d.total
 
-	i, end := 0, d.n
-	for ; i+8 <= end; i += 8 {
-		k1 := round(0, u64(d.mem[i:i+8]))
+	b := d.mem[:d.n&(len(d.mem)-1)]
+	for ; len(b) >= 8; b = b[8:] {
+		k1 := round(0, u64(b[:8]))
 		h ^= k1
 		h = rol27(h)*prime1 + prime4
 	}
-	if i+4 <= end {
-		h ^= uint64(u32(d.mem[i:i+4])) * prime1
+	if len(b) >= 4 {
+		h ^= uint64(u32(b[:4])) * prime1
 		h = rol23(h)*prime2 + prime3
-		i += 4
+		b = b[4:]
 	}
-	for i < end {
-		h ^= uint64(d.mem[i]) * prime5
+	for ; len(b) > 0; b = b[1:] {
+		h ^= uint64(b[0]) * prime5
 		h = rol11(h) * prime1
-		i++
 	}
 
 	h ^= h >> 33

+ 165 - 171
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s

@@ -1,3 +1,4 @@
+//go:build !appengine && gc && !purego && !noasm
 // +build !appengine
 // +build gc
 // +build !purego
@@ -5,212 +6,205 @@
 
 #include "textflag.h"
 
-// Register allocation:
-// AX	h
-// SI	pointer to advance through b
-// DX	n
-// BX	loop end
-// R8	v1, k1
-// R9	v2
-// R10	v3
-// R11	v4
-// R12	tmp
-// R13	prime1v
-// R14	prime2v
-// DI	prime4v
-
-// round reads from and advances the buffer pointer in SI.
-// It assumes that R13 has prime1v and R14 has prime2v.
-#define round(r) \
-	MOVQ  (SI), R12 \
-	ADDQ  $8, SI    \
-	IMULQ R14, R12  \
-	ADDQ  R12, r    \
-	ROLQ  $31, r    \
-	IMULQ R13, r
-
-// mergeRound applies a merge round on the two registers acc and val.
-// It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v.
-#define mergeRound(acc, val) \
-	IMULQ R14, val \
-	ROLQ  $31, val \
-	IMULQ R13, val \
-	XORQ  val, acc \
-	IMULQ R13, acc \
-	ADDQ  DI, acc
+// Registers:
+#define h      AX
+#define d      AX
+#define p      SI // pointer to advance through b
+#define n      DX
+#define end    BX // loop end
+#define v1     R8
+#define v2     R9
+#define v3     R10
+#define v4     R11
+#define x      R12
+#define prime1 R13
+#define prime2 R14
+#define prime4 DI
+
+#define round(acc, x) \
+	IMULQ prime2, x   \
+	ADDQ  x, acc      \
+	ROLQ  $31, acc    \
+	IMULQ prime1, acc
+
+// round0 performs the operation x = round(0, x).
+#define round0(x) \
+	IMULQ prime2, x \
+	ROLQ  $31, x    \
+	IMULQ prime1, x
+
+// mergeRound applies a merge round on the two registers acc and x.
+// It assumes that prime1, prime2, and prime4 have been loaded.
+#define mergeRound(acc, x) \
+	round0(x)         \
+	XORQ  x, acc      \
+	IMULQ prime1, acc \
+	ADDQ  prime4, acc
+
+// blockLoop processes as many 32-byte blocks as possible,
+// updating v1, v2, v3, and v4. It assumes that there is at least one block
+// to process.
+#define blockLoop() \
+loop:  \
+	MOVQ +0(p), x  \
+	round(v1, x)   \
+	MOVQ +8(p), x  \
+	round(v2, x)   \
+	MOVQ +16(p), x \
+	round(v3, x)   \
+	MOVQ +24(p), x \
+	round(v4, x)   \
+	ADDQ $32, p    \
+	CMPQ p, end    \
+	JLE  loop
 
 // func Sum64(b []byte) uint64
-TEXT ·Sum64(SB), NOSPLIT, $0-32
+TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
 	// Load fixed primes.
-	MOVQ ·prime1v(SB), R13
-	MOVQ ·prime2v(SB), R14
-	MOVQ ·prime4v(SB), DI
+	MOVQ ·primes+0(SB), prime1
+	MOVQ ·primes+8(SB), prime2
+	MOVQ ·primes+24(SB), prime4
 
 	// Load slice.
-	MOVQ b_base+0(FP), SI
-	MOVQ b_len+8(FP), DX
-	LEAQ (SI)(DX*1), BX
+	MOVQ b_base+0(FP), p
+	MOVQ b_len+8(FP), n
+	LEAQ (p)(n*1), end
 
 	// The first loop limit will be len(b)-32.
-	SUBQ $32, BX
+	SUBQ $32, end
 
 	// Check whether we have at least one block.
-	CMPQ DX, $32
+	CMPQ n, $32
 	JLT  noBlocks
 
 	// Set up initial state (v1, v2, v3, v4).
-	MOVQ R13, R8
-	ADDQ R14, R8
-	MOVQ R14, R9
-	XORQ R10, R10
-	XORQ R11, R11
-	SUBQ R13, R11
-
-	// Loop until SI > BX.
-blockLoop:
-	round(R8)
-	round(R9)
-	round(R10)
-	round(R11)
-
-	CMPQ SI, BX
-	JLE  blockLoop
-
-	MOVQ R8, AX
-	ROLQ $1, AX
-	MOVQ R9, R12
-	ROLQ $7, R12
-	ADDQ R12, AX
-	MOVQ R10, R12
-	ROLQ $12, R12
-	ADDQ R12, AX
-	MOVQ R11, R12
-	ROLQ $18, R12
-	ADDQ R12, AX
-
-	mergeRound(AX, R8)
-	mergeRound(AX, R9)
-	mergeRound(AX, R10)
-	mergeRound(AX, R11)
+	MOVQ prime1, v1
+	ADDQ prime2, v1
+	MOVQ prime2, v2
+	XORQ v3, v3
+	XORQ v4, v4
+	SUBQ prime1, v4
+
+	blockLoop()
+
+	MOVQ v1, h
+	ROLQ $1, h
+	MOVQ v2, x
+	ROLQ $7, x
+	ADDQ x, h
+	MOVQ v3, x
+	ROLQ $12, x
+	ADDQ x, h
+	MOVQ v4, x
+	ROLQ $18, x
+	ADDQ x, h
+
+	mergeRound(h, v1)
+	mergeRound(h, v2)
+	mergeRound(h, v3)
+	mergeRound(h, v4)
 
 	JMP afterBlocks
 
 noBlocks:
-	MOVQ ·prime5v(SB), AX
+	MOVQ ·primes+32(SB), h
 
 afterBlocks:
-	ADDQ DX, AX
-
-	// Right now BX has len(b)-32, and we want to loop until SI > len(b)-8.
-	ADDQ $24, BX
-
-	CMPQ SI, BX
-	JG   fourByte
-
-wordLoop:
-	// Calculate k1.
-	MOVQ  (SI), R8
-	ADDQ  $8, SI
-	IMULQ R14, R8
-	ROLQ  $31, R8
-	IMULQ R13, R8
-
-	XORQ  R8, AX
-	ROLQ  $27, AX
-	IMULQ R13, AX
-	ADDQ  DI, AX
-
-	CMPQ SI, BX
-	JLE  wordLoop
-
-fourByte:
-	ADDQ $4, BX
-	CMPQ SI, BX
-	JG   singles
-
-	MOVL  (SI), R8
-	ADDQ  $4, SI
-	IMULQ R13, R8
-	XORQ  R8, AX
-
-	ROLQ  $23, AX
-	IMULQ R14, AX
-	ADDQ  ·prime3v(SB), AX
-
-singles:
-	ADDQ $4, BX
-	CMPQ SI, BX
+	ADDQ n, h
+
+	ADDQ $24, end
+	CMPQ p, end
+	JG   try4
+
+loop8:
+	MOVQ  (p), x
+	ADDQ  $8, p
+	round0(x)
+	XORQ  x, h
+	ROLQ  $27, h
+	IMULQ prime1, h
+	ADDQ  prime4, h
+
+	CMPQ p, end
+	JLE  loop8
+
+try4:
+	ADDQ $4, end
+	CMPQ p, end
+	JG   try1
+
+	MOVL  (p), x
+	ADDQ  $4, p
+	IMULQ prime1, x
+	XORQ  x, h
+
+	ROLQ  $23, h
+	IMULQ prime2, h
+	ADDQ  ·primes+16(SB), h
+
+try1:
+	ADDQ $4, end
+	CMPQ p, end
 	JGE  finalize
 
-singlesLoop:
-	MOVBQZX (SI), R12
-	ADDQ    $1, SI
-	IMULQ   ·prime5v(SB), R12
-	XORQ    R12, AX
+loop1:
+	MOVBQZX (p), x
+	ADDQ    $1, p
+	IMULQ   ·primes+32(SB), x
+	XORQ    x, h
+	ROLQ    $11, h
+	IMULQ   prime1, h
 
-	ROLQ  $11, AX
-	IMULQ R13, AX
-
-	CMPQ SI, BX
-	JL   singlesLoop
+	CMPQ p, end
+	JL   loop1
 
 finalize:
-	MOVQ  AX, R12
-	SHRQ  $33, R12
-	XORQ  R12, AX
-	IMULQ R14, AX
-	MOVQ  AX, R12
-	SHRQ  $29, R12
-	XORQ  R12, AX
-	IMULQ ·prime3v(SB), AX
-	MOVQ  AX, R12
-	SHRQ  $32, R12
-	XORQ  R12, AX
-
-	MOVQ AX, ret+24(FP)
+	MOVQ  h, x
+	SHRQ  $33, x
+	XORQ  x, h
+	IMULQ prime2, h
+	MOVQ  h, x
+	SHRQ  $29, x
+	XORQ  x, h
+	IMULQ ·primes+16(SB), h
+	MOVQ  h, x
+	SHRQ  $32, x
+	XORQ  x, h
+
+	MOVQ h, ret+24(FP)
 	RET
 
-// writeBlocks uses the same registers as above except that it uses AX to store
-// the d pointer.
-
 // func writeBlocks(d *Digest, b []byte) int
-TEXT ·writeBlocks(SB), NOSPLIT, $0-40
+TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
 	// Load fixed primes needed for round.
-	MOVQ ·prime1v(SB), R13
-	MOVQ ·prime2v(SB), R14
+	MOVQ ·primes+0(SB), prime1
+	MOVQ ·primes+8(SB), prime2
 
 	// Load slice.
-	MOVQ b_base+8(FP), SI
-	MOVQ b_len+16(FP), DX
-	LEAQ (SI)(DX*1), BX
-	SUBQ $32, BX
+	MOVQ b_base+8(FP), p
+	MOVQ b_len+16(FP), n
+	LEAQ (p)(n*1), end
+	SUBQ $32, end
 
 	// Load vN from d.
-	MOVQ d+0(FP), AX
-	MOVQ 0(AX), R8   // v1
-	MOVQ 8(AX), R9   // v2
-	MOVQ 16(AX), R10 // v3
-	MOVQ 24(AX), R11 // v4
+	MOVQ s+0(FP), d
+	MOVQ 0(d), v1
+	MOVQ 8(d), v2
+	MOVQ 16(d), v3
+	MOVQ 24(d), v4
 
 	// We don't need to check the loop condition here; this function is
 	// always called with at least one block of data to process.
-blockLoop:
-	round(R8)
-	round(R9)
-	round(R10)
-	round(R11)
-
-	CMPQ SI, BX
-	JLE  blockLoop
+	blockLoop()
 
 	// Copy vN back to d.
-	MOVQ R8, 0(AX)
-	MOVQ R9, 8(AX)
-	MOVQ R10, 16(AX)
-	MOVQ R11, 24(AX)
-
-	// The number of bytes written is SI minus the old base pointer.
-	SUBQ b_base+8(FP), SI
-	MOVQ SI, ret+32(FP)
+	MOVQ v1, 0(d)
+	MOVQ v2, 8(d)
+	MOVQ v3, 16(d)
+	MOVQ v4, 24(d)
+
+	// The number of bytes written is p minus the old base pointer.
+	SUBQ b_base+8(FP), p
+	MOVQ p, ret+32(FP)
 
 	RET

+ 69 - 71
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s

@@ -1,13 +1,17 @@
-// +build gc,!purego,!noasm
+//go:build !appengine && gc && !purego && !noasm
+// +build !appengine
+// +build gc
+// +build !purego
+// +build !noasm
 
 #include "textflag.h"
 
-// Register allocation.
+// Registers:
 #define digest	R1
-#define h	R2 // Return value.
-#define p	R3 // Input pointer.
-#define len	R4
-#define nblocks	R5 // len / 32.
+#define h	R2 // return value
+#define p	R3 // input pointer
+#define n	R4 // input length
+#define nblocks	R5 // n / 32
 #define prime1	R7
 #define prime2	R8
 #define prime3	R9
@@ -25,60 +29,52 @@
 #define round(acc, x) \
 #define round(acc, x) \
 	MADD prime2, acc, x, acc \
 	MADD prime2, acc, x, acc \
 	ROR  $64-31, acc         \
 	ROR  $64-31, acc         \
-	MUL  prime1, acc         \
+	MUL  prime1, acc
 
 
-// x = round(0, x).
+// round0 performs the operation x = round(0, x).
 #define round0(x) \
 #define round0(x) \
 	MUL prime2, x \
 	MUL prime2, x \
 	ROR $64-31, x \
 	ROR $64-31, x \
-	MUL prime1, x \
-
-#define mergeRound(x) \
-	round0(x)                 \
-	EOR  x, h                 \
-	MADD h, prime4, prime1, h \
-
-// Update v[1-4] with 32-byte blocks. Assumes len >= 32.
-#define blocksLoop() \
-	LSR     $5, len, nblocks \
-	PCALIGN $16              \
-	loop:                    \
-	LDP.P   32(p), (x1, x2)  \
-	round(v1, x1)            \
-	LDP     -16(p), (x3, x4) \
-	round(v2, x2)            \
-	SUB     $1, nblocks      \
-	round(v3, x3)            \
-	round(v4, x4)            \
-	CBNZ    nblocks, loop    \
-
-// The primes are repeated here to ensure that they're stored
-// in a contiguous array, so we can load them with LDP.
-DATA primes<> +0(SB)/8, $11400714785074694791
-DATA primes<> +8(SB)/8, $14029467366897019727
-DATA primes<>+16(SB)/8, $1609587929392839161
-DATA primes<>+24(SB)/8, $9650029242287828579
-DATA primes<>+32(SB)/8, $2870177450012600261
-GLOBL primes<>(SB), NOPTR+RODATA, $40
+	MUL prime1, x
+
+#define mergeRound(acc, x) \
+	round0(x)                     \
+	EOR  x, acc                   \
+	MADD acc, prime4, prime1, acc
+
+// blockLoop processes as many 32-byte blocks as possible,
+// updating v1, v2, v3, and v4. It assumes that n >= 32.
+#define blockLoop() \
+	LSR     $5, n, nblocks  \
+	PCALIGN $16             \
+	loop:                   \
+	LDP.P   16(p), (x1, x2) \
+	LDP.P   16(p), (x3, x4) \
+	round(v1, x1)           \
+	round(v2, x2)           \
+	round(v3, x3)           \
+	round(v4, x4)           \
+	SUB     $1, nblocks     \
+	CBNZ    nblocks, loop
 
 // func Sum64(b []byte) uint64
-TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32
-	LDP b_base+0(FP), (p, len)
+TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
+	LDP b_base+0(FP), (p, n)
 
-	LDP  primes<> +0(SB), (prime1, prime2)
-	LDP  primes<>+16(SB), (prime3, prime4)
-	MOVD primes<>+32(SB), prime5
+	LDP  ·primes+0(SB), (prime1, prime2)
+	LDP  ·primes+16(SB), (prime3, prime4)
+	MOVD ·primes+32(SB), prime5
 
-	CMP  $32, len
-	CSEL LO, prime5, ZR, h // if len < 32 { h = prime5 } else { h = 0 }
-	BLO  afterLoop
+	CMP  $32, n
+	CSEL LT, prime5, ZR, h // if n < 32 { h = prime5 } else { h = 0 }
+	BLT  afterLoop
 
 	ADD  prime1, prime2, v1
 	MOVD prime2, v2
 	MOVD $0, v3
 	NEG  prime1, v4
 
-	blocksLoop()
+	blockLoop()
 
 	ROR $64-1, v1, x1
 	ROR $64-7, v2, x2
@@ -88,71 +84,75 @@ TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32
 	ADD x3, x4
 	ADD x2, x4, h
 
-	mergeRound(v1)
-	mergeRound(v2)
-	mergeRound(v3)
-	mergeRound(v4)
+	mergeRound(h, v1)
+	mergeRound(h, v2)
+	mergeRound(h, v3)
+	mergeRound(h, v4)
 
 afterLoop:
-	ADD len, h
+	ADD n, h
 
-	TBZ   $4, len, try8
+	TBZ   $4, n, try8
 	LDP.P 16(p), (x1, x2)
 
 	round0(x1)
+
+	// NOTE: here and below, sequencing the EOR after the ROR (using a
+	// rotated register) is worth a small but measurable speedup for small
+	// inputs.
 	ROR  $64-27, h
 	EOR  x1 @> 64-27, h, h
 	MADD h, prime4, prime1, h
 
 	round0(x2)
 	ROR  $64-27, h
-	EOR  x2 @> 64-27, h
+	EOR  x2 @> 64-27, h, h
 	MADD h, prime4, prime1, h
 
 try8:
-	TBZ    $3, len, try4
+	TBZ    $3, n, try4
 	MOVD.P 8(p), x1
 
 	round0(x1)
 	ROR  $64-27, h
-	EOR  x1 @> 64-27, h
+	EOR  x1 @> 64-27, h, h
 	MADD h, prime4, prime1, h
 
 try4:
-	TBZ     $2, len, try2
+	TBZ     $2, n, try2
 	MOVWU.P 4(p), x2
 
 	MUL  prime1, x2
 	ROR  $64-23, h
-	EOR  x2 @> 64-23, h
+	EOR  x2 @> 64-23, h, h
 	MADD h, prime3, prime2, h
 
 try2:
-	TBZ     $1, len, try1
+	TBZ     $1, n, try1
 	MOVHU.P 2(p), x3
 	AND     $255, x3, x1
 	LSR     $8, x3, x2
 
 	MUL prime5, x1
 	ROR $64-11, h
-	EOR x1 @> 64-11, h
+	EOR x1 @> 64-11, h, h
 	MUL prime1, h
 
 	MUL prime5, x2
 	ROR $64-11, h
-	EOR x2 @> 64-11, h
+	EOR x2 @> 64-11, h, h
 	MUL prime1, h
 
 try1:
-	TBZ   $0, len, end
+	TBZ   $0, n, finalize
 	MOVBU (p), x4
 
 	MUL prime5, x4
 	ROR $64-11, h
-	EOR x4 @> 64-11, h
+	EOR x4 @> 64-11, h, h
 	MUL prime1, h
 
-end:
+finalize:
 	EOR h >> 33, h
 	MUL prime2, h
 	EOR h >> 29, h
@@ -163,24 +163,22 @@ end:
 	RET
 
 // func writeBlocks(d *Digest, b []byte) int
-//
-// Assumes len(b) >= 32.
-TEXT ·writeBlocks(SB), NOFRAME+NOSPLIT, $0-40
-	LDP primes<>(SB), (prime1, prime2)
+TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
+	LDP ·primes+0(SB), (prime1, prime2)
 
 	// Load state. Assume v[1-4] are stored contiguously.
 	MOVD d+0(FP), digest
 	LDP  0(digest), (v1, v2)
 	LDP  16(digest), (v3, v4)
 
-	LDP b_base+8(FP), (p, len)
+	LDP b_base+8(FP), (p, n)
 
-	blocksLoop()
+	blockLoop()
 
 	// Store updated state.
 	STP (v1, v2), 0(digest)
 	STP (v3, v4), 16(digest)
 
-	BIC  $31, len
-	MOVD len, ret+32(FP)
+	BIC  $31, n
+	MOVD n, ret+32(FP)
 	RET

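Both assembly ports above implement the standard xxHash64 lane operations. As a minimal Go sketch of what the round and mergeRound macros compute (mirroring the pure-Go fallback in xxhash_other.go; the constants are the xxHash64 primes kept in the ·primes array):

    package sketch

    import "math/bits"

    const (
    	prime1 uint64 = 11400714785074694791
    	prime2 uint64 = 14029467366897019727
    	prime4 uint64 = 9650029242287828579
    )

    // round folds one 8-byte lane into an accumulator:
    // acc = rol31(acc + x*prime2) * prime1
    func round(acc, x uint64) uint64 {
    	acc += x * prime2
    	acc = bits.RotateLeft64(acc, 31)
    	return acc * prime1
    }

    // mergeRound mixes a finished lane accumulator into the running hash:
    // h = (h ^ round(0, v))*prime1 + prime4
    func mergeRound(h, v uint64) uint64 {
    	h ^= round(0, v)
    	return h*prime1 + prime4
    }
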
+ 1 - 1
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go

@@ -13,4 +13,4 @@ package xxhash
 func Sum64(b []byte) uint64
 
 //go:noescape
-func writeBlocks(d *Digest, b []byte) int
+func writeBlocks(s *Digest, b []byte) int

+ 9 - 10
vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go

@@ -15,10 +15,10 @@ func Sum64(b []byte) uint64 {
 	var h uint64
 
 	if n >= 32 {
-		v1 := prime1v + prime2
+		v1 := primes[0] + prime2
 		v2 := prime2
 		v3 := uint64(0)
-		v4 := -prime1v
+		v4 := -primes[0]
 		for len(b) >= 32 {
 			v1 = round(v1, u64(b[0:8:len(b)]))
 			v2 = round(v2, u64(b[8:16:len(b)]))
@@ -37,19 +37,18 @@ func Sum64(b []byte) uint64 {
 
 	h += uint64(n)
 
-	i, end := 0, len(b)
-	for ; i+8 <= end; i += 8 {
-		k1 := round(0, u64(b[i:i+8:len(b)]))
+	for ; len(b) >= 8; b = b[8:] {
+		k1 := round(0, u64(b[:8]))
 		h ^= k1
 		h = rol27(h)*prime1 + prime4
 	}
-	if i+4 <= end {
-		h ^= uint64(u32(b[i:i+4:len(b)])) * prime1
+	if len(b) >= 4 {
+		h ^= uint64(u32(b[:4])) * prime1
 		h = rol23(h)*prime2 + prime3
-		i += 4
+		b = b[4:]
 	}
-	for ; i < end; i++ {
-		h ^= uint64(b[i]) * prime5
+	for ; len(b) > 0; b = b[1:] {
+		h ^= uint64(b[0]) * prime5
 		h = rol11(h) * prime1
 	}
 

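The trailing EOR h >> 33 ... MUL sequence in both assembly files is the xxHash64 finalization avalanche; as a Go sketch (constants taken from the same primes table, prime3 = 1609587929392839161):

    func avalanche(h uint64) uint64 {
    	const (
    		prime2 uint64 = 14029467366897019727
    		prime3 uint64 = 1609587929392839161
    	)
    	h ^= h >> 33 // spread high bits down
    	h *= prime2
    	h ^= h >> 29
    	h *= prime3
    	h ^= h >> 32
    	return h
    }
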
+ 16 - 0
vendor/github.com/klauspost/compress/zstd/matchlen_amd64.go

@@ -0,0 +1,16 @@
+//go:build amd64 && !appengine && !noasm && gc
+// +build amd64,!appengine,!noasm,gc
+
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+
+package zstd
+
+// matchLen returns how many bytes match in a and b
+//
+// It assumes that:
+//
+//	len(a) <= len(b) and len(a) > 0
+//
+//go:noescape
+func matchLen(a []byte, b []byte) int

+ 68 - 0
vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s

@@ -0,0 +1,68 @@
+// Copied from S2 implementation.
+
+//go:build !appengine && !noasm && gc && !noasm
+
+#include "textflag.h"
+
+// func matchLen(a []byte, b []byte) int
+// Requires: BMI
+TEXT ·matchLen(SB), NOSPLIT, $0-56
+	MOVQ a_base+0(FP), AX
+	MOVQ b_base+24(FP), CX
+	MOVQ a_len+8(FP), DX
+
+	// matchLen
+	XORL SI, SI
+	CMPL DX, $0x08
+	JB   matchlen_match4_standalone
+
+matchlen_loopback_standalone:
+	MOVQ  (AX)(SI*1), BX
+	XORQ  (CX)(SI*1), BX
+	TESTQ BX, BX
+	JZ    matchlen_loop_standalone
+
+#ifdef GOAMD64_v3
+	TZCNTQ BX, BX
+#else
+	BSFQ BX, BX
+#endif
+	SARQ $0x03, BX
+	LEAL (SI)(BX*1), SI
+	JMP  gen_match_len_end
+
+matchlen_loop_standalone:
+	LEAL -8(DX), DX
+	LEAL 8(SI), SI
+	CMPL DX, $0x08
+	JAE  matchlen_loopback_standalone
+
+matchlen_match4_standalone:
+	CMPL DX, $0x04
+	JB   matchlen_match2_standalone
+	MOVL (AX)(SI*1), BX
+	CMPL (CX)(SI*1), BX
+	JNE  matchlen_match2_standalone
+	LEAL -4(DX), DX
+	LEAL 4(SI), SI
+
+matchlen_match2_standalone:
+	CMPL DX, $0x02
+	JB   matchlen_match1_standalone
+	MOVW (AX)(SI*1), BX
+	CMPW (CX)(SI*1), BX
+	JNE  matchlen_match1_standalone
+	LEAL -2(DX), DX
+	LEAL 2(SI), SI
+
+matchlen_match1_standalone:
+	CMPL DX, $0x01
+	JB   gen_match_len_end
+	MOVB (AX)(SI*1), BL
+	CMPB (CX)(SI*1), BL
+	JNE  gen_match_len_end
+	INCL SI
+
+gen_match_len_end:
+	MOVQ SI, ret+48(FP)
+	RET

+ 33 - 0
vendor/github.com/klauspost/compress/zstd/matchlen_generic.go

@@ -0,0 +1,33 @@
+//go:build !amd64 || appengine || !gc || noasm
+// +build !amd64 appengine !gc noasm
+
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+
+package zstd
+
+import (
+	"encoding/binary"
+	"math/bits"
+)
+
+// matchLen returns the maximum common prefix length of a and b.
+// a must be the shortest of the two.
+func matchLen(a, b []byte) (n int) {
+	for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
+		diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
+		if diff != 0 {
+			return n + bits.TrailingZeros64(diff)>>3
+		}
+		n += 8
+	}
+
+	for i := range a {
+		if a[i] != b[i] {
+			break
+		}
+		n++
+	}
+	return n
+
+}

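Usage of matchLen is the same for the generic and assembly versions: the caller passes the shorter slice first and gets back the length of the common prefix. A quick sketch (inside the zstd package, since matchLen is unexported):

    func matchLenExample() int {
    	a := []byte("zstandard!") // shorter slice goes first
    	b := []byte("zstandard compression")
    	return matchLen(a, b) // 9: "zstandard" is the shared prefix
    }
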
+ 11 - 17
vendor/github.com/klauspost/compress/zstd/seqdec.go

@@ -236,13 +236,16 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
 		maxBlockSize = s.windowSize
 	}
 
+	if debugDecoder {
+		println("decodeSync: decoding", seqs, "sequences", br.remain(), "bits remain on stream")
+	}
 	for i := seqs - 1; i >= 0; i-- {
 		if br.overread() {
-			printf("reading sequence %d, exceeded available data\n", seqs-i)
+			printf("reading sequence %d, exceeded available data. Overread by %d\n", seqs-i, -br.remain())
 			return io.ErrUnexpectedEOF
 		}
 		var ll, mo, ml int
-		if br.off > 4+((maxOffsetBits+16+16)>>3) {
+		if len(br.in) > 4+((maxOffsetBits+16+16)>>3) {
 			// inlined function:
 			// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
 
@@ -314,9 +317,6 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
 		}
 		size := ll + ml + len(out)
 		if size-startSize > maxBlockSize {
-			if size-startSize == 424242 {
-				panic("here")
-			}
 			return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
 		}
 		if size > cap(out) {
@@ -427,8 +427,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
 		}
 	}
 
-	// Check if space for literals
-	if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize {
+	if size := len(s.literals) + len(out) - startSize; size > maxBlockSize {
 		return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
 	}
 
@@ -453,18 +452,13 @@ func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol)
 
 	// extra bits are stored in reverse order.
 	br.fill()
-	if s.maxBits <= 32 {
-		mo += br.getBits(moB)
-		ml += br.getBits(mlB)
-		ll += br.getBits(llB)
-	} else {
-		mo += br.getBits(moB)
+	mo += br.getBits(moB)
+	if s.maxBits > 32 {
 		br.fill()
-		// matchlength+literal length, max 32 bits
-		ml += br.getBits(mlB)
-		ll += br.getBits(llB)
-
 	}
+	// matchlength+literal length, max 32 bits
+	ml += br.getBits(mlB)
+	ll += br.getBits(llB)
 	mo = s.adjustOffset(mo, ll, moB)
 	return
 }

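The reshuffled next() above reads the offset extra bits right after the first fill() and refills only when s.maxBits > 32, since the match-length and literal-length extra bits together never need more than 32 bits. Schematically, using the same bitReader methods as in the diff (the helper name is illustrative):

    func readExtra(br *bitReader, moB, mlB, llB uint8, maxBits uint8) (mo, ml, ll int) {
    	br.fill()
    	mo = br.getBits(moB) // offset extra bits first
    	if maxBits > 32 {    // ml+ll may need up to 32 more bits
    		br.fill()
    	}
    	ml = br.getBits(mlB)
    	ll = br.getBits(llB)
    	return
    }
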
+ 16 - 1
vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go

@@ -5,6 +5,7 @@ package zstd
 
 import (
 	"fmt"
+	"io"
 
 	"github.com/klauspost/compress/internal/cpuinfo"
 )
@@ -134,6 +135,9 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
 		return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available",
 			ctx.ll, ctx.litRemain+ctx.ll)
 
+	case errorOverread:
+		return true, io.ErrUnexpectedEOF
+
 	case errorNotEnoughSpace:
 		size := ctx.outPosition + ctx.ll + ctx.ml
 		if debugDecoder {
@@ -148,7 +152,6 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
 	s.seqSize += ctx.litRemain
 	if s.seqSize > maxBlockSize {
 		return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
-
 	}
 	err := br.close()
 	if err != nil {
@@ -203,6 +206,9 @@ const errorNotEnoughLiterals = 4
 // error reported when capacity of `out` is too small
 const errorNotEnoughSpace = 5
 
+// error reported when bits are overread.
+const errorOverread = 6
+
 // sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
 //
 // Please refer to seqdec_generic.go for the reference implementation.
@@ -248,6 +254,10 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
 		litRemain: len(s.literals),
 	}
 
+	if debugDecoder {
+		println("decode: decoding", len(seqs), "sequences", br.remain(), "bits remain on stream")
+	}
+
 	s.seqSize = 0
 	lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56
 	var errCode int
@@ -278,6 +288,8 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
 		case errorNotEnoughLiterals:
 			ll := ctx.seqs[i].ll
 			return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll)
+		case errorOverread:
+			return io.ErrUnexpectedEOF
 		}
 
 		return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode)
@@ -292,6 +304,9 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
 	if s.seqSize > maxBlockSize {
 		return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
 	}
+	if debugDecoder {
+		println("decode: ", br.remain(), "bits remain on stream. code:", errCode)
+	}
 	err := br.close()
 	if err != nil {
 		printf("Closing sequences: %v, %+v\n", err, *br)

+ 170 - 94
vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s

@@ -5,11 +5,11 @@
 // func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 // Requires: CMOV
 TEXT ·sequenceDecs_decode_amd64(SB), $8-32
-	MOVQ    br+8(FP), AX
-	MOVQ    32(AX), DX
-	MOVBQZX 40(AX), BX
-	MOVQ    24(AX), SI
-	MOVQ    (AX), AX
+	MOVQ    br+8(FP), CX
+	MOVQ    24(CX), DX
+	MOVBQZX 32(CX), BX
+	MOVQ    (CX), AX
+	MOVQ    8(CX), SI
 	ADDQ    SI, AX
 	MOVQ    AX, (SP)
 	MOVQ    ctx+16(FP), AX
@@ -38,7 +38,7 @@ sequenceDecs_decode_amd64_main_loop:
 
 sequenceDecs_decode_amd64_fill_byte_by_byte:
 	CMPQ    SI, $0x00
-	JLE     sequenceDecs_decode_amd64_fill_end
+	JLE     sequenceDecs_decode_amd64_fill_check_overread
 	CMPQ    BX, $0x07
 	JLE     sequenceDecs_decode_amd64_fill_end
 	SHLQ    $0x08, DX
@@ -49,6 +49,10 @@ sequenceDecs_decode_amd64_fill_byte_by_byte:
 	ORQ     AX, DX
 	JMP     sequenceDecs_decode_amd64_fill_byte_by_byte
 
+sequenceDecs_decode_amd64_fill_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
 sequenceDecs_decode_amd64_fill_end:
 	// Update offset
 	MOVQ  R9, AX
@@ -105,7 +109,7 @@ sequenceDecs_decode_amd64_ml_update_zero:
 
 sequenceDecs_decode_amd64_fill_2_byte_by_byte:
 	CMPQ    SI, $0x00
-	JLE     sequenceDecs_decode_amd64_fill_2_end
+	JLE     sequenceDecs_decode_amd64_fill_2_check_overread
 	CMPQ    BX, $0x07
 	JLE     sequenceDecs_decode_amd64_fill_2_end
 	SHLQ    $0x08, DX
@@ -116,6 +120,10 @@ sequenceDecs_decode_amd64_fill_2_byte_by_byte:
 	ORQ     AX, DX
 	JMP     sequenceDecs_decode_amd64_fill_2_byte_by_byte
 
+sequenceDecs_decode_amd64_fill_2_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
 sequenceDecs_decode_amd64_fill_2_end:
 	// Update literal length
 	MOVQ  DI, AX
@@ -293,9 +301,9 @@ sequenceDecs_decode_amd64_match_len_ofs_ok:
 	MOVQ R12, 152(AX)
 	MOVQ R13, 160(AX)
 	MOVQ br+8(FP), AX
-	MOVQ DX, 32(AX)
-	MOVB BL, 40(AX)
-	MOVQ SI, 24(AX)
+	MOVQ DX, 24(AX)
+	MOVB BL, 32(AX)
+	MOVQ SI, 8(AX)
 
 	// Return success
 	MOVQ $0x00000000, ret+24(FP)
@@ -320,18 +328,19 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET
 
-	// Return with not enough output space error
-	MOVQ $0x00000005, ret+24(FP)
+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
 	RET
 
 // func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 // Requires: CMOV
 TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32
-	MOVQ    br+8(FP), AX
-	MOVQ    32(AX), DX
-	MOVBQZX 40(AX), BX
-	MOVQ    24(AX), SI
-	MOVQ    (AX), AX
+	MOVQ    br+8(FP), CX
+	MOVQ    24(CX), DX
+	MOVBQZX 32(CX), BX
+	MOVQ    (CX), AX
+	MOVQ    8(CX), SI
 	ADDQ    SI, AX
 	MOVQ    AX, (SP)
 	MOVQ    ctx+16(FP), AX
@@ -360,7 +369,7 @@ sequenceDecs_decode_56_amd64_main_loop:
 
 sequenceDecs_decode_56_amd64_fill_byte_by_byte:
 	CMPQ    SI, $0x00
-	JLE     sequenceDecs_decode_56_amd64_fill_end
+	JLE     sequenceDecs_decode_56_amd64_fill_check_overread
 	CMPQ    BX, $0x07
 	JLE     sequenceDecs_decode_56_amd64_fill_end
 	SHLQ    $0x08, DX
@@ -371,6 +380,10 @@ sequenceDecs_decode_56_amd64_fill_byte_by_byte:
 	ORQ     AX, DX
 	JMP     sequenceDecs_decode_56_amd64_fill_byte_by_byte
 
+sequenceDecs_decode_56_amd64_fill_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
 sequenceDecs_decode_56_amd64_fill_end:
 	// Update offset
 	MOVQ  R9, AX
@@ -590,9 +603,9 @@ sequenceDecs_decode_56_amd64_match_len_ofs_ok:
 	MOVQ R12, 152(AX)
 	MOVQ R13, 160(AX)
 	MOVQ br+8(FP), AX
-	MOVQ DX, 32(AX)
-	MOVB BL, 40(AX)
-	MOVQ SI, 24(AX)
+	MOVQ DX, 24(AX)
+	MOVB BL, 32(AX)
+	MOVQ SI, 8(AX)
 
 	// Return success
 	MOVQ $0x00000000, ret+24(FP)
@@ -617,18 +630,19 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET
 
-	// Return with not enough output space error
-	MOVQ $0x00000005, ret+24(FP)
+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
 	RET
 
 // func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 // Requires: BMI, BMI2, CMOV
 TEXT ·sequenceDecs_decode_bmi2(SB), $8-32
-	MOVQ    br+8(FP), CX
-	MOVQ    32(CX), AX
-	MOVBQZX 40(CX), DX
-	MOVQ    24(CX), BX
-	MOVQ    (CX), CX
+	MOVQ    br+8(FP), BX
+	MOVQ    24(BX), AX
+	MOVBQZX 32(BX), DX
+	MOVQ    (BX), CX
+	MOVQ    8(BX), BX
 	ADDQ    BX, CX
 	MOVQ    CX, (SP)
 	MOVQ    ctx+16(FP), CX
@@ -657,7 +671,7 @@ sequenceDecs_decode_bmi2_main_loop:
 
 sequenceDecs_decode_bmi2_fill_byte_by_byte:
 	CMPQ    BX, $0x00
-	JLE     sequenceDecs_decode_bmi2_fill_end
+	JLE     sequenceDecs_decode_bmi2_fill_check_overread
 	CMPQ    DX, $0x07
 	JLE     sequenceDecs_decode_bmi2_fill_end
 	SHLQ    $0x08, AX
@@ -668,6 +682,10 @@ sequenceDecs_decode_bmi2_fill_byte_by_byte:
 	ORQ     CX, AX
 	JMP     sequenceDecs_decode_bmi2_fill_byte_by_byte
 
+sequenceDecs_decode_bmi2_fill_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
 sequenceDecs_decode_bmi2_fill_end:
 	// Update offset
 	MOVQ   $0x00000808, CX
@@ -708,7 +726,7 @@ sequenceDecs_decode_bmi2_fill_end:
 
 sequenceDecs_decode_bmi2_fill_2_byte_by_byte:
 	CMPQ    BX, $0x00
-	JLE     sequenceDecs_decode_bmi2_fill_2_end
+	JLE     sequenceDecs_decode_bmi2_fill_2_check_overread
 	CMPQ    DX, $0x07
 	JLE     sequenceDecs_decode_bmi2_fill_2_end
 	SHLQ    $0x08, AX
@@ -719,6 +737,10 @@ sequenceDecs_decode_bmi2_fill_2_byte_by_byte:
 	ORQ     CX, AX
 	JMP     sequenceDecs_decode_bmi2_fill_2_byte_by_byte
 
+sequenceDecs_decode_bmi2_fill_2_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
 sequenceDecs_decode_bmi2_fill_2_end:
 	// Update literal length
 	MOVQ   $0x00000808, CX
@@ -870,9 +892,9 @@ sequenceDecs_decode_bmi2_match_len_ofs_ok:
 	MOVQ R11, 152(CX)
 	MOVQ R12, 160(CX)
 	MOVQ br+8(FP), CX
-	MOVQ AX, 32(CX)
-	MOVB DL, 40(CX)
-	MOVQ BX, 24(CX)
+	MOVQ AX, 24(CX)
+	MOVB DL, 32(CX)
+	MOVQ BX, 8(CX)
 
 	// Return success
 	MOVQ $0x00000000, ret+24(FP)
@@ -897,18 +919,19 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET
 
-	// Return with not enough output space error
-	MOVQ $0x00000005, ret+24(FP)
+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
 	RET
 
 // func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 // Requires: BMI, BMI2, CMOV
 TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32
-	MOVQ    br+8(FP), CX
-	MOVQ    32(CX), AX
-	MOVBQZX 40(CX), DX
-	MOVQ    24(CX), BX
-	MOVQ    (CX), CX
+	MOVQ    br+8(FP), BX
+	MOVQ    24(BX), AX
+	MOVBQZX 32(BX), DX
+	MOVQ    (BX), CX
+	MOVQ    8(BX), BX
 	ADDQ    BX, CX
 	MOVQ    CX, (SP)
 	MOVQ    ctx+16(FP), CX
@@ -937,7 +960,7 @@ sequenceDecs_decode_56_bmi2_main_loop:
 
 sequenceDecs_decode_56_bmi2_fill_byte_by_byte:
 	CMPQ    BX, $0x00
-	JLE     sequenceDecs_decode_56_bmi2_fill_end
+	JLE     sequenceDecs_decode_56_bmi2_fill_check_overread
 	CMPQ    DX, $0x07
 	JLE     sequenceDecs_decode_56_bmi2_fill_end
 	SHLQ    $0x08, AX
@@ -948,6 +971,10 @@ sequenceDecs_decode_56_bmi2_fill_byte_by_byte:
 	ORQ     CX, AX
 	JMP     sequenceDecs_decode_56_bmi2_fill_byte_by_byte
 
+sequenceDecs_decode_56_bmi2_fill_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
 sequenceDecs_decode_56_bmi2_fill_end:
 	// Update offset
 	MOVQ   $0x00000808, CX
@@ -1125,9 +1152,9 @@ sequenceDecs_decode_56_bmi2_match_len_ofs_ok:
 	MOVQ R11, 152(CX)
 	MOVQ R12, 160(CX)
 	MOVQ br+8(FP), CX
-	MOVQ AX, 32(CX)
-	MOVB DL, 40(CX)
-	MOVQ BX, 24(CX)
+	MOVQ AX, 24(CX)
+	MOVB DL, 32(CX)
+	MOVQ BX, 8(CX)
 
 	// Return success
 	MOVQ $0x00000000, ret+24(FP)
@@ -1152,8 +1179,9 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET
 
-	// Return with not enough output space error
-	MOVQ $0x00000005, ret+24(FP)
+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
 	RET
 
 // func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
@@ -1389,8 +1417,7 @@ loop_finished:
 	MOVQ ctx+0(FP), AX
 	MOVQ DX, 24(AX)
 	MOVQ DI, 104(AX)
-	MOVQ 80(AX), CX
-	SUBQ CX, SI
+	SUBQ 80(AX), SI
 	MOVQ SI, 112(AX)
 	RET
 
@@ -1402,8 +1429,7 @@ error_match_off_too_big:
 	MOVQ ctx+0(FP), AX
 	MOVQ DX, 24(AX)
 	MOVQ DI, 104(AX)
-	MOVQ 80(AX), CX
-	SUBQ CX, SI
+	SUBQ 80(AX), SI
 	MOVQ SI, 112(AX)
 	RET
 
@@ -1747,8 +1773,7 @@ loop_finished:
 	MOVQ ctx+0(FP), AX
 	MOVQ DX, 24(AX)
 	MOVQ DI, 104(AX)
-	MOVQ 80(AX), CX
-	SUBQ CX, SI
+	SUBQ 80(AX), SI
 	MOVQ SI, 112(AX)
 	RET
 
@@ -1760,8 +1785,7 @@ error_match_off_too_big:
 	MOVQ ctx+0(FP), AX
 	MOVQ DX, 24(AX)
 	MOVQ DI, 104(AX)
-	MOVQ 80(AX), CX
-	SUBQ CX, SI
+	SUBQ 80(AX), SI
 	MOVQ SI, 112(AX)
 	RET
 
@@ -1773,11 +1797,11 @@ empty_seqs:
 // func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
 // Requires: CMOV, SSE
 TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32
-	MOVQ    br+8(FP), AX
-	MOVQ    32(AX), DX
-	MOVBQZX 40(AX), BX
-	MOVQ    24(AX), SI
-	MOVQ    (AX), AX
+	MOVQ    br+8(FP), CX
+	MOVQ    24(CX), DX
+	MOVBQZX 32(CX), BX
+	MOVQ    (CX), AX
+	MOVQ    8(CX), SI
 	ADDQ    SI, AX
 	MOVQ    AX, (SP)
 	MOVQ    ctx+16(FP), AX
@@ -1824,7 +1848,7 @@ sequenceDecs_decodeSync_amd64_main_loop:
 
 sequenceDecs_decodeSync_amd64_fill_byte_by_byte:
 	CMPQ    SI, $0x00
-	JLE     sequenceDecs_decodeSync_amd64_fill_end
+	JLE     sequenceDecs_decodeSync_amd64_fill_check_overread
 	CMPQ    BX, $0x07
 	JLE     sequenceDecs_decodeSync_amd64_fill_end
 	SHLQ    $0x08, DX
@@ -1835,6 +1859,10 @@ sequenceDecs_decodeSync_amd64_fill_byte_by_byte:
 	ORQ     AX, DX
 	JMP     sequenceDecs_decodeSync_amd64_fill_byte_by_byte
 
+sequenceDecs_decodeSync_amd64_fill_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
 sequenceDecs_decodeSync_amd64_fill_end:
 	// Update offset
 	MOVQ  R9, AX
@@ -1891,7 +1919,7 @@ sequenceDecs_decodeSync_amd64_ml_update_zero:
 
 sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte:
 	CMPQ    SI, $0x00
-	JLE     sequenceDecs_decodeSync_amd64_fill_2_end
+	JLE     sequenceDecs_decodeSync_amd64_fill_2_check_overread
 	CMPQ    BX, $0x07
 	JLE     sequenceDecs_decodeSync_amd64_fill_2_end
 	SHLQ    $0x08, DX
@@ -1902,6 +1930,10 @@ sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte:
 	ORQ     AX, DX
 	JMP     sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte
 
+sequenceDecs_decodeSync_amd64_fill_2_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
 sequenceDecs_decodeSync_amd64_fill_2_end:
 	// Update literal length
 	MOVQ  DI, AX
@@ -2263,9 +2295,9 @@ handle_loop:
 
 loop_finished:
 	MOVQ br+8(FP), AX
-	MOVQ DX, 32(AX)
-	MOVB BL, 40(AX)
-	MOVQ SI, 24(AX)
+	MOVQ DX, 24(AX)
+	MOVB BL, 32(AX)
+	MOVQ SI, 8(AX)
 
 	// Update the context
 	MOVQ ctx+16(FP), AX
@@ -2311,6 +2343,11 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET
 
+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
 	// Return with not enough output space error
 error_not_enough_space:
 	MOVQ ctx+16(FP), AX
@@ -2325,11 +2362,11 @@ error_not_enough_space:
 // func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
 // Requires: BMI, BMI2, CMOV, SSE
 TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32
-	MOVQ    br+8(FP), CX
-	MOVQ    32(CX), AX
-	MOVBQZX 40(CX), DX
-	MOVQ    24(CX), BX
-	MOVQ    (CX), CX
+	MOVQ    br+8(FP), BX
+	MOVQ    24(BX), AX
+	MOVBQZX 32(BX), DX
+	MOVQ    (BX), CX
+	MOVQ    8(BX), BX
 	ADDQ    BX, CX
 	MOVQ    CX, (SP)
 	MOVQ    ctx+16(FP), CX
@@ -2376,7 +2413,7 @@ sequenceDecs_decodeSync_bmi2_main_loop:
 
 sequenceDecs_decodeSync_bmi2_fill_byte_by_byte:
 	CMPQ    BX, $0x00
-	JLE     sequenceDecs_decodeSync_bmi2_fill_end
+	JLE     sequenceDecs_decodeSync_bmi2_fill_check_overread
 	CMPQ    DX, $0x07
 	JLE     sequenceDecs_decodeSync_bmi2_fill_end
 	SHLQ    $0x08, AX
@@ -2387,6 +2424,10 @@ sequenceDecs_decodeSync_bmi2_fill_byte_by_byte:
 	ORQ     CX, AX
 	JMP     sequenceDecs_decodeSync_bmi2_fill_byte_by_byte
 
+sequenceDecs_decodeSync_bmi2_fill_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
 sequenceDecs_decodeSync_bmi2_fill_end:
 	// Update offset
 	MOVQ   $0x00000808, CX
@@ -2427,7 +2468,7 @@ sequenceDecs_decodeSync_bmi2_fill_end:
 
 sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte:
 	CMPQ    BX, $0x00
-	JLE     sequenceDecs_decodeSync_bmi2_fill_2_end
+	JLE     sequenceDecs_decodeSync_bmi2_fill_2_check_overread
 	CMPQ    DX, $0x07
 	JLE     sequenceDecs_decodeSync_bmi2_fill_2_end
 	SHLQ    $0x08, AX
@@ -2438,6 +2479,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte:
 	ORQ     CX, AX
 	JMP     sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte
 
+sequenceDecs_decodeSync_bmi2_fill_2_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
 sequenceDecs_decodeSync_bmi2_fill_2_end:
 	// Update literal length
 	MOVQ   $0x00000808, CX
@@ -2773,9 +2818,9 @@ handle_loop:
 
 loop_finished:
 	MOVQ br+8(FP), CX
-	MOVQ AX, 32(CX)
-	MOVB DL, 40(CX)
-	MOVQ BX, 24(CX)
+	MOVQ AX, 24(CX)
+	MOVB DL, 32(CX)
+	MOVQ BX, 8(CX)
 
 	// Update the context
 	MOVQ ctx+16(FP), AX
@@ -2821,6 +2866,11 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET
 
+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
 	// Return with not enough output space error
 error_not_enough_space:
 	MOVQ ctx+16(FP), AX
@@ -2835,11 +2885,11 @@ error_not_enough_space:
 // func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
 // Requires: CMOV, SSE
 TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32
-	MOVQ    br+8(FP), AX
-	MOVQ    32(AX), DX
-	MOVBQZX 40(AX), BX
-	MOVQ    24(AX), SI
-	MOVQ    (AX), AX
+	MOVQ    br+8(FP), CX
+	MOVQ    24(CX), DX
+	MOVBQZX 32(CX), BX
+	MOVQ    (CX), AX
+	MOVQ    8(CX), SI
 	ADDQ    SI, AX
 	MOVQ    AX, (SP)
 	MOVQ    ctx+16(FP), AX
@@ -2886,7 +2936,7 @@ sequenceDecs_decodeSync_safe_amd64_main_loop:
 
 sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte:
 	CMPQ    SI, $0x00
-	JLE     sequenceDecs_decodeSync_safe_amd64_fill_end
+	JLE     sequenceDecs_decodeSync_safe_amd64_fill_check_overread
 	CMPQ    BX, $0x07
 	JLE     sequenceDecs_decodeSync_safe_amd64_fill_end
 	SHLQ    $0x08, DX
@@ -2897,6 +2947,10 @@ sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte:
 	ORQ     AX, DX
 	JMP     sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte
 
+sequenceDecs_decodeSync_safe_amd64_fill_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
 sequenceDecs_decodeSync_safe_amd64_fill_end:
 	// Update offset
 	MOVQ  R9, AX
@@ -2953,7 +3007,7 @@ sequenceDecs_decodeSync_safe_amd64_ml_update_zero:
 
 sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte:
 	CMPQ    SI, $0x00
-	JLE     sequenceDecs_decodeSync_safe_amd64_fill_2_end
+	JLE     sequenceDecs_decodeSync_safe_amd64_fill_2_check_overread
 	CMPQ    BX, $0x07
 	JLE     sequenceDecs_decodeSync_safe_amd64_fill_2_end
 	SHLQ    $0x08, DX
@@ -2964,6 +3018,10 @@ sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte:
 	ORQ     AX, DX
 	JMP     sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte
 
+sequenceDecs_decodeSync_safe_amd64_fill_2_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
 sequenceDecs_decodeSync_safe_amd64_fill_2_end:
 	// Update literal length
 	MOVQ  DI, AX
@@ -3427,9 +3485,9 @@ handle_loop:
 
 loop_finished:
 	MOVQ br+8(FP), AX
-	MOVQ DX, 32(AX)
-	MOVB BL, 40(AX)
-	MOVQ SI, 24(AX)
+	MOVQ DX, 24(AX)
+	MOVB BL, 32(AX)
+	MOVQ SI, 8(AX)
 
 	// Update the context
 	MOVQ ctx+16(FP), AX
@@ -3475,6 +3533,11 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET
 
+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
 	// Return with not enough output space error
 error_not_enough_space:
 	MOVQ ctx+16(FP), AX
@@ -3489,11 +3552,11 @@ error_not_enough_space:
 // func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
 // Requires: BMI, BMI2, CMOV, SSE
 TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32
-	MOVQ    br+8(FP), CX
-	MOVQ    32(CX), AX
-	MOVBQZX 40(CX), DX
-	MOVQ    24(CX), BX
-	MOVQ    (CX), CX
+	MOVQ    br+8(FP), BX
+	MOVQ    24(BX), AX
+	MOVBQZX 32(BX), DX
+	MOVQ    (BX), CX
+	MOVQ    8(BX), BX
 	ADDQ    BX, CX
 	MOVQ    CX, (SP)
 	MOVQ    ctx+16(FP), CX
@@ -3540,7 +3603,7 @@ sequenceDecs_decodeSync_safe_bmi2_main_loop:
 
 sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte:
 	CMPQ    BX, $0x00
-	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_end
+	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_check_overread
 	CMPQ    DX, $0x07
 	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_end
 	SHLQ    $0x08, AX
@@ -3551,6 +3614,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte:
 	ORQ     CX, AX
 	JMP     sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte
 
+sequenceDecs_decodeSync_safe_bmi2_fill_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
 sequenceDecs_decodeSync_safe_bmi2_fill_end:
 	// Update offset
 	MOVQ   $0x00000808, CX
@@ -3591,7 +3658,7 @@ sequenceDecs_decodeSync_safe_bmi2_fill_end:
 
 sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte:
 	CMPQ    BX, $0x00
-	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_2_end
+	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_2_check_overread
 	CMPQ    DX, $0x07
 	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_2_end
 	SHLQ    $0x08, AX
@@ -3602,6 +3669,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte:
 	ORQ     CX, AX
 	JMP     sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte
 
+sequenceDecs_decodeSync_safe_bmi2_fill_2_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
 sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
 	// Update literal length
 	MOVQ   $0x00000808, CX
@@ -4039,9 +4110,9 @@ handle_loop:
 
 loop_finished:
 	MOVQ br+8(FP), CX
-	MOVQ AX, 32(CX)
-	MOVB DL, 40(CX)
-	MOVQ BX, 24(CX)
+	MOVQ AX, 24(CX)
+	MOVB DL, 32(CX)
+	MOVQ BX, 8(CX)
 
 	// Update the context
 	MOVQ ctx+16(FP), AX
@@ -4087,6 +4158,11 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET
 
+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
 	// Return with not enough output space error
 error_not_enough_space:
 	MOVQ ctx+16(FP), AX

+ 1 - 1
vendor/github.com/klauspost/compress/zstd/seqdec_generic.go

@@ -29,7 +29,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
 	}
 	for i := range seqs {
 		var ll, mo, ml int
-		if br.off > 4+((maxOffsetBits+16+16)>>3) {
+		if len(br.in) > 4+((maxOffsetBits+16+16)>>3) {
 			// inlined function:
 			// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
 

+ 2 - 3
vendor/github.com/klauspost/compress/zstd/snappy.go

@@ -95,10 +95,9 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
 	var written int64
 	var readHeader bool
 	{
-		var header []byte
-		var n int
-		header, r.err = frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0])
+		header := frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0])
 
+		var n int
 		n, r.err = w.Write(header)
 		if r.err != nil {
 			return written, r.err

+ 2 - 33
vendor/github.com/klauspost/compress/zstd/zstd.go

@@ -9,7 +9,6 @@ import (
 	"errors"
 	"errors"
 	"log"
 	"log"
 	"math"
 	"math"
-	"math/bits"
 )
 )
 
 
 // enable debug printing
 // enable debug printing
@@ -36,9 +35,6 @@ const forcePreDef = false
 // zstdMinMatch is the minimum zstd match length.
 const zstdMinMatch = 3
 
-// Reset the buffer offset when reaching this.
-const bufferReset = math.MaxInt32 - MaxWindowSize
-
 // fcsUnknown is used for unknown frame content size.
 const fcsUnknown = math.MaxUint64
 
@@ -75,7 +71,6 @@ var (
 	ErrDecoderSizeExceeded = errors.New("decompressed size exceeds configured limit")
 
 	// ErrUnknownDictionary is returned if the dictionary ID is unknown.
-	// For the time being dictionaries are not supported.
 	ErrUnknownDictionary = errors.New("unknown dictionary")
 
 	// ErrFrameSizeExceeded is returned if the stated frame size is exceeded.
@@ -110,38 +105,12 @@ func printf(format string, a ...interface{}) {
 	}
 }
 
-// matchLen returns the maximum length.
-// a must be the shortest of the two.
-// The function also returns whether all bytes matched.
-func matchLen(a, b []byte) int {
-	b = b[:len(a)]
-	for i := 0; i < len(a)-7; i += 8 {
-		if diff := load64(a, i) ^ load64(b, i); diff != 0 {
-			return i + (bits.TrailingZeros64(diff) >> 3)
-		}
-	}
-
-	checked := (len(a) >> 3) << 3
-	a = a[checked:]
-	b = b[checked:]
-	for i := range a {
-		if a[i] != b[i] {
-			return i + checked
-		}
-	}
-	return len(a) + checked
-}
-
 func load3232(b []byte, i int32) uint32 {
-	return binary.LittleEndian.Uint32(b[i:])
+	return binary.LittleEndian.Uint32(b[:len(b):len(b)][i:])
 }
 
 func load6432(b []byte, i int32) uint64 {
-	return binary.LittleEndian.Uint64(b[i:])
-}
-
-func load64(b []byte, i int) uint64 {
-	return binary.LittleEndian.Uint64(b[i:])
+	return binary.LittleEndian.Uint64(b[:len(b):len(b)][i:])
 }
 
 type byter interface {

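The load3232/load6432 bodies above use Go's three-index ("full") slice expression: b[:len(b):len(b)] yields a slice whose capacity equals its length, so no byte past len(b) is reachable through it. A standalone illustration of the semantics:

    func fullSliceExample() {
    	b := make([]byte, 4, 16)
    	s := b[:len(b):len(b)] // len(s) == cap(s) == 4
    	_ = append(s, 1)       // allocates a new array rather than writing into b's spare capacity
    }
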
+ 2 - 2
vendor/modules.txt

@@ -467,8 +467,8 @@ github.com/ishidawataru/sctp
 # github.com/jmespath/go-jmespath v0.4.0
 ## explicit; go 1.14
 github.com/jmespath/go-jmespath
-# github.com/klauspost/compress v1.15.12
-## explicit; go 1.17
+# github.com/klauspost/compress v1.17.2
+## explicit; go 1.18
 github.com/klauspost/compress
 github.com/klauspost/compress/fse
 github.com/klauspost/compress/huff0