Merge pull request #43287 from thaJeztah/bump_dependencies_for_buildkit

vendor: update various dependencies in preparation of BuildKit update
Commit 5c36bb7206 by Brian Goff, 2022-02-24 11:05:48 -08:00 (committed by GitHub)
138 changed files with 4154 additions and 4007 deletions


@ -17,9 +17,9 @@ require (
github.com/aws/aws-sdk-go v1.31.6
github.com/bsphere/le_go v0.0.0-20170215134836-7a984a84b549
github.com/cloudflare/cfssl v0.0.0-20180323000720-5d63dbd981b5
github.com/containerd/cgroups v1.0.1
github.com/containerd/cgroups v1.0.3
github.com/containerd/containerd v1.5.9
github.com/containerd/continuity v0.1.0
github.com/containerd/continuity v0.2.2
github.com/containerd/fifo v1.0.0
github.com/containerd/typeurl v1.0.2
github.com/coreos/go-systemd/v22 v22.3.2
@ -39,7 +39,7 @@ require (
github.com/godbus/dbus/v5 v5.0.6
github.com/gogo/protobuf v1.3.2
github.com/golang/gddo v0.0.0-20190904175337-72a348e765d2
github.com/google/go-cmp v0.5.5
github.com/google/go-cmp v0.5.6
github.com/google/uuid v1.3.0
github.com/gorilla/mux v1.8.0
github.com/hashicorp/go-immutable-radix v1.0.0
@ -48,7 +48,7 @@ require (
github.com/hashicorp/serf v0.8.2
github.com/imdario/mergo v0.3.12
github.com/ishidawataru/sctp v0.0.0-20210226210310-f2269e66cdee
github.com/klauspost/compress v1.12.3
github.com/klauspost/compress v1.14.2
github.com/miekg/dns v1.1.27
github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible
github.com/moby/buildkit v0.8.2-0.20210615162540-9f254e18360a
@ -65,7 +65,7 @@ require (
github.com/opencontainers/runc v1.1.0
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417
github.com/opencontainers/selinux v1.10.0
github.com/pelletier/go-toml v1.9.1
github.com/pelletier/go-toml v1.9.4
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.11.0
github.com/sirupsen/logrus v1.8.1
@ -105,7 +105,6 @@ require (
github.com/gogo/googleapis v1.4.0 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/golang/snappy v0.0.3 // indirect
github.com/google/btree v1.0.1 // indirect
github.com/google/certificate-transparency-go v1.0.20 // indirect
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
@ -137,10 +136,10 @@ require (
go.opencensus.io v0.23.0 // indirect
golang.org/x/crypto v0.0.0-20211202192323-5770296d904e // indirect
golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5 // indirect
golang.org/x/mod v0.4.1 // indirect
golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c // indirect
golang.org/x/mod v0.4.2 // indirect
golang.org/x/oauth2 v0.0.0-20210805134026-6f1e6394065a // indirect
golang.org/x/text v0.3.6 // indirect
golang.org/x/tools v0.1.0 // indirect
golang.org/x/tools v0.1.5 // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
google.golang.org/api v0.46.0 // indirect
google.golang.org/appengine v1.6.7 // indirect


@ -1,4 +1,5 @@
bazil.org/fuse v0.0.0-20160811212531-371fbbdaa898/go.mod h1:Xbm+BRKSBEpa4q4hTSxohYNQpsxXPbPry4JJWOB3LB8=
bazil.org/fuse v0.0.0-20200407214033-5883e5a4b512/go.mod h1:FbcW6z/2VytnFDhZfumh8Ss8zxHE6qpMP5sHTRe0EaM=
cloud.google.com/go v0.59.0 h1:BM3svUDU3itpc2m5cu5wCyThIYNDlFlts9GASw31GW8=
cloud.google.com/go v0.59.0/go.mod h1:qJxNOVCRTxHfwLhvDxxSI9vQc1zI59b9pEglp1Iv60E=
cloud.google.com/go/bigquery v1.0.0 h1:uzb+IRbJNYyU4lgbpqz7KKVjKO8XcF04rVFk6qbNTbM=
@ -125,8 +126,9 @@ github.com/containerd/cgroups v0.0.0-20200531161412-0dbf7f05ba59/go.mod h1:pA0z1
github.com/containerd/cgroups v0.0.0-20200710171044-318312a37340/go.mod h1:s5q4SojHctfxANBDvMeIaIovkq29IP48TKAxnhYRxvo=
github.com/containerd/cgroups v0.0.0-20200824123100-0b889c03f102/go.mod h1:s5q4SojHctfxANBDvMeIaIovkq29IP48TKAxnhYRxvo=
github.com/containerd/cgroups v0.0.0-20210114181951-8a68de567b68/go.mod h1:ZJeTFisyysqgcCdecO57Dj79RfL0LNeGiFUqLYQRYLE=
github.com/containerd/cgroups v1.0.1 h1:iJnMvco9XGvKUvNQkv88bE4uJXxRQH18efbKo9w5vHQ=
github.com/containerd/cgroups v1.0.1/go.mod h1:0SJrPIenamHDcZhEcJMNBB85rHcUsw4f25ZfBiPYRkU=
github.com/containerd/cgroups v1.0.3 h1:ADZftAkglvCiD44c77s5YmMqaP2pzVCFZvBmAlBdAP4=
github.com/containerd/cgroups v1.0.3/go.mod h1:/ofk34relqNjSGyqPrmEULrO4Sc8LJhvJmWbUCUKqj8=
github.com/containerd/console v0.0.0-20180822173158-c12b1e7919c1/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw=
github.com/containerd/console v0.0.0-20181022165439-0650fd9eeb50/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw=
github.com/containerd/console v0.0.0-20191206165004-02ecf6a7291e/go.mod h1:8Pf4gM6VEbTNRIT26AyyU7hxdQU3MvAvxVI0sc00XBE=
@ -156,8 +158,9 @@ github.com/containerd/continuity v0.0.0-20191127005431-f65d91d395eb/go.mod h1:GL
github.com/containerd/continuity v0.0.0-20200710164510-efbc4488d8fe/go.mod h1:cECdGN1O8G9bgKTlLhuPJimka6Xb/Gg7vYzCTNVxhvo=
github.com/containerd/continuity v0.0.0-20201208142359-180525291bb7/go.mod h1:kR3BEg7bDFaEddKm54WSmrol1fKWDU1nKYkgrcgZT7Y=
github.com/containerd/continuity v0.0.0-20210208174643-50096c924a4e/go.mod h1:EXlVlkqNba9rJe3j7w3Xa924itAMLgZH4UD/Q4PExuQ=
github.com/containerd/continuity v0.1.0 h1:UFRRY5JemiAhPZrr/uE0n8fMTLcZsUvySPr1+D7pgr8=
github.com/containerd/continuity v0.1.0/go.mod h1:ICJu0PwR54nI0yPEnJ6jcS+J7CZAUXrLh8lPo2knzsM=
github.com/containerd/continuity v0.2.2 h1:QSqfxcn8c+12slxwu00AtzXrsami0MJb/MQs9lOLHLA=
github.com/containerd/continuity v0.2.2/go.mod h1:pWygW9u7LtS1o4N/Tn0FoCFDIXZ7rxcMX7HX1Dmibvk=
github.com/containerd/fifo v0.0.0-20180307165137-3d5202aec260/go.mod h1:ODA38xgv3Kuk8dQz2ZQXpnv/UZZUHUCL7pnLehbXgQI=
github.com/containerd/fifo v0.0.0-20190226154929-a9fb20d87448/go.mod h1:ODA38xgv3Kuk8dQz2ZQXpnv/UZZUHUCL7pnLehbXgQI=
github.com/containerd/fifo v0.0.0-20200410184934-f15a3290365b/go.mod h1:jPQ2IAeZRCYxpS/Cm1495vGFww6ecHmMk1YJH2Q5ln0=
@ -348,7 +351,6 @@ github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfb
github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
github.com/golang/protobuf v1.3.5 h1:F768QJ1E9tib+q5Sc8MkdJi1RxLTbRcTf8LJV56aRls=
github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk=
github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA=
github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
@ -365,8 +367,9 @@ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
@ -464,8 +467,9 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.12.3 h1:G5AfA94pHPysR56qqrkO2pxEexdDzrpFJ6yt/VqWxVU=
github.com/klauspost/compress v1.12.3/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
github.com/klauspost/compress v1.14.2 h1:S0OHlFk/Gbon/yauFJ4FfJJF5V0fc5HbBTJazi28pRw=
github.com/klauspost/compress v1.14.2/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
@ -598,8 +602,9 @@ github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYr
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
github.com/pelletier/go-toml v1.8.1/go.mod h1:T2/BmBdy8dvIRq1a/8aqjN41wvWlN4lrapLU/GW4pbc=
github.com/pelletier/go-toml v1.9.1 h1:a6qW1EVNZWH9WGI6CsYdD8WAylkoXBS5yv0XHlh17Tc=
github.com/pelletier/go-toml v1.9.1/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pelletier/go-toml v1.9.4 h1:tjENF6MfZAg8e4ZmZTeWaWiT2vXtsoO6+iuOjFhECwM=
github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU=
github.com/phayes/permbits v0.0.0-20190612203442-39d7c581d2ee h1:P6U24L02WMfj9ymZTxl7CxS73JC99x3ukk+DBkgQGQs=
github.com/phayes/permbits v0.0.0-20190612203442-39d7c581d2ee/go.mod h1:3uODdxMgOaPYeWU7RzZLxVtJHZ/x1f/iHkBZuKJDzuY=
@ -697,6 +702,7 @@ github.com/tonistiigi/go-immutable-radix v0.0.0-20170803185627-826af9ccf0fe h1:p
github.com/tonistiigi/go-immutable-radix v0.0.0-20170803185627-826af9ccf0fe/go.mod h1:/+MCh11CJf2oz0BXmlmqyopK/ad1rKkcOXPoYuPCJYU=
github.com/tonistiigi/units v0.0.0-20180711220420-6950e57a87ea h1:SXhTLE6pb6eld/v/cCndK0AMpt1wiVFb/YYmqB3/QG0=
github.com/tonistiigi/units v0.0.0-20180711220420-6950e57a87ea/go.mod h1:WPnis/6cRcDZSUvVmezrxJPkiO87ThFYsoUiMwWNDJk=
github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c/go.mod h1:hzIxponao9Kjc7aWznkXaL4U4TWaDSs8zcsY4Ka08nM=
github.com/uber/jaeger-client-go v2.25.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk=
github.com/uber/jaeger-lib v2.2.0+incompatible/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U=
github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
@ -724,6 +730,7 @@ github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/yvasiyarov/go-metrics v0.0.0-20140926110328-57bccd1ccd43/go.mod h1:aX5oPXxHm3bOH+xeAttToC8pqch2ScQN/JoXYupl6xs=
github.com/yvasiyarov/gorelic v0.0.0-20141212073537-a9bba5b9ab50/go.mod h1:NUSPSUX/bi6SeDMUh6brw0nXpxHnc96TguQh0+r/ssA=
github.com/yvasiyarov/newrelic_platform_go v0.0.0-20140908184405-b21fdbd4370f/go.mod h1:GlGEuHIJweS1mbCqG+7vt2nvWLzLLnRHbXz5JKd/Qbg=
@ -738,6 +745,8 @@ go.opencensus.io v0.22.3 h1:8sGtKOrtQqkN1bp2AtX+misvLIlOmsEsNd+9NIcPEm8=
go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/goleak v1.1.12 h1:gZAh5/EyT/HQwlpkCy6wTpqfH9H8Lz8zbm3dZh+OyzA=
go.uber.org/goleak v1.1.12/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ=
go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
golang.org/x/crypto v0.0.0-20171113213409-9f005a07e0d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
@ -772,8 +781,8 @@ golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.1 h1:Kvvh58BN8Y9/lBi7hTekvtMpm07eUZ0ck5pRHpsMWrY=
golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.2 h1:Gz96sIWK3OalVv/I/qNygP42zyoKp3xptRVCWRFEBvo=
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181011144130-49bb7cea24b1/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@ -803,13 +812,14 @@ golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwY
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2 h1:CIJ76btIcR3eFI5EgSo6k1qKw9KJexJuRLI9G7Hp5wE=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c h1:pkQiBZBvdos9qq4wBAHqlzuZHEXo07pqV06ef90u1WI=
golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
golang.org/x/oauth2 v0.0.0-20210805134026-6f1e6394065a h1:4Kd8OPUx1xgUwrHDaviWZO8MsgoZTZYC3g+8m16RBww=
golang.org/x/oauth2 v0.0.0-20210805134026-6f1e6394065a/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@ -875,12 +885,13 @@ golang.org/x/sys v0.0.0-20201117170446-d9b008d0a637/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201202213521-69691e467435/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210313202042-bd2e13477e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210324051608-47abb6519492/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210426230700-d19ff857e887/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@ -927,8 +938,8 @@ golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roY
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20200622203043-20e05c1c8ffa/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.0 h1:po9/4sTYwZU9lPhi1tOrb4hCv3qrhiQ77LZfGa2OjwY=
golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
golang.org/x/tools v0.1.5 h1:ouewzE6p+/VEB31YYnTbEJdi8pFqKp4P4n85vwo3DHA=
golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=


@ -1,7 +1,7 @@
# cgroups
[![Build Status](https://github.com/containerd/cgroups/workflows/CI/badge.svg)](https://github.com/containerd/cgroups/actions?query=workflow%3ACI)
[![codecov](https://codecov.io/gh/containerd/cgroups/branch/master/graph/badge.svg)](https://codecov.io/gh/containerd/cgroups)
[![codecov](https://codecov.io/gh/containerd/cgroups/branch/main/graph/badge.svg)](https://codecov.io/gh/containerd/cgroups)
[![GoDoc](https://godoc.org/github.com/containerd/cgroups?status.svg)](https://godoc.org/github.com/containerd/cgroups)
[![Go Report Card](https://goreportcard.com/badge/github.com/containerd/cgroups)](https://goreportcard.com/report/github.com/containerd/cgroups)
@ -142,8 +142,8 @@ All static path should not include `/sys/fs/cgroup/` prefix, it should start wit
Cgroups is a containerd sub-project, licensed under the [Apache 2.0 license](./LICENSE).
As a containerd sub-project, you will find the:
* [Project governance](https://github.com/containerd/project/blob/master/GOVERNANCE.md),
* [Maintainers](https://github.com/containerd/project/blob/master/MAINTAINERS),
* and [Contributing guidelines](https://github.com/containerd/project/blob/master/CONTRIBUTING.md)
* [Project governance](https://github.com/containerd/project/blob/main/GOVERNANCE.md),
* [Maintainers](https://github.com/containerd/project/blob/main/MAINTAINERS),
* and [Contributing guidelines](https://github.com/containerd/project/blob/main/CONTRIBUTING.md)
information in our [`containerd/project`](https://github.com/containerd/project) repository.


@ -130,7 +130,7 @@ func (b *blkioController) Stat(path string, stats *v1.Metrics) error {
}
}
f, err := os.Open(filepath.Join(b.procRoot, "diskstats"))
f, err := os.Open(filepath.Join(b.procRoot, "partitions"))
if err != nil {
return err
}
@ -335,7 +335,10 @@ func getDevices(r io.Reader) (map[deviceKey]string, error) {
s = bufio.NewScanner(r)
devices = make(map[deviceKey]string)
)
for s.Scan() {
for i := 0; s.Scan(); i++ {
if i < 2 {
continue
}
fields := strings.Fields(s.Text())
major, err := strconv.Atoi(fields[0])
if err != nil {
@ -352,7 +355,7 @@ func getDevices(r io.Reader) (map[deviceKey]string, error) {
if _, ok := devices[key]; ok {
continue
}
devices[key] = filepath.Join("/dev", fields[2])
devices[key] = filepath.Join("/dev", fields[3])
}
return devices, s.Err()
}
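
For reference, the new device discovery reads /proc/partitions instead of /proc/diskstats, skips the two header lines, and takes the fourth column as the device name. A minimal standalone sketch of that parsing, assuming the standard "major minor  #blocks  name" layout of /proc/partitions:

package main

import (
	"bufio"
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

func main() {
	f, err := os.Open("/proc/partitions")
	if err != nil {
		panic(err)
	}
	defer f.Close()
	s := bufio.NewScanner(f)
	for i := 0; s.Scan(); i++ {
		// Skip the header line and the blank line that follows it.
		if i < 2 {
			continue
		}
		fields := strings.Fields(s.Text())
		if len(fields) < 4 {
			continue
		}
		// fields[0]=major, fields[1]=minor, fields[2]=#blocks, fields[3]=name
		fmt.Printf("%s:%s -> %s\n", fields[0], fields[1], filepath.Join("/dev", fields[3]))
	}
}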


@ -17,6 +17,7 @@
package cgroups
import (
"errors"
"fmt"
"os"
"path/filepath"
@ -25,8 +26,8 @@ import (
"sync"
v1 "github.com/containerd/cgroups/stats/v1"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/opencontainers/runtime-spec/specs-go"
)
// New returns a new control via the cgroup cgroups interface
@ -83,7 +84,7 @@ func Load(hierarchy Hierarchy, path Path, opts ...InitOpts) (Cgroup, error) {
for _, s := range pathers(subsystems) {
p, err := path(s.Name())
if err != nil {
if os.IsNotExist(errors.Cause(err)) {
if errors.Is(err, os.ErrNotExist) {
return nil, ErrCgroupDeleted
}
if err == ErrControllerNotActive {
@ -149,8 +150,50 @@ func (c *cgroup) Subsystems() []Subsystem {
return c.subsystems
}
// Add moves the provided process into the new cgroup
func (c *cgroup) Add(process Process) error {
func (c *cgroup) subsystemsFilter(subsystems ...Name) []Subsystem {
if len(subsystems) == 0 {
return c.subsystems
}
var filteredSubsystems = []Subsystem{}
for _, s := range c.subsystems {
for _, f := range subsystems {
if s.Name() == f {
filteredSubsystems = append(filteredSubsystems, s)
break
}
}
}
return filteredSubsystems
}
// Add moves the provided process into the new cgroup.
// Without additional arguments, the process is added to all the cgroup subsystems.
// When giving Add a list of subsystem names, the process is only added to those
// subsystems, provided that they are active in the targeted cgroup.
func (c *cgroup) Add(process Process, subsystems ...Name) error {
return c.add(process, cgroupProcs, subsystems...)
}
// AddProc moves the provided process id into the new cgroup.
// Without additional arguments, the process with the given id is added to all
// the cgroup subsystems. When giving AddProc a list of subsystem names, the process
// id is only added to those subsystems, provided that they are active in the targeted
// cgroup.
func (c *cgroup) AddProc(pid uint64, subsystems ...Name) error {
return c.add(Process{Pid: int(pid)}, cgroupProcs, subsystems...)
}
// AddTask moves the provided tasks (threads) into the new cgroup.
// Without additional arguments, the task is added to all the cgroup subsystems.
// When giving AddTask a list of subsystem names, the task is only added to those
// subsystems, provided that they are active in the targeted cgroup.
func (c *cgroup) AddTask(process Process, subsystems ...Name) error {
return c.add(process, cgroupTasks, subsystems...)
}
func (c *cgroup) add(process Process, pType procType, subsystems ...Name) error {
if process.Pid <= 0 {
return ErrInvalidPid
}
@ -159,52 +202,19 @@ func (c *cgroup) Add(process Process) error {
if c.err != nil {
return c.err
}
return c.add(process)
}
func (c *cgroup) add(process Process) error {
for _, s := range pathers(c.subsystems) {
for _, s := range pathers(c.subsystemsFilter(subsystems...)) {
p, err := c.path(s.Name())
if err != nil {
return err
}
if err := retryingWriteFile(
filepath.Join(s.Path(p), cgroupProcs),
err = retryingWriteFile(
filepath.Join(s.Path(p), pType),
[]byte(strconv.Itoa(process.Pid)),
defaultFilePerm,
); err != nil {
return err
}
}
return nil
}
// AddTask moves the provided tasks (threads) into the new cgroup
func (c *cgroup) AddTask(process Process) error {
if process.Pid <= 0 {
return ErrInvalidPid
}
c.mu.Lock()
defer c.mu.Unlock()
if c.err != nil {
return c.err
}
return c.addTask(process)
}
func (c *cgroup) addTask(process Process) error {
for _, s := range pathers(c.subsystems) {
p, err := c.path(s.Name())
)
if err != nil {
return err
}
if err := retryingWriteFile(
filepath.Join(s.Path(p), cgroupTasks),
[]byte(strconv.Itoa(process.Pid)),
defaultFilePerm,
); err != nil {
return err
}
}
return nil
}
@ -326,15 +336,29 @@ func (c *cgroup) Processes(subsystem Name, recursive bool) ([]Process, error) {
if c.err != nil {
return nil, c.err
}
return c.processes(subsystem, recursive)
return c.processes(subsystem, recursive, cgroupProcs)
}
func (c *cgroup) processes(subsystem Name, recursive bool) ([]Process, error) {
// Tasks returns the tasks running inside the cgroup along
// with the subsystem used, pid, and path
func (c *cgroup) Tasks(subsystem Name, recursive bool) ([]Task, error) {
c.mu.Lock()
defer c.mu.Unlock()
if c.err != nil {
return nil, c.err
}
return c.processes(subsystem, recursive, cgroupTasks)
}
func (c *cgroup) processes(subsystem Name, recursive bool, pType procType) ([]Process, error) {
s := c.getSubsystem(subsystem)
sp, err := c.path(subsystem)
if err != nil {
return nil, err
}
if s == nil {
return nil, fmt.Errorf("cgroups: %s doesn't exist in %s subsystem", sp, subsystem)
}
path := s.(pather).Path(sp)
var processes []Process
err = filepath.Walk(path, func(p string, info os.FileInfo, err error) error {
@ -348,10 +372,10 @@ func (c *cgroup) processes(subsystem Name, recursive bool) ([]Process, error) {
return filepath.SkipDir
}
dir, name := filepath.Split(p)
if name != cgroupProcs {
if name != pType {
return nil
}
procs, err := readPids(dir, subsystem)
procs, err := readPids(dir, subsystem, pType)
if err != nil {
return err
}
@ -361,49 +385,6 @@ func (c *cgroup) processes(subsystem Name, recursive bool) ([]Process, error) {
return processes, err
}
// Tasks returns the tasks running inside the cgroup along
// with the subsystem used, pid, and path
func (c *cgroup) Tasks(subsystem Name, recursive bool) ([]Task, error) {
c.mu.Lock()
defer c.mu.Unlock()
if c.err != nil {
return nil, c.err
}
return c.tasks(subsystem, recursive)
}
func (c *cgroup) tasks(subsystem Name, recursive bool) ([]Task, error) {
s := c.getSubsystem(subsystem)
sp, err := c.path(subsystem)
if err != nil {
return nil, err
}
path := s.(pather).Path(sp)
var tasks []Task
err = filepath.Walk(path, func(p string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if !recursive && info.IsDir() {
if p == path {
return nil
}
return filepath.SkipDir
}
dir, name := filepath.Split(p)
if name != cgroupTasks {
return nil
}
procs, err := readTasksPids(dir, subsystem)
if err != nil {
return err
}
tasks = append(tasks, procs...)
return nil
})
return tasks, err
}
// Freeze freezes the entire cgroup and all the processes inside it
func (c *cgroup) Freeze() error {
c.mu.Lock()
@ -511,7 +492,7 @@ func (c *cgroup) MoveTo(destination Cgroup) error {
return c.err
}
for _, s := range c.subsystems {
processes, err := c.processes(s.Name(), true)
processes, err := c.processes(s.Name(), true, cgroupProcs)
if err != nil {
return err
}


@ -23,10 +23,12 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
)
type procType = string
const (
cgroupProcs = "cgroup.procs"
cgroupTasks = "tasks"
defaultDirPerm = 0755
cgroupProcs procType = "cgroup.procs"
cgroupTasks procType = "tasks"
defaultDirPerm = 0755
)
// defaultFilePerm is a var so that the test framework can change the filemode
@ -37,32 +39,37 @@ const (
var defaultFilePerm = os.FileMode(0)
type Process struct {
// Subsystem is the name of the subsystem that the process is in
// Subsystem is the name of the subsystem that the process / task is in.
Subsystem Name
// Pid is the process id of the process
// Pid is the process id of the process / task.
Pid int
// Path is the full path of the subsystem and location that the process is in
// Path is the full path of the subsystem and location that the process / task is in.
Path string
}
type Task struct {
// Subsystem is the name of the subsystem that the task is in
Subsystem Name
// Pid is the process id of the task
Pid int
// Path is the full path of the subsystem and location that the task is in
Path string
}
type Task = Process
// Cgroup handles interactions with the individual groups to perform
// actions on them as them main interface to this cgroup package
type Cgroup interface {
// New creates a new cgroup under the calling cgroup
New(string, *specs.LinuxResources) (Cgroup, error)
// Add adds a process to the cgroup (cgroup.procs)
Add(Process) error
// AddTask adds a process to the cgroup (tasks)
AddTask(Process) error
// Add adds a process to the cgroup (cgroup.procs). Without additional arguments,
// the process is added to all the cgroup subsystems. When giving Add a list of
// subsystem names, the process is only added to those subsystems, provided that
// they are active in the targeted cgroup.
Add(Process, ...Name) error
// AddProc adds the process with the given id to the cgroup (cgroup.procs).
// Without additional arguments, the process with the given id is added to all
// the cgroup subsystems. When giving AddProc a list of subsystem names, the process
// id is only added to those subsystems, provided that they are active in the targeted
// cgroup.
AddProc(uint64, ...Name) error
// AddTask adds a process to the cgroup (tasks). Without additional arguments, the
// task is added to all the cgroup subsystems. When giving AddTask a list of subsystem
// names, the task is only added to those subsystems, provided that they are active in
// the targeted cgroup.
AddTask(Process, ...Name) error
// Delete removes the cgroup as a whole
Delete() error
// MoveTo moves all the processes under the calling cgroup to the provided one
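
A hedged usage sketch of the widened Add / AddProc / AddTask signatures described above; the "/test" path and PID 1234 are illustrative, and it assumes the vendored containerd/cgroups v1 API on a cgroup v1 host:

package main

import (
	"log"

	"github.com/containerd/cgroups"
	specs "github.com/opencontainers/runtime-spec/specs-go"
)

func main() {
	// Create a cgroup named /test under every mounted v1 subsystem (requires root).
	control, err := cgroups.New(cgroups.V1, cgroups.StaticPath("/test"), &specs.LinuxResources{})
	if err != nil {
		log.Fatal(err)
	}
	defer control.Delete()

	// Previous behaviour, still valid: add the process to all subsystems.
	if err := control.Add(cgroups.Process{Pid: 1234}); err != nil {
		log.Fatal(err)
	}

	// New: restrict the move to selected subsystems, or add a bare pid with AddProc.
	if err := control.Add(cgroups.Process{Pid: 1234}, cgroups.Freezer, cgroups.Memory); err != nil {
		log.Fatal(err)
	}
	if err := control.AddProc(1234, cgroups.Pids); err != nil {
		log.Fatal(err)
	}
}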


@ -17,7 +17,7 @@
package cgroups
import (
"github.com/pkg/errors"
"errors"
)
var (


@ -17,10 +17,9 @@
package cgroups
import (
"errors"
"fmt"
"path/filepath"
"github.com/pkg/errors"
)
type Path func(subsystem Name) (string, error)
@ -39,7 +38,7 @@ func StaticPath(path string) Path {
// NestedPath will nest the cgroups based on the calling processes cgroup
// placing its child processes inside its own path
func NestedPath(suffix string) Path {
paths, err := parseCgroupFile("/proc/self/cgroup")
paths, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return errorPath(err)
}
@ -50,9 +49,9 @@ func NestedPath(suffix string) Path {
// This is commonly used for the Load function to restore an existing container
func PidPath(pid int) Path {
p := fmt.Sprintf("/proc/%d/cgroup", pid)
paths, err := parseCgroupFile(p)
paths, err := ParseCgroupFile(p)
if err != nil {
return errorPath(errors.Wrapf(err, "parse cgroup file %s", p))
return errorPath(fmt.Errorf("parse cgroup file %s: %w", p, err))
}
return existingPath(paths, "")
}
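
The change above is part of the wider migration in this update from github.com/pkg/errors to standard-library wrapping. A minimal sketch of the idiom (the message and sentinel are illustrative): fmt.Errorf with %w preserves the error chain, so callers use errors.Is instead of errors.Cause.

package main

import (
	"errors"
	"fmt"
	"os"
)

func loadPath(p string) error {
	if _, err := os.Stat(p); err != nil {
		// Before: errors.Wrapf(err, "parse cgroup file %s", p), unwrapped via errors.Cause.
		// After: %w keeps the chain intact for errors.Is / errors.As.
		return fmt.Errorf("parse cgroup file %s: %w", p, err)
	}
	return nil
}

func main() {
	err := loadPath("/proc/self/does-not-exist")
	fmt.Println(errors.Is(err, os.ErrNotExist)) // true: the wrapped sentinel is still detectable
}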


@ -67,6 +67,7 @@ func (p *rdmaController) Create(path string, resources *specs.LinuxResources) er
for device, limit := range resources.Rdma {
if device != "" && (limit.HcaHandles != nil || limit.HcaObjects != nil) {
limit := limit
return retryingWriteFile(
filepath.Join(p.Path(path), "rdma.max"),
[]byte(createCmdString(device, &limit)),


@ -17,6 +17,7 @@
package cgroups
import (
"context"
"path/filepath"
"strings"
"sync"
@ -78,7 +79,8 @@ func (s *SystemdController) Name() Name {
}
func (s *SystemdController) Create(path string, _ *specs.LinuxResources) error {
conn, err := systemdDbus.New()
ctx := context.TODO()
conn, err := systemdDbus.NewWithContext(ctx)
if err != nil {
return err
}
@ -90,7 +92,7 @@ func (s *SystemdController) Create(path string, _ *specs.LinuxResources) error {
checkDelegate := func() {
canDelegate = true
dlSlice := newProperty("Delegate", true)
if _, err := conn.StartTransientUnit(slice, "testdelegate", []systemdDbus.Property{dlSlice}, nil); err != nil {
if _, err := conn.StartTransientUnitContext(ctx, slice, "testdelegate", []systemdDbus.Property{dlSlice}, nil); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
// Starting with systemd v237, Delegate is not even a property of slices anymore,
// so the D-Bus call fails with "InvalidArgs" error.
@ -100,7 +102,7 @@ func (s *SystemdController) Create(path string, _ *specs.LinuxResources) error {
}
}
conn.StopUnit(slice, "testDelegate", nil)
_, _ = conn.StopUnitContext(ctx, slice, "testDelegate", nil)
}
once.Do(checkDelegate)
properties := []systemdDbus.Property{
@ -118,7 +120,7 @@ func (s *SystemdController) Create(path string, _ *specs.LinuxResources) error {
}
ch := make(chan string)
_, err = conn.StartTransientUnit(name, "replace", properties, ch)
_, err = conn.StartTransientUnitContext(ctx, name, "replace", properties, ch)
if err != nil {
return err
}
@ -127,14 +129,15 @@ func (s *SystemdController) Create(path string, _ *specs.LinuxResources) error {
}
func (s *SystemdController) Delete(path string) error {
conn, err := systemdDbus.New()
ctx := context.TODO()
conn, err := systemdDbus.NewWithContext(ctx)
if err != nil {
return err
}
defer conn.Close()
_, name := splitName(path)
ch := make(chan string)
_, err = conn.StopUnit(name, "replace", ch)
_, err = conn.StopUnitContext(ctx, name, "replace", ch)
if err != nil {
return err
}
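
The controller now uses the context-aware go-systemd/v22 calls. A hedged sketch of the same pattern in isolation; the unit name and property are illustrative, and it assumes a running systemd reachable over D-Bus:

package main

import (
	"context"
	"log"

	systemdDbus "github.com/coreos/go-systemd/v22/dbus"
)

func main() {
	ctx := context.TODO()

	// New() and StartTransientUnit()/StopUnit() are deprecated in favour of the *Context variants.
	conn, err := systemdDbus.NewWithContext(ctx)
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	props := []systemdDbus.Property{systemdDbus.PropDescription("example transient slice")}
	ch := make(chan string, 1)
	if _, err := conn.StartTransientUnitContext(ctx, "example-test.slice", "replace", props, ch); err != nil {
		log.Fatal(err)
	}
	<-ch // wait for the job result ("done", "failed", ...)

	if _, err := conn.StopUnitContext(ctx, "example-test.slice", "replace", nil); err != nil {
		log.Fatal(err)
	}
}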


@ -164,9 +164,9 @@ func remove(path string) error {
return fmt.Errorf("cgroups: unable to remove path %q", path)
}
// readPids will read all the pids of processes in a cgroup by the provided path
func readPids(path string, subsystem Name) ([]Process, error) {
f, err := os.Open(filepath.Join(path, cgroupProcs))
// readPids will read all the pids of processes or tasks in a cgroup by the provided path
func readPids(path string, subsystem Name, pType procType) ([]Process, error) {
f, err := os.Open(filepath.Join(path, pType))
if err != nil {
return nil, err
}
@ -195,36 +195,6 @@ func readPids(path string, subsystem Name) ([]Process, error) {
return out, nil
}
// readTasksPids will read all the pids of tasks in a cgroup by the provided path
func readTasksPids(path string, subsystem Name) ([]Task, error) {
f, err := os.Open(filepath.Join(path, cgroupTasks))
if err != nil {
return nil, err
}
defer f.Close()
var (
out []Task
s = bufio.NewScanner(f)
)
for s.Scan() {
if t := s.Text(); t != "" {
pid, err := strconv.Atoi(t)
if err != nil {
return nil, err
}
out = append(out, Task{
Pid: pid,
Subsystem: subsystem,
Path: path,
})
}
}
if err := s.Err(); err != nil {
return nil, err
}
return out, nil
}
func hugePageSizes() ([]string, error) {
var (
pageSizes []string
@ -285,7 +255,16 @@ func parseKV(raw string) (string, uint64, error) {
}
}
func parseCgroupFile(path string) (map[string]string, error) {
// ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup
// or /proc/<pid>/cgroup, into a map of subsystems to cgroup paths, e.g.
// "cpu": "/user.slice/user-1000.slice"
// "pids": "/user.slice/user-1000.slice"
// etc.
//
// Note that for cgroup v2 unified hierarchy, there are no per-controller
// cgroup paths, so the resulting map will have a single element where the key
// is empty string ("") and the value is the cgroup path the <pid> is in.
func ParseCgroupFile(path string) (map[string]string, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
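
ParseCgroupFile is newly exported, so callers no longer need their own /proc parsing. A small usage sketch, assuming a cgroup v1 host (on a unified v2 hierarchy the resulting map has a single "" key):

package main

import (
	"fmt"
	"log"

	"github.com/containerd/cgroups"
)

func main() {
	paths, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
	if err != nil {
		log.Fatal(err)
	}
	for subsystem, path := range paths {
		fmt.Printf("%-10s %s\n", subsystem, path) // e.g. "memory     /user.slice/user-1000.slice"
	}
}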


@ -23,15 +23,16 @@
//
// This particular Go implementation based on runc version
// https://github.com/opencontainers/runc/blob/master/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
package v2
import (
"errors"
"fmt"
"math"
"github.com/cilium/ebpf/asm"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
@ -106,13 +107,13 @@ func (p *program) appendDevice(dev specs.LinuxDeviceCgroup) error {
hasType = false
default:
// if not specified in OCI json, typ is set to DeviceTypeAll
return errors.Errorf("invalid DeviceType %q", dev.Type)
return fmt.Errorf("invalid DeviceType %q", dev.Type)
}
if *dev.Major > math.MaxUint32 {
return errors.Errorf("invalid major %d", *dev.Major)
return fmt.Errorf("invalid major %d", *dev.Major)
}
if *dev.Minor > math.MaxUint32 {
return errors.Errorf("invalid minor %d", *dev.Major)
return fmt.Errorf("invalid minor %d", *dev.Major)
}
hasMajor := *dev.Major >= 0 // if not specified in OCI json, major is set to -1
hasMinor := *dev.Minor >= 0
@ -126,7 +127,7 @@ func (p *program) appendDevice(dev specs.LinuxDeviceCgroup) error {
case 'm':
bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD
default:
return errors.Errorf("unknown device access %v", r)
return fmt.Errorf("unknown device access %v", r)
}
}
// If the access is rwm, skip the check.


@ -17,11 +17,12 @@
package v2
import (
"fmt"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/link"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
@ -50,7 +51,7 @@ func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD
Flags: unix.BPF_F_ALLOW_MULTI,
})
if err != nil {
return nilCloser, errors.Wrap(err, "failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
return nilCloser, fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err)
}
closer := func() error {
err = link.RawDetachProgram(link.RawDetachProgramOptions{
@ -59,7 +60,7 @@ func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD
Attach: ebpf.AttachCGroupDevice,
})
if err != nil {
return errors.Wrap(err, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE)")
return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE): %w", err)
}
return nil
}
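
Taken together, DeviceFilter and LoadAttachCgroupDeviceFilter compile an OCI device rule list into an eBPF program and attach it to a cgroup v2 directory. A hedged sketch; the cgroup path and the single /dev/null rule are illustrative, and running it needs root plus BPF support:

package main

import (
	"log"

	v2 "github.com/containerd/cgroups/v2"
	specs "github.com/opencontainers/runtime-spec/specs-go"
	"golang.org/x/sys/unix"
)

func main() {
	// Allow read/write/mknod on the character device 1:3 (/dev/null).
	major, minor := int64(1), int64(3)
	rules := []specs.LinuxDeviceCgroup{
		{Allow: true, Type: "c", Major: &major, Minor: &minor, Access: "rwm"},
	}
	insts, license, err := v2.DeviceFilter(rules)
	if err != nil {
		log.Fatal(err)
	}

	dirFD, err := unix.Open("/sys/fs/cgroup/mygroup", unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0600)
	if err != nil {
		log.Fatal(err)
	}
	defer unix.Close(dirFD)

	closer, err := v2.LoadAttachCgroupDeviceFilter(insts, license, dirFD)
	if err != nil {
		log.Fatal(err)
	}
	defer closer()
}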


@ -18,29 +18,9 @@ package v2
import (
"errors"
"os"
)
var (
ErrInvalidPid = errors.New("cgroups: pid must be greater than 0")
ErrMountPointNotExist = errors.New("cgroups: cgroup mountpoint does not exist")
ErrInvalidFormat = errors.New("cgroups: parsing file with invalid format failed")
ErrFreezerNotSupported = errors.New("cgroups: freezer cgroup (v2) not supported on this system")
ErrMemoryNotSupported = errors.New("cgroups: memory cgroup (v2) not supported on this system")
ErrPidsNotSupported = errors.New("cgroups: pids cgroup (v2) not supported on this system")
ErrCPUNotSupported = errors.New("cgroups: cpu cgroup (v2) not supported on this system")
ErrCgroupDeleted = errors.New("cgroups: cgroup deleted")
ErrNoCgroupMountDestination = errors.New("cgroups: cannot find cgroup mount destination")
ErrInvalidGroupPath = errors.New("cgroups: invalid group path")
ErrInvalidFormat = errors.New("cgroups: parsing file with invalid format failed")
ErrInvalidGroupPath = errors.New("cgroups: invalid group path")
)
// ErrorHandler is a function that handles and acts on errors
type ErrorHandler func(err error) error
// IgnoreNotExist ignores any errors that are for not existing files
func IgnoreNotExist(err error) error {
if os.IsNotExist(err) {
return nil
}
return err
}


@ -18,6 +18,9 @@ package v2
import (
"bufio"
"context"
"errors"
"fmt"
"io/ioutil"
"math"
"os"
@ -28,10 +31,10 @@ import (
"time"
"github.com/containerd/cgroups/v2/stats"
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
"github.com/godbus/dbus/v5"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
@ -270,7 +273,9 @@ func (c *Manager) ToggleControllers(controllers []string, t ControllerToggle) er
// When running as rootless, the user may face EPERM on parent groups, but it is neglible when the
// controller is already written.
// So we only return the last error.
lastErr = errors.Wrapf(err, "failed to write subtree controllers %+v to %q", controllers, filePath)
lastErr = fmt.Errorf("failed to write subtree controllers %+v to %q: %w", controllers, filePath, err)
} else {
lastErr = nil
}
}
return lastErr
@ -300,15 +305,23 @@ func (c *Manager) NewChild(name string, resources *Resources) (*Manager, error)
if err := os.MkdirAll(path, defaultDirPerm); err != nil {
return nil, err
}
m := Manager{
unifiedMountpoint: c.unifiedMountpoint,
path: path,
}
if resources != nil {
if err := m.ToggleControllers(resources.EnabledControllers(), Enable); err != nil {
// clean up cgroup dir on failure
os.Remove(path)
return nil, err
}
}
if err := setResources(path, resources); err != nil {
// clean up cgroup dir on failure
os.Remove(path)
return nil, err
}
return &Manager{
unifiedMountpoint: c.unifiedMountpoint,
path: path,
}, nil
return &m, nil
}
func (c *Manager) AddProc(pid uint64) error {
@ -515,7 +528,7 @@ func readKVStatsFile(path string, file string, out map[string]interface{}) error
for s.Scan() {
name, value, err := parseKV(s.Text())
if err != nil {
return errors.Wrapf(err, "error while parsing %s (line=%q)", filepath.Join(path, file), s.Text())
return fmt.Errorf("error while parsing %s (line=%q): %w", filepath.Join(path, file), s.Text(), err)
}
out[name] = value
}
@ -547,17 +560,39 @@ func (c *Manager) freeze(path string, state State) error {
}
}
func (c *Manager) isCgroupEmpty() bool {
// In case of any error we return true so that we exit and don't leak resources
out := make(map[string]interface{})
if err := readKVStatsFile(c.path, "cgroup.events", out); err != nil {
return true
}
if v, ok := out["populated"]; ok {
populated, ok := v.(uint64)
if !ok {
return true
}
return populated == 0
}
return true
}
// MemoryEventFD returns inotify file descriptor and 'memory.events' inotify watch descriptor
func (c *Manager) MemoryEventFD() (int, uint32, error) {
fpath := filepath.Join(c.path, "memory.events")
fd, err := syscall.InotifyInit()
if err != nil {
return 0, 0, errors.Errorf("Failed to create inotify fd")
return 0, 0, errors.New("failed to create inotify fd")
}
wd, err := syscall.InotifyAddWatch(fd, fpath, unix.IN_MODIFY)
if wd < 0 {
if err != nil {
syscall.Close(fd)
return 0, 0, errors.Errorf("Failed to add inotify watch for %q", fpath)
return 0, 0, fmt.Errorf("failed to add inotify watch for %q: %w", fpath, err)
}
// monitor to detect process exit/cgroup deletion
evpath := filepath.Join(c.path, "cgroup.events")
if _, err = syscall.InotifyAddWatch(fd, evpath, unix.IN_MODIFY); err != nil {
syscall.Close(fd)
return 0, 0, fmt.Errorf("failed to add inotify watch for %q: %w", evpath, err)
}
return fd, uint32(wd), nil
@ -565,22 +600,56 @@ func (c *Manager) MemoryEventFD() (int, uint32, error) {
func (c *Manager) EventChan() (<-chan Event, <-chan error) {
ec := make(chan Event)
errCh := make(chan error)
errCh := make(chan error, 1)
go c.waitForEvents(ec, errCh)
return ec, nil
return ec, errCh
}
func parseMemoryEvents(out map[string]interface{}) (Event, error) {
e := Event{}
if v, ok := out["high"]; ok {
e.High, ok = v.(uint64)
if !ok {
return Event{}, fmt.Errorf("cannot convert high to uint64: %+v", v)
}
}
if v, ok := out["low"]; ok {
e.Low, ok = v.(uint64)
if !ok {
return Event{}, fmt.Errorf("cannot convert low to uint64: %+v", v)
}
}
if v, ok := out["max"]; ok {
e.Max, ok = v.(uint64)
if !ok {
return Event{}, fmt.Errorf("cannot convert max to uint64: %+v", v)
}
}
if v, ok := out["oom"]; ok {
e.OOM, ok = v.(uint64)
if !ok {
return Event{}, fmt.Errorf("cannot convert oom to uint64: %+v", v)
}
}
if v, ok := out["oom_kill"]; ok {
e.OOMKill, ok = v.(uint64)
if !ok {
return Event{}, fmt.Errorf("cannot convert oom_kill to uint64: %+v", v)
}
}
return e, nil
}
func (c *Manager) waitForEvents(ec chan<- Event, errCh chan<- error) {
fd, wd, err := c.MemoryEventFD()
defer syscall.InotifyRmWatch(fd, wd)
defer syscall.Close(fd)
defer close(errCh)
fd, _, err := c.MemoryEventFD()
if err != nil {
errCh <- err
return
}
defer syscall.Close(fd)
for {
buffer := make([]byte, syscall.SizeofInotifyEvent*10)
@ -591,48 +660,22 @@ func (c *Manager) waitForEvents(ec chan<- Event, errCh chan<- error) {
}
if bytesRead >= syscall.SizeofInotifyEvent {
out := make(map[string]interface{})
if err := readKVStatsFile(c.path, "memory.events", out); err == nil {
e := Event{}
if v, ok := out["high"]; ok {
e.High, ok = v.(uint64)
if !ok {
errCh <- errors.Errorf("cannot convert high to uint64: %+v", v)
return
}
if err := readKVStatsFile(c.path, "memory.events", out); err != nil {
// When cgroup is deleted read may return -ENODEV instead of -ENOENT from open.
if _, statErr := os.Lstat(filepath.Join(c.path, "memory.events")); !os.IsNotExist(statErr) {
errCh <- err
}
if v, ok := out["low"]; ok {
e.Low, ok = v.(uint64)
if !ok {
errCh <- errors.Errorf("cannot convert low to uint64: %+v", v)
return
}
}
if v, ok := out["max"]; ok {
e.Max, ok = v.(uint64)
if !ok {
errCh <- errors.Errorf("cannot convert max to uint64: %+v", v)
return
}
}
if v, ok := out["oom"]; ok {
e.OOM, ok = v.(uint64)
if !ok {
errCh <- errors.Errorf("cannot convert oom to uint64: %+v", v)
return
}
}
if v, ok := out["oom_kill"]; ok {
e.OOMKill, ok = v.(uint64)
if !ok {
errCh <- errors.Errorf("cannot convert oom_kill to uint64: %+v", v)
return
}
}
ec <- e
} else {
return
}
e, err := parseMemoryEvents(out)
if err != nil {
errCh <- err
return
}
ec <- e
if c.isCgroupEmpty() {
return
}
}
}
}
@ -645,9 +688,9 @@ func setDevices(path string, devices []specs.LinuxDeviceCgroup) error {
if err != nil {
return err
}
dirFD, err := unix.Open(path, unix.O_DIRECTORY|unix.O_RDONLY, 0600)
dirFD, err := unix.Open(path, unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0600)
if err != nil {
return errors.Errorf("cannot get dir FD for %s", path)
return fmt.Errorf("cannot get dir FD for %s", path)
}
defer unix.Close(dirFD)
if _, err := LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
@ -662,8 +705,9 @@ func NewSystemd(slice, group string, pid int, resources *Resources) (*Manager, e
if slice == "" {
slice = defaultSlice
}
ctx := context.TODO()
path := filepath.Join(defaultCgroup2Path, slice, group)
conn, err := systemdDbus.New()
conn, err := systemdDbus.NewWithContext(ctx)
if err != nil {
return &Manager{}, err
}
@ -733,7 +777,7 @@ func NewSystemd(slice, group string, pid int, resources *Resources) (*Manager, e
}
statusChan := make(chan string, 1)
if _, err := conn.StartTransientUnit(group, "replace", properties, statusChan); err == nil {
if _, err := conn.StartTransientUnitContext(ctx, group, "replace", properties, statusChan); err == nil {
select {
case <-statusChan:
case <-time.After(time.Second):
@ -759,14 +803,15 @@ func LoadSystemd(slice, group string) (*Manager, error) {
}
func (c *Manager) DeleteSystemd() error {
conn, err := systemdDbus.New()
ctx := context.TODO()
conn, err := systemdDbus.NewWithContext(ctx)
if err != nil {
return err
}
defer conn.Close()
group := systemdUnitFromPath(c.path)
ch := make(chan string)
_, err = conn.StopUnit(group, "replace", ch)
_, err = conn.StopUnitContext(ctx, group, "replace", ch)
if err != nil {
return err
}
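
The event API in this file also changed: EventChan now returns a buffered, usable error channel, and waitForEvents exits when the cgroup becomes empty instead of leaking the watch. A hedged consumption sketch; the group path is illustrative and it assumes the vendored v2 package on a unified hierarchy:

package main

import (
	"log"

	v2 "github.com/containerd/cgroups/v2"
)

func main() {
	m, err := v2.LoadManager("/sys/fs/cgroup", "/mygroup")
	if err != nil {
		log.Fatal(err)
	}

	ec, errCh := m.EventChan() // errCh was previously always nil; it now reports watcher failures
	for {
		select {
		case ev := <-ec:
			log.Printf("memory events: high=%d low=%d max=%d oom=%d oom_kill=%d",
				ev.High, ev.Low, ev.Max, ev.OOM, ev.OOMKill)
		case err := <-errCh:
			if err != nil {
				log.Fatal(err)
			}
			return // channel closed: the watcher finished (e.g. the cgroup was removed)
		}
	}
}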


@ -29,9 +29,9 @@ import (
"time"
"github.com/containerd/cgroups/v2/stats"
"github.com/godbus/dbus/v5"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
@ -61,7 +61,7 @@ func remove(path string) error {
return nil
}
}
return errors.Wrapf(err, "cgroups: unable to remove path %q", path)
return fmt.Errorf("cgroups: unable to remove path %q: %w", path, err)
}
// parseCgroupProcsFile parses /sys/fs/cgroup/$GROUPPATH/cgroup.procs


@ -12,8 +12,11 @@ Darren Stahl <darst@microsoft.com>
Derek McGowan <derek@mcg.dev>
Derek McGowan <derek@mcgstyle.net>
Edward Pilatowicz <edward.pilatowicz@oracle.com>
Fu Wei <fuweid89@gmail.com>
Hajime Tazaki <thehajime@gmail.com>
Ian Campbell <ijc@docker.com>
Ivan Markin <sw@nogoegst.net>
Jacob Blain Christen <jacob@rancher.com>
Justin Cormack <justin.cormack@docker.com>
Justin Cummins <sul3n3t@gmail.com>
Kasper Fabæch Brandt <poizan@poizan.dk>
@ -23,10 +26,11 @@ Michael Crosby <michael@thepasture.io>
Michael Wan <zirenwan@gmail.com>
Mike Brown <brownwm@us.ibm.com>
Niels de Vos <ndevos@redhat.com>
Phil Estes <estesp@amazon.com>
Phil Estes <estesp@gmail.com>
Phil Estes <estesp@linux.vnet.ibm.com>
Samuel Karp <me@samuelkarp.com>
Sam Whited <sam@samwhited.com>
Samuel Karp <me@samuelkarp.com>
Sebastiaan van Stijn <github@gone.nl>
Shengjing Zhu <zhsj@debian.org>
Stephen J Day <stephen.day@docker.com>


@ -1,4 +1,5 @@
// +build linux darwin freebsd solaris
//go:build !windows
// +build !windows
/*
Copyright The containerd Authors.


@ -17,11 +17,10 @@
package devices
import (
"fmt"
"os"
"github.com/pkg/errors"
)
func DeviceInfo(fi os.FileInfo) (uint64, uint64, error) {
return 0, 0, errors.Wrap(ErrNotSupported, "cannot get device info on windows")
return 0, 0, fmt.Errorf("cannot get device info on windows: %w", ErrNotSupported)
}


@ -1,3 +1,4 @@
//go:build freebsd
// +build freebsd
/*


@ -1,4 +1,5 @@
// +build linux darwin solaris
//go:build !(freebsd || windows)
// +build !freebsd,!windows
/*
Copyright The containerd Authors.


@ -1,4 +1,5 @@
// +build linux darwin freebsd solaris
//go:build !windows
// +build !windows
/*
Copyright The containerd Authors.


@ -1,3 +1,4 @@
//go:build go1.13
// +build go1.13
/*


@ -1,4 +1,5 @@
// +build darwin freebsd solaris
//go:build darwin || freebsd || netbsd || openbsd || solaris
// +build darwin freebsd netbsd openbsd solaris
/*
Copyright The containerd Authors.


@ -17,12 +17,11 @@
package fs
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"sync"
"github.com/pkg/errors"
)
var bufferPool = &sync.Pool{
@ -92,35 +91,35 @@ func CopyDir(dst, src string, opts ...CopyDirOpt) error {
func copyDirectory(dst, src string, inodes map[uint64]string, o *copyDirOpts) error {
stat, err := os.Stat(src)
if err != nil {
return errors.Wrapf(err, "failed to stat %s", src)
return fmt.Errorf("failed to stat %s: %w", src, err)
}
if !stat.IsDir() {
return errors.Errorf("source %s is not directory", src)
return fmt.Errorf("source %s is not directory", src)
}
if st, err := os.Stat(dst); err != nil {
if err := os.Mkdir(dst, stat.Mode()); err != nil {
return errors.Wrapf(err, "failed to mkdir %s", dst)
return fmt.Errorf("failed to mkdir %s: %w", dst, err)
}
} else if !st.IsDir() {
return errors.Errorf("cannot copy to non-directory: %s", dst)
return fmt.Errorf("cannot copy to non-directory: %s", dst)
} else {
if err := os.Chmod(dst, stat.Mode()); err != nil {
return errors.Wrapf(err, "failed to chmod on %s", dst)
return fmt.Errorf("failed to chmod on %s: %w", dst, err)
}
}
fis, err := ioutil.ReadDir(src)
if err != nil {
return errors.Wrapf(err, "failed to read %s", src)
return fmt.Errorf("failed to read %s: %w", src, err)
}
if err := copyFileInfo(stat, dst); err != nil {
return errors.Wrapf(err, "failed to copy file info for %s", dst)
if err := copyFileInfo(stat, src, dst); err != nil {
return fmt.Errorf("failed to copy file info for %s: %w", dst, err)
}
if err := copyXAttrs(dst, src, o.xex, o.xeh); err != nil {
return errors.Wrap(err, "failed to copy xattrs")
return fmt.Errorf("failed to copy xattrs: %w", err)
}
for _, fi := range fis {
@ -136,37 +135,38 @@ func copyDirectory(dst, src string, inodes map[uint64]string, o *copyDirOpts) er
case (fi.Mode() & os.ModeType) == 0:
link, err := getLinkSource(target, fi, inodes)
if err != nil {
return errors.Wrap(err, "failed to get hardlink")
return fmt.Errorf("failed to get hardlink: %w", err)
}
if link != "" {
if err := os.Link(link, target); err != nil {
return errors.Wrap(err, "failed to create hard link")
return fmt.Errorf("failed to create hard link: %w", err)
}
} else if err := CopyFile(target, source); err != nil {
return errors.Wrap(err, "failed to copy files")
return fmt.Errorf("failed to copy files: %w", err)
}
case (fi.Mode() & os.ModeSymlink) == os.ModeSymlink:
link, err := os.Readlink(source)
if err != nil {
return errors.Wrapf(err, "failed to read link: %s", source)
return fmt.Errorf("failed to read link: %s: %w", source, err)
}
if err := os.Symlink(link, target); err != nil {
return errors.Wrapf(err, "failed to create symlink: %s", target)
return fmt.Errorf("failed to create symlink: %s: %w", target, err)
}
case (fi.Mode() & os.ModeDevice) == os.ModeDevice:
if err := copyDevice(target, fi); err != nil {
return errors.Wrapf(err, "failed to create device")
return fmt.Errorf("failed to create device: %w", err)
}
default:
// TODO: Support pipes and sockets
return errors.Wrapf(err, "unsupported mode %s", fi.Mode())
return fmt.Errorf("unsupported mode %s: %w", fi.Mode(), err)
}
if err := copyFileInfo(fi, target); err != nil {
return errors.Wrap(err, "failed to copy file info")
if err := copyFileInfo(fi, source, target); err != nil {
return fmt.Errorf("failed to copy file info: %w", err)
}
if err := copyXAttrs(target, source, o.xex, o.xeh); err != nil {
return errors.Wrap(err, "failed to copy xattrs")
return fmt.Errorf("failed to copy xattrs: %w", err)
}
}
@ -178,12 +178,12 @@ func copyDirectory(dst, src string, inodes map[uint64]string, o *copyDirOpts) er
func CopyFile(target, source string) error {
src, err := os.Open(source)
if err != nil {
return errors.Wrapf(err, "failed to open source %s", source)
return fmt.Errorf("failed to open source %s: %w", source, err)
}
defer src.Close()
tgt, err := os.Create(target)
if err != nil {
return errors.Wrapf(err, "failed to open target %s", target)
return fmt.Errorf("failed to open target %s: %w", target, err)
}
defer tgt.Close()


@ -1,4 +1,5 @@
// +build darwin openbsd solaris
//go:build darwin
// +build darwin
/*
Copyright The containerd Authors.
@ -19,10 +20,10 @@
package fs
import (
"errors"
"os"
"syscall"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
@ -33,8 +34,3 @@ func copyDevice(dst string, fi os.FileInfo) error {
}
return unix.Mknod(dst, uint32(fi.Mode()), int(st.Rdev))
}
func utimesNano(name string, atime, mtime syscall.Timespec) error {
timespec := []syscall.Timespec{atime, mtime}
return syscall.UtimesNano(name, timespec)
}


@ -0,0 +1,36 @@
//go:build openbsd || solaris || netbsd
// +build openbsd solaris netbsd
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package fs
import (
"errors"
"os"
"syscall"
"golang.org/x/sys/unix"
)
func copyDevice(dst string, fi os.FileInfo) error {
st, ok := fi.Sys().(*syscall.Stat_t)
if !ok {
return errors.New("unsupported stat type")
}
return unix.Mknod(dst, uint32(fi.Mode()), int(st.Rdev))
}


@ -1,3 +1,4 @@
//go:build freebsd
// +build freebsd
/*
@ -19,10 +20,10 @@
package fs
import (
"errors"
"os"
"syscall"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
@ -33,10 +34,3 @@ func copyDevice(dst string, fi os.FileInfo) error {
}
return unix.Mknod(dst, uint32(fi.Mode()), st.Rdev)
}
func utimesNano(name string, atime, mtime syscall.Timespec) error {
at := unix.NsecToTimespec(atime.Nano())
mt := unix.NsecToTimespec(mtime.Nano())
utimes := [2]unix.Timespec{at, mt}
return unix.UtimesNanoAt(unix.AT_FDCWD, name, utimes[0:], unix.AT_SYMLINK_NOFOLLOW)
}


@ -17,16 +17,17 @@
package fs
import (
"errors"
"fmt"
"io"
"os"
"syscall"
"github.com/containerd/continuity/sysx"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
func copyFileInfo(fi os.FileInfo, name string) error {
func copyFileInfo(fi os.FileInfo, src, name string) error {
st := fi.Sys().(*syscall.Stat_t)
if err := os.Lchown(name, int(st.Uid), int(st.Gid)); err != nil {
if os.IsPermission(err) {
@ -41,13 +42,13 @@ func copyFileInfo(fi os.FileInfo, name string) error {
}
}
if err != nil {
return errors.Wrapf(err, "failed to chown %s", name)
return fmt.Errorf("failed to chown %s: %w", name, err)
}
}
if (fi.Mode() & os.ModeSymlink) != os.ModeSymlink {
if err := os.Chmod(name, fi.Mode()); err != nil {
return errors.Wrapf(err, "failed to chmod %s", name)
return fmt.Errorf("failed to chmod %s: %w", name, err)
}
}
@ -56,7 +57,7 @@ func copyFileInfo(fi os.FileInfo, name string) error {
unix.NsecToTimespec(syscall.TimespecToNsec(StatMtime(st))),
}
if err := unix.UtimesNanoAt(unix.AT_FDCWD, name, timespec, unix.AT_SYMLINK_NOFOLLOW); err != nil {
return errors.Wrapf(err, "failed to utime %s", name)
return fmt.Errorf("failed to utime %s: %w", name, err)
}
return nil
@ -67,7 +68,7 @@ const maxSSizeT = int64(^uint(0) >> 1)
func copyFileContent(dst, src *os.File) error {
st, err := src.Stat()
if err != nil {
return errors.Wrap(err, "unable to stat source")
return fmt.Errorf("unable to stat source: %w", err)
}
size := st.Size()
@ -88,13 +89,16 @@ func copyFileContent(dst, src *os.File) error {
n, err := unix.CopyFileRange(srcFd, nil, dstFd, nil, copySize, 0)
if err != nil {
if (err != unix.ENOSYS && err != unix.EXDEV) || !first {
return errors.Wrap(err, "copy file range failed")
return fmt.Errorf("copy file range failed: %w", err)
}
buf := bufferPool.Get().(*[]byte)
_, err = io.CopyBuffer(dst, src, *buf)
bufferPool.Put(buf)
return errors.Wrap(err, "userspace copy failed")
if err != nil {
return fmt.Errorf("userspace copy failed: %w", err)
}
return nil
}
first = false
@ -107,7 +111,7 @@ func copyFileContent(dst, src *os.File) error {
func copyXAttrs(dst, src string, excludes map[string]struct{}, errorHandler XAttrErrorHandler) error {
xattrKeys, err := sysx.LListxattr(src)
if err != nil {
e := errors.Wrapf(err, "failed to list xattrs on %s", src)
e := fmt.Errorf("failed to list xattrs on %s: %w", src, err)
if errorHandler != nil {
e = errorHandler(dst, src, "", e)
}
@ -119,7 +123,7 @@ func copyXAttrs(dst, src string, excludes map[string]struct{}, errorHandler XAtt
}
data, err := sysx.LGetxattr(src, xattr)
if err != nil {
e := errors.Wrapf(err, "failed to get xattr %q on %s", xattr, src)
e := fmt.Errorf("failed to get xattr %q on %s: %w", xattr, src, err)
if errorHandler != nil {
if e = errorHandler(dst, src, xattr, e); e == nil {
continue
@ -128,7 +132,7 @@ func copyXAttrs(dst, src string, excludes map[string]struct{}, errorHandler XAtt
return e
}
if err := sysx.LSetxattr(dst, xattr, data, 0); err != nil {
e := errors.Wrapf(err, "failed to set xattr %q on %s", xattr, dst)
e := fmt.Errorf("failed to set xattr %q on %s: %w", xattr, dst, err)
if errorHandler != nil {
if e = errorHandler(dst, src, xattr, e); e == nil {
continue
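Note on the error-handling changes above: the errors.Wrapf calls from github.com/pkg/errors are replaced with fmt.Errorf and the %w verb, so these files no longer need the external errors package. A minimal sketch, not part of the vendored code, showing that the new form still preserves the error chain for errors.Is/errors.As; the file path is hypothetical:

package main

import (
	"errors"
	"fmt"
	"io/fs"
	"os"
)

func main() {
	_, err := os.Open("/no/such/path") // hypothetical path, expected to fail
	wrapped := fmt.Errorf("failed to open source: %w", err)

	// %w keeps the wrapped error reachable, so sentinel checks behave the
	// same as they did with errors.Wrapf from github.com/pkg/errors.
	fmt.Println(errors.Is(wrapped, fs.ErrNotExist)) // true
}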

View file

@ -1,4 +1,5 @@
// +build darwin freebsd openbsd solaris
//go:build darwin || freebsd || openbsd || netbsd || solaris
// +build darwin freebsd openbsd netbsd solaris
/*
Copyright The containerd Authors.
@ -19,15 +20,15 @@
package fs
import (
"fmt"
"io"
"os"
"syscall"
"github.com/containerd/continuity/sysx"
"github.com/pkg/errors"
)
func copyFileInfo(fi os.FileInfo, name string) error {
func copyFileInfo(fi os.FileInfo, src, name string) error {
st := fi.Sys().(*syscall.Stat_t)
if err := os.Lchown(name, int(st.Uid), int(st.Gid)); err != nil {
if os.IsPermission(err) {
@ -42,18 +43,18 @@ func copyFileInfo(fi os.FileInfo, name string) error {
}
}
if err != nil {
return errors.Wrapf(err, "failed to chown %s", name)
return fmt.Errorf("failed to chown %s: %w", name, err)
}
}
if (fi.Mode() & os.ModeSymlink) != os.ModeSymlink {
if err := os.Chmod(name, fi.Mode()); err != nil {
return errors.Wrapf(err, "failed to chmod %s", name)
return fmt.Errorf("failed to chmod %s: %w", name, err)
}
}
if err := utimesNano(name, StatAtime(st), StatMtime(st)); err != nil {
return errors.Wrapf(err, "failed to utime %s", name)
return fmt.Errorf("failed to utime %s: %w", name, err)
}
return nil
@ -70,7 +71,7 @@ func copyFileContent(dst, src *os.File) error {
func copyXAttrs(dst, src string, excludes map[string]struct{}, errorHandler XAttrErrorHandler) error {
xattrKeys, err := sysx.LListxattr(src)
if err != nil {
e := errors.Wrapf(err, "failed to list xattrs on %s", src)
e := fmt.Errorf("failed to list xattrs on %s: %w", src, err)
if errorHandler != nil {
e = errorHandler(dst, src, "", e)
}
@ -82,7 +83,7 @@ func copyXAttrs(dst, src string, excludes map[string]struct{}, errorHandler XAtt
}
data, err := sysx.LGetxattr(src, xattr)
if err != nil {
e := errors.Wrapf(err, "failed to get xattr %q on %s", xattr, src)
e := fmt.Errorf("failed to get xattr %q on %s: %w", xattr, src, err)
if errorHandler != nil {
if e = errorHandler(dst, src, xattr, e); e == nil {
continue
@ -91,7 +92,7 @@ func copyXAttrs(dst, src string, excludes map[string]struct{}, errorHandler XAtt
return e
}
if err := sysx.LSetxattr(dst, xattr, data, 0); err != nil {
e := errors.Wrapf(err, "failed to set xattr %q on %s", xattr, dst)
e := fmt.Errorf("failed to set xattr %q on %s: %w", xattr, dst, err)
if errorHandler != nil {
if e = errorHandler(dst, src, xattr, e); e == nil {
continue

View file

@ -17,19 +17,60 @@
package fs
import (
"errors"
"fmt"
"io"
"os"
"github.com/pkg/errors"
winio "github.com/Microsoft/go-winio"
"golang.org/x/sys/windows"
)
func copyFileInfo(fi os.FileInfo, name string) error {
const (
seTakeOwnershipPrivilege = "SeTakeOwnershipPrivilege"
)
func copyFileInfo(fi os.FileInfo, src, name string) error {
if err := os.Chmod(name, fi.Mode()); err != nil {
return errors.Wrapf(err, "failed to chmod %s", name)
return fmt.Errorf("failed to chmod %s: %w", name, err)
}
// TODO: copy windows specific metadata
// Copy file ownership and ACL
// We need SeRestorePrivilege and SeTakeOwnershipPrivilege in order
// to restore security info on a file, especially if we're trying to
// apply security info which includes SIDs not necessarily present on
// the host.
privileges := []string{winio.SeRestorePrivilege, seTakeOwnershipPrivilege}
if err := winio.EnableProcessPrivileges(privileges); err != nil {
return err
}
defer winio.DisableProcessPrivileges(privileges)
secInfo, err := windows.GetNamedSecurityInfo(
src, windows.SE_FILE_OBJECT,
windows.OWNER_SECURITY_INFORMATION|windows.DACL_SECURITY_INFORMATION)
if err != nil {
return err
}
dacl, _, err := secInfo.DACL()
if err != nil {
return err
}
sid, _, err := secInfo.Owner()
if err != nil {
return err
}
if err := windows.SetNamedSecurityInfo(
name, windows.SE_FILE_OBJECT,
windows.OWNER_SECURITY_INFORMATION|windows.DACL_SECURITY_INFORMATION,
sid, nil, dacl, nil); err != nil {
return err
}
return nil
}
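The Windows copyFileInfo now receives the source path as well, so it can read the owner SID and DACL from src and apply them to the destination; SeRestorePrivilege and SeTakeOwnershipPrivilege are enabled around the call because the copied security descriptor may reference SIDs that are not present on the host. A small sketch of the same privilege-wrapping pattern, using only the go-winio calls that appear above; the helper name is hypothetical and the callback is a placeholder:

//go:build windows
// +build windows

package main

import (
	winio "github.com/Microsoft/go-winio"
)

// withRestorePrivileges runs fn with the restore/take-ownership privileges
// enabled and disables them again afterwards, mirroring the pattern above.
func withRestorePrivileges(fn func() error) error {
	privileges := []string{winio.SeRestorePrivilege, "SeTakeOwnershipPrivilege"}
	if err := winio.EnableProcessPrivileges(privileges); err != nil {
		return err
	}
	defer winio.DisableProcessPrivileges(privileges)
	return fn()
}

func main() {
	_ = withRestorePrivileges(func() error {
		// security-descriptor work (e.g. SetNamedSecurityInfo) would go here
		return nil
	})
}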

View file

@ -1,3 +1,4 @@
//go:build !windows
// +build !windows
/*
@ -20,11 +21,11 @@ package fs
import (
"bytes"
"fmt"
"os"
"syscall"
"github.com/containerd/continuity/sysx"
"github.com/pkg/errors"
)
// detectDirDiff returns diff dir options if a directory could
@ -56,11 +57,11 @@ func compareSysStat(s1, s2 interface{}) (bool, error) {
func compareCapabilities(p1, p2 string) (bool, error) {
c1, err := sysx.LGetxattr(p1, "security.capability")
if err != nil && err != sysx.ENODATA {
return false, errors.Wrapf(err, "failed to get xattr for %s", p1)
return false, fmt.Errorf("failed to get xattr for %s: %w", p1, err)
}
c2, err := sysx.LGetxattr(p2, "security.capability")
if err != nil && err != sysx.ENODATA {
return false, errors.Wrapf(err, "failed to get xattr for %s", p2)
return false, fmt.Errorf("failed to get xattr for %s: %w", p2, err)
}
return bytes.Equal(c1, c2), nil
}
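The paired //go:build and // +build lines added at the top of these files follow the Go 1.17 build-constraint syntax; the legacy // +build form is kept so older toolchains still honour the constraint. A hedged illustration of the equivalence, with a placeholder package name:

//go:build linux || openbsd || solaris
// +build linux openbsd solaris

// Both constraint lines above select the same platforms: the //go:build
// form uses || and && operators, while the legacy // +build form uses
// spaces for OR and commas for AND. gofmt keeps the two lines in sync.
package fs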

View file

@ -1,3 +1,4 @@
//go:build linux
// +build linux
/*

View file

@ -1,3 +1,4 @@
//go:build !windows
// +build !windows
/*
@ -41,10 +42,8 @@ type inode struct {
func newInode(stat *syscall.Stat_t) inode {
return inode{
// Dev is uint32 on darwin/bsd, uint64 on linux/solaris/freebsd
dev: uint64(stat.Dev), // nolint: unconvert
// Ino is uint32 on bsd, uint64 on darwin/linux/solaris/freebsd
ino: uint64(stat.Ino), // nolint: unconvert
dev: uint64(stat.Dev), //nolint: unconvert // dev is uint32 on darwin/bsd, uint64 on linux/solaris/freebsd
ino: uint64(stat.Ino), //nolint: unconvert // ino is uint32 on bsd, uint64 on darwin/linux/solaris/freebsd
}
}

View file

@ -1,3 +1,4 @@
//go:build windows
// +build windows
/*

View file

@ -1,3 +1,4 @@
//go:build !windows
// +build !windows
/*
@ -29,6 +30,5 @@ func getLinkInfo(fi os.FileInfo) (uint64, bool) {
return 0, false
}
// Ino is uint32 on bsd, uint64 on darwin/linux/solaris
return uint64(s.Ino), !fi.IsDir() && s.Nlink > 1 // nolint: unconvert
return uint64(s.Ino), !fi.IsDir() && s.Nlink > 1 //nolint: unconvert // ino is uint32 on bsd, uint64 on darwin/linux/solaris
}

View file

@ -19,11 +19,10 @@ package fs
import (
"bytes"
"context"
"errors"
"io"
"os"
"path/filepath"
"github.com/pkg/errors"
)
var (

View file

@ -1,4 +1,5 @@
// +build linux openbsd
//go:build linux || openbsd || solaris
// +build linux openbsd solaris
/*
Copyright The containerd Authors.
@ -40,6 +41,5 @@ func StatMtime(st *syscall.Stat_t) syscall.Timespec {
// StatATimeAsTime returns st.Atim as a time.Time
func StatATimeAsTime(st *syscall.Stat_t) time.Time {
// The int64 conversions ensure the line compiles for 32-bit systems as well.
return time.Unix(int64(st.Atim.Sec), int64(st.Atim.Nsec)) // nolint: unconvert
return time.Unix(st.Atim.Unix())
}
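StatATimeAsTime now relies on syscall.Timespec.Unix(), which returns the (sec, nsec int64) pair that time.Unix expects, so the explicit int64 conversions and the nolint comment are no longer needed. A small sketch of the equivalence on Unix-like platforms; the timestamp value is made up:

//go:build !windows
// +build !windows

package main

import (
	"fmt"
	"syscall"
	"time"
)

func main() {
	ts := syscall.Timespec{Sec: 1645722348, Nsec: 500000000} // hypothetical timestamp

	// Timespec.Unix returns (sec int64, nsec int64), matching time.Unix's
	// parameters, so the two calls below yield the same time.Time.
	sec, nsec := ts.Unix()
	fmt.Println(time.Unix(sec, nsec).UTC())
	fmt.Println(time.Unix(ts.Unix()).UTC())
}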

View file

@ -1,4 +1,5 @@
// +build darwin freebsd
//go:build darwin || freebsd || netbsd
// +build darwin freebsd netbsd
/*
Copyright The containerd Authors.
@ -40,5 +41,5 @@ func StatMtime(st *syscall.Stat_t) syscall.Timespec {
// StatATimeAsTime returns the access time as a time.Time
func StatATimeAsTime(st *syscall.Stat_t) time.Time {
return time.Unix(int64(st.Atimespec.Sec), int64(st.Atimespec.Nsec)) // nolint: unconvert
return time.Unix(st.Atimespec.Unix())
}

View file

@ -0,0 +1,33 @@
//go:build !(windows || linux)
// +build !windows,!linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package fs
import (
"syscall"
"golang.org/x/sys/unix"
)
func utimesNano(name string, atime, mtime syscall.Timespec) error {
at := unix.NsecToTimespec(atime.Nano())
mt := unix.NsecToTimespec(mtime.Nano())
utimes := [2]unix.Timespec{at, mt}
return unix.UtimesNanoAt(unix.AT_FDCWD, name, utimes[0:], unix.AT_SYMLINK_NOFOLLOW)
}

View file

@ -1,4 +1,5 @@
// +build darwin freebsd openbsd
//go:build !(linux || solaris || windows)
// +build !linux,!solaris,!windows
/*
Copyright The containerd Authors.

View file

@ -1,3 +1,4 @@
//go:build linux || darwin
// +build linux darwin
/*

View file

@ -1,3 +1,4 @@
//go:build !linux && !darwin
// +build !linux,!darwin
/*

View file

@ -1,16 +0,0 @@
cmd/snappytool/snappytool
testdata/bench
# These explicitly listed benchmark data files are for an obsolete version of
# snappy_test.go.
testdata/alice29.txt
testdata/asyoulik.txt
testdata/fireworks.jpeg
testdata/geo.protodata
testdata/html
testdata/html_x_4
testdata/kppkn.gtb
testdata/lcet10.txt
testdata/paper-100k.pdf
testdata/plrabn12.txt
testdata/urls.10K

View file

@ -1,17 +0,0 @@
# This is the official list of Snappy-Go authors for copyright purposes.
# This file is distinct from the CONTRIBUTORS files.
# See the latter for an explanation.
# Names should be added to this file as
# Name or Organization <email address>
# The email address is not required for organizations.
# Please keep the list sorted.
Amazon.com, Inc
Damian Gryski <dgryski@gmail.com>
Google Inc.
Jan Mercl <0xjnml@gmail.com>
Klaus Post <klauspost@gmail.com>
Rodolfo Carvalho <rhcarvalho@gmail.com>
Sebastien Binet <seb.binet@gmail.com>

View file

@ -1,39 +0,0 @@
# This is the official list of people who can contribute
# (and typically have contributed) code to the Snappy-Go repository.
# The AUTHORS file lists the copyright holders; this file
# lists people. For example, Google employees are listed here
# but not in AUTHORS, because Google holds the copyright.
#
# The submission process automatically checks to make sure
# that people submitting code are listed in this file (by email address).
#
# Names should be added to this file only after verifying that
# the individual or the individual's organization has agreed to
# the appropriate Contributor License Agreement, found here:
#
# http://code.google.com/legal/individual-cla-v1.0.html
# http://code.google.com/legal/corporate-cla-v1.0.html
#
# The agreement for individuals can be filled out on the web.
#
# When adding J Random Contributor's name to this file,
# either J's name or J's organization's name should be
# added to the AUTHORS file, depending on whether the
# individual or corporate CLA was used.
# Names should be added to this file like so:
# Name <email address>
# Please keep the list sorted.
Damian Gryski <dgryski@gmail.com>
Jan Mercl <0xjnml@gmail.com>
Jonathan Swinney <jswinney@amazon.com>
Kai Backman <kaib@golang.org>
Klaus Post <klauspost@gmail.com>
Marc-Antoine Ruel <maruel@chromium.org>
Nigel Tao <nigeltao@golang.org>
Rob Pike <r@golang.org>
Rodolfo Carvalho <rhcarvalho@gmail.com>
Russ Cox <rsc@golang.org>
Sebastien Binet <seb.binet@gmail.com>

View file

@ -1,107 +0,0 @@
The Snappy compression format in the Go programming language.
To download and install from source:
$ go get github.com/golang/snappy
Unless otherwise noted, the Snappy-Go source files are distributed
under the BSD-style license found in the LICENSE file.
Benchmarks.
The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten
or so files, the same set used by the C++ Snappy code (github.com/google/snappy
and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @
3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29:
"go test -test.bench=."
_UFlat0-8 2.19GB/s ± 0% html
_UFlat1-8 1.41GB/s ± 0% urls
_UFlat2-8 23.5GB/s ± 2% jpg
_UFlat3-8 1.91GB/s ± 0% jpg_200
_UFlat4-8 14.0GB/s ± 1% pdf
_UFlat5-8 1.97GB/s ± 0% html4
_UFlat6-8 814MB/s ± 0% txt1
_UFlat7-8 785MB/s ± 0% txt2
_UFlat8-8 857MB/s ± 0% txt3
_UFlat9-8 719MB/s ± 1% txt4
_UFlat10-8 2.84GB/s ± 0% pb
_UFlat11-8 1.05GB/s ± 0% gaviota
_ZFlat0-8 1.04GB/s ± 0% html
_ZFlat1-8 534MB/s ± 0% urls
_ZFlat2-8 15.7GB/s ± 1% jpg
_ZFlat3-8 740MB/s ± 3% jpg_200
_ZFlat4-8 9.20GB/s ± 1% pdf
_ZFlat5-8 991MB/s ± 0% html4
_ZFlat6-8 379MB/s ± 0% txt1
_ZFlat7-8 352MB/s ± 0% txt2
_ZFlat8-8 396MB/s ± 1% txt3
_ZFlat9-8 327MB/s ± 1% txt4
_ZFlat10-8 1.33GB/s ± 1% pb
_ZFlat11-8 605MB/s ± 1% gaviota
"go test -test.bench=. -tags=noasm"
_UFlat0-8 621MB/s ± 2% html
_UFlat1-8 494MB/s ± 1% urls
_UFlat2-8 23.2GB/s ± 1% jpg
_UFlat3-8 1.12GB/s ± 1% jpg_200
_UFlat4-8 4.35GB/s ± 1% pdf
_UFlat5-8 609MB/s ± 0% html4
_UFlat6-8 296MB/s ± 0% txt1
_UFlat7-8 288MB/s ± 0% txt2
_UFlat8-8 309MB/s ± 1% txt3
_UFlat9-8 280MB/s ± 1% txt4
_UFlat10-8 753MB/s ± 0% pb
_UFlat11-8 400MB/s ± 0% gaviota
_ZFlat0-8 409MB/s ± 1% html
_ZFlat1-8 250MB/s ± 1% urls
_ZFlat2-8 12.3GB/s ± 1% jpg
_ZFlat3-8 132MB/s ± 0% jpg_200
_ZFlat4-8 2.92GB/s ± 0% pdf
_ZFlat5-8 405MB/s ± 1% html4
_ZFlat6-8 179MB/s ± 1% txt1
_ZFlat7-8 170MB/s ± 1% txt2
_ZFlat8-8 189MB/s ± 1% txt3
_ZFlat9-8 164MB/s ± 1% txt4
_ZFlat10-8 479MB/s ± 1% pb
_ZFlat11-8 270MB/s ± 1% gaviota
For comparison (Go's encoded output is byte-for-byte identical to C++'s), here
are the numbers from C++ Snappy's
make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log
BM_UFlat/0 2.4GB/s html
BM_UFlat/1 1.4GB/s urls
BM_UFlat/2 21.8GB/s jpg
BM_UFlat/3 1.5GB/s jpg_200
BM_UFlat/4 13.3GB/s pdf
BM_UFlat/5 2.1GB/s html4
BM_UFlat/6 1.0GB/s txt1
BM_UFlat/7 959.4MB/s txt2
BM_UFlat/8 1.0GB/s txt3
BM_UFlat/9 864.5MB/s txt4
BM_UFlat/10 2.9GB/s pb
BM_UFlat/11 1.2GB/s gaviota
BM_ZFlat/0 944.3MB/s html (22.31 %)
BM_ZFlat/1 501.6MB/s urls (47.78 %)
BM_ZFlat/2 14.3GB/s jpg (99.95 %)
BM_ZFlat/3 538.3MB/s jpg_200 (73.00 %)
BM_ZFlat/4 8.3GB/s pdf (83.30 %)
BM_ZFlat/5 903.5MB/s html4 (22.52 %)
BM_ZFlat/6 336.0MB/s txt1 (57.88 %)
BM_ZFlat/7 312.3MB/s txt2 (61.91 %)
BM_ZFlat/8 353.1MB/s txt3 (54.99 %)
BM_ZFlat/9 289.9MB/s txt4 (66.26 %)
BM_ZFlat/10 1.2GB/s pb (19.68 %)
BM_ZFlat/11 527.4MB/s gaviota (37.72 %)

View file

@ -1,490 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
#include "textflag.h"
// The asm code generally follows the pure Go code in decode_other.go, except
// where marked with a "!!!".
// func decode(dst, src []byte) int
//
// All local variables fit into registers. The non-zero stack size is only to
// spill registers and push args when issuing a CALL. The register allocation:
// - AX scratch
// - BX scratch
// - CX length or x
// - DX offset
// - SI &src[s]
// - DI &dst[d]
// + R8 dst_base
// + R9 dst_len
// + R10 dst_base + dst_len
// + R11 src_base
// + R12 src_len
// + R13 src_base + src_len
// - R14 used by doCopy
// - R15 used by doCopy
//
// The registers R8-R13 (marked with a "+") are set at the start of the
// function, and after a CALL returns, and are not otherwise modified.
//
// The d variable is implicitly DI - R8, and len(dst)-d is R10 - DI.
// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI.
TEXT ·decode(SB), NOSPLIT, $48-56
// Initialize SI, DI and R8-R13.
MOVQ dst_base+0(FP), R8
MOVQ dst_len+8(FP), R9
MOVQ R8, DI
MOVQ R8, R10
ADDQ R9, R10
MOVQ src_base+24(FP), R11
MOVQ src_len+32(FP), R12
MOVQ R11, SI
MOVQ R11, R13
ADDQ R12, R13
loop:
// for s < len(src)
CMPQ SI, R13
JEQ end
// CX = uint32(src[s])
//
// switch src[s] & 0x03
MOVBLZX (SI), CX
MOVL CX, BX
ANDL $3, BX
CMPL BX, $1
JAE tagCopy
// ----------------------------------------
// The code below handles literal tags.
// case tagLiteral:
// x := uint32(src[s] >> 2)
// switch
SHRL $2, CX
CMPL CX, $60
JAE tagLit60Plus
// case x < 60:
// s++
INCQ SI
doLit:
// This is the end of the inner "switch", when we have a literal tag.
//
// We assume that CX == x and x fits in a uint32, where x is the variable
// used in the pure Go decode_other.go code.
// length = int(x) + 1
//
// Unlike the pure Go code, we don't need to check if length <= 0 because
// CX can hold 64 bits, so the increment cannot overflow.
INCQ CX
// Prepare to check if copying length bytes will run past the end of dst or
// src.
//
// AX = len(dst) - d
// BX = len(src) - s
MOVQ R10, AX
SUBQ DI, AX
MOVQ R13, BX
SUBQ SI, BX
// !!! Try a faster technique for short (16 or fewer bytes) copies.
//
// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
// goto callMemmove // Fall back on calling runtime·memmove.
// }
//
// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
// against 21 instead of 16, because it cannot assume that all of its input
// is contiguous in memory and so it needs to leave enough source bytes to
// read the next tag without refilling buffers, but Go's Decode assumes
// contiguousness (the src argument is a []byte).
CMPQ CX, $16
JGT callMemmove
CMPQ AX, $16
JLT callMemmove
CMPQ BX, $16
JLT callMemmove
// !!! Implement the copy from src to dst as a 16-byte load and store.
// (Decode's documentation says that dst and src must not overlap.)
//
// This always copies 16 bytes, instead of only length bytes, but that's
// OK. If the input is a valid Snappy encoding then subsequent iterations
// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
// non-nil error), so the overrun will be ignored.
//
// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
// 16-byte loads and stores. This technique probably wouldn't be as
// effective on architectures that are fussier about alignment.
MOVOU 0(SI), X0
MOVOU X0, 0(DI)
// d += length
// s += length
ADDQ CX, DI
ADDQ CX, SI
JMP loop
callMemmove:
// if length > len(dst)-d || length > len(src)-s { etc }
CMPQ CX, AX
JGT errCorrupt
CMPQ CX, BX
JGT errCorrupt
// copy(dst[d:], src[s:s+length])
//
// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
// DI, SI and CX as arguments. Coincidentally, we also need to spill those
// three registers to the stack, to save local variables across the CALL.
MOVQ DI, 0(SP)
MOVQ SI, 8(SP)
MOVQ CX, 16(SP)
MOVQ DI, 24(SP)
MOVQ SI, 32(SP)
MOVQ CX, 40(SP)
CALL runtime·memmove(SB)
// Restore local variables: unspill registers from the stack and
// re-calculate R8-R13.
MOVQ 24(SP), DI
MOVQ 32(SP), SI
MOVQ 40(SP), CX
MOVQ dst_base+0(FP), R8
MOVQ dst_len+8(FP), R9
MOVQ R8, R10
ADDQ R9, R10
MOVQ src_base+24(FP), R11
MOVQ src_len+32(FP), R12
MOVQ R11, R13
ADDQ R12, R13
// d += length
// s += length
ADDQ CX, DI
ADDQ CX, SI
JMP loop
tagLit60Plus:
// !!! This fragment does the
//
// s += x - 58; if uint(s) > uint(len(src)) { etc }
//
// checks. In the asm version, we code it once instead of once per switch case.
ADDQ CX, SI
SUBQ $58, SI
MOVQ SI, BX
SUBQ R11, BX
CMPQ BX, R12
JA errCorrupt
// case x == 60:
CMPL CX, $61
JEQ tagLit61
JA tagLit62Plus
// x = uint32(src[s-1])
MOVBLZX -1(SI), CX
JMP doLit
tagLit61:
// case x == 61:
// x = uint32(src[s-2]) | uint32(src[s-1])<<8
MOVWLZX -2(SI), CX
JMP doLit
tagLit62Plus:
CMPL CX, $62
JA tagLit63
// case x == 62:
// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
MOVWLZX -3(SI), CX
MOVBLZX -1(SI), BX
SHLL $16, BX
ORL BX, CX
JMP doLit
tagLit63:
// case x == 63:
// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
MOVL -4(SI), CX
JMP doLit
// The code above handles literal tags.
// ----------------------------------------
// The code below handles copy tags.
tagCopy4:
// case tagCopy4:
// s += 5
ADDQ $5, SI
// if uint(s) > uint(len(src)) { etc }
MOVQ SI, BX
SUBQ R11, BX
CMPQ BX, R12
JA errCorrupt
// length = 1 + int(src[s-5])>>2
SHRQ $2, CX
INCQ CX
// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
MOVLQZX -4(SI), DX
JMP doCopy
tagCopy2:
// case tagCopy2:
// s += 3
ADDQ $3, SI
// if uint(s) > uint(len(src)) { etc }
MOVQ SI, BX
SUBQ R11, BX
CMPQ BX, R12
JA errCorrupt
// length = 1 + int(src[s-3])>>2
SHRQ $2, CX
INCQ CX
// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
MOVWQZX -2(SI), DX
JMP doCopy
tagCopy:
// We have a copy tag. We assume that:
// - BX == src[s] & 0x03
// - CX == src[s]
CMPQ BX, $2
JEQ tagCopy2
JA tagCopy4
// case tagCopy1:
// s += 2
ADDQ $2, SI
// if uint(s) > uint(len(src)) { etc }
MOVQ SI, BX
SUBQ R11, BX
CMPQ BX, R12
JA errCorrupt
// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
MOVQ CX, DX
ANDQ $0xe0, DX
SHLQ $3, DX
MOVBQZX -1(SI), BX
ORQ BX, DX
// length = 4 + int(src[s-2])>>2&0x7
SHRQ $2, CX
ANDQ $7, CX
ADDQ $4, CX
doCopy:
// This is the end of the outer "switch", when we have a copy tag.
//
// We assume that:
// - CX == length && CX > 0
// - DX == offset
// if offset <= 0 { etc }
CMPQ DX, $0
JLE errCorrupt
// if d < offset { etc }
MOVQ DI, BX
SUBQ R8, BX
CMPQ BX, DX
JLT errCorrupt
// if length > len(dst)-d { etc }
MOVQ R10, BX
SUBQ DI, BX
CMPQ CX, BX
JGT errCorrupt
// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
//
// Set:
// - R14 = len(dst)-d
// - R15 = &dst[d-offset]
MOVQ R10, R14
SUBQ DI, R14
MOVQ DI, R15
SUBQ DX, R15
// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
//
// First, try using two 8-byte load/stores, similar to the doLit technique
// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
// and not one 16-byte load/store, and the first store has to be before the
// second load, due to the overlap if offset is in the range [8, 16).
//
// if length > 16 || offset < 8 || len(dst)-d < 16 {
// goto slowForwardCopy
// }
// copy 16 bytes
// d += length
CMPQ CX, $16
JGT slowForwardCopy
CMPQ DX, $8
JLT slowForwardCopy
CMPQ R14, $16
JLT slowForwardCopy
MOVQ 0(R15), AX
MOVQ AX, 0(DI)
MOVQ 8(R15), BX
MOVQ BX, 8(DI)
ADDQ CX, DI
JMP loop
slowForwardCopy:
// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
// can still try 8-byte load stores, provided we can overrun up to 10 extra
// bytes. As above, the overrun will be fixed up by subsequent iterations
// of the outermost loop.
//
// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
// commentary says:
//
// ----
//
// The main part of this loop is a simple copy of eight bytes at a time
// until we've copied (at least) the requested amount of bytes. However,
// if d and d-offset are less than eight bytes apart (indicating a
// repeating pattern of length < 8), we first need to expand the pattern in
// order to get the correct results. For instance, if the buffer looks like
// this, with the eight-byte <d-offset> and <d> patterns marked as
// intervals:
//
// abxxxxxxxxxxxx
// [------] d-offset
// [------] d
//
// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
// once, after which we can move <d> two bytes without moving <d-offset>:
//
// ababxxxxxxxxxx
// [------] d-offset
// [------] d
//
// and repeat the exercise until the two no longer overlap.
//
// This allows us to do very well in the special case of one single byte
// repeated many times, without taking a big hit for more general cases.
//
// The worst case of extra writing past the end of the match occurs when
// offset == 1 and length == 1; the last copy will read from byte positions
// [0..7] and write to [4..11], whereas it was only supposed to write to
// position 1. Thus, ten excess bytes.
//
// ----
//
// That "10 byte overrun" worst case is confirmed by Go's
// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
// and finishSlowForwardCopy algorithm.
//
// if length > len(dst)-d-10 {
// goto verySlowForwardCopy
// }
SUBQ $10, R14
CMPQ CX, R14
JGT verySlowForwardCopy
makeOffsetAtLeast8:
// !!! As above, expand the pattern so that offset >= 8 and we can use
// 8-byte load/stores.
//
// for offset < 8 {
// copy 8 bytes from dst[d-offset:] to dst[d:]
// length -= offset
// d += offset
// offset += offset
// // The two previous lines together means that d-offset, and therefore
// // R15, is unchanged.
// }
CMPQ DX, $8
JGE fixUpSlowForwardCopy
MOVQ (R15), BX
MOVQ BX, (DI)
SUBQ DX, CX
ADDQ DX, DI
ADDQ DX, DX
JMP makeOffsetAtLeast8
fixUpSlowForwardCopy:
// !!! Add length (which might be negative now) to d (implied by DI being
// &dst[d]) so that d ends up at the right place when we jump back to the
// top of the loop. Before we do that, though, we save DI to AX so that, if
// length is positive, copying the remaining length bytes will write to the
// right place.
MOVQ DI, AX
ADDQ CX, DI
finishSlowForwardCopy:
// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
// length means that we overrun, but as above, that will be fixed up by
// subsequent iterations of the outermost loop.
CMPQ CX, $0
JLE loop
MOVQ (R15), BX
MOVQ BX, (AX)
ADDQ $8, R15
ADDQ $8, AX
SUBQ $8, CX
JMP finishSlowForwardCopy
verySlowForwardCopy:
// verySlowForwardCopy is a simple implementation of forward copy. In C
// parlance, this is a do/while loop instead of a while loop, since we know
// that length > 0. In Go syntax:
//
// for {
// dst[d] = dst[d - offset]
// d++
// length--
// if length == 0 {
// break
// }
// }
MOVB (R15), BX
MOVB BX, (DI)
INCQ R15
INCQ DI
DECQ CX
JNZ verySlowForwardCopy
JMP loop
// The code above handles copy tags.
// ----------------------------------------
end:
// This is the end of the "for s < len(src)".
//
// if d != len(dst) { etc }
CMPQ DI, R10
JNE errCorrupt
// return 0
MOVQ $0, ret+48(FP)
RET
errCorrupt:
// return decodeErrCodeCorrupt
MOVQ $1, ret+48(FP)
RET

View file

@ -1,494 +0,0 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
#include "textflag.h"
// The asm code generally follows the pure Go code in decode_other.go, except
// where marked with a "!!!".
// func decode(dst, src []byte) int
//
// All local variables fit into registers. The non-zero stack size is only to
// spill registers and push args when issuing a CALL. The register allocation:
// - R2 scratch
// - R3 scratch
// - R4 length or x
// - R5 offset
// - R6 &src[s]
// - R7 &dst[d]
// + R8 dst_base
// + R9 dst_len
// + R10 dst_base + dst_len
// + R11 src_base
// + R12 src_len
// + R13 src_base + src_len
// - R14 used by doCopy
// - R15 used by doCopy
//
// The registers R8-R13 (marked with a "+") are set at the start of the
// function, and after a CALL returns, and are not otherwise modified.
//
// The d variable is implicitly R7 - R8, and len(dst)-d is R10 - R7.
// The s variable is implicitly R6 - R11, and len(src)-s is R13 - R6.
TEXT ·decode(SB), NOSPLIT, $56-56
// Initialize R6, R7 and R8-R13.
MOVD dst_base+0(FP), R8
MOVD dst_len+8(FP), R9
MOVD R8, R7
MOVD R8, R10
ADD R9, R10, R10
MOVD src_base+24(FP), R11
MOVD src_len+32(FP), R12
MOVD R11, R6
MOVD R11, R13
ADD R12, R13, R13
loop:
// for s < len(src)
CMP R13, R6
BEQ end
// R4 = uint32(src[s])
//
// switch src[s] & 0x03
MOVBU (R6), R4
MOVW R4, R3
ANDW $3, R3
MOVW $1, R1
CMPW R1, R3
BGE tagCopy
// ----------------------------------------
// The code below handles literal tags.
// case tagLiteral:
// x := uint32(src[s] >> 2)
// switch
MOVW $60, R1
LSRW $2, R4, R4
CMPW R4, R1
BLS tagLit60Plus
// case x < 60:
// s++
ADD $1, R6, R6
doLit:
// This is the end of the inner "switch", when we have a literal tag.
//
// We assume that R4 == x and x fits in a uint32, where x is the variable
// used in the pure Go decode_other.go code.
// length = int(x) + 1
//
// Unlike the pure Go code, we don't need to check if length <= 0 because
// R4 can hold 64 bits, so the increment cannot overflow.
ADD $1, R4, R4
// Prepare to check if copying length bytes will run past the end of dst or
// src.
//
// R2 = len(dst) - d
// R3 = len(src) - s
MOVD R10, R2
SUB R7, R2, R2
MOVD R13, R3
SUB R6, R3, R3
// !!! Try a faster technique for short (16 or fewer bytes) copies.
//
// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
// goto callMemmove // Fall back on calling runtime·memmove.
// }
//
// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
// against 21 instead of 16, because it cannot assume that all of its input
// is contiguous in memory and so it needs to leave enough source bytes to
// read the next tag without refilling buffers, but Go's Decode assumes
// contiguousness (the src argument is a []byte).
CMP $16, R4
BGT callMemmove
CMP $16, R2
BLT callMemmove
CMP $16, R3
BLT callMemmove
// !!! Implement the copy from src to dst as a 16-byte load and store.
// (Decode's documentation says that dst and src must not overlap.)
//
// This always copies 16 bytes, instead of only length bytes, but that's
// OK. If the input is a valid Snappy encoding then subsequent iterations
// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
// non-nil error), so the overrun will be ignored.
//
// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
// 16-byte loads and stores. This technique probably wouldn't be as
// effective on architectures that are fussier about alignment.
LDP 0(R6), (R14, R15)
STP (R14, R15), 0(R7)
// d += length
// s += length
ADD R4, R7, R7
ADD R4, R6, R6
B loop
callMemmove:
// if length > len(dst)-d || length > len(src)-s { etc }
CMP R2, R4
BGT errCorrupt
CMP R3, R4
BGT errCorrupt
// copy(dst[d:], src[s:s+length])
//
// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
// R7, R6 and R4 as arguments. Coincidentally, we also need to spill those
// three registers to the stack, to save local variables across the CALL.
MOVD R7, 8(RSP)
MOVD R6, 16(RSP)
MOVD R4, 24(RSP)
MOVD R7, 32(RSP)
MOVD R6, 40(RSP)
MOVD R4, 48(RSP)
CALL runtime·memmove(SB)
// Restore local variables: unspill registers from the stack and
// re-calculate R8-R13.
MOVD 32(RSP), R7
MOVD 40(RSP), R6
MOVD 48(RSP), R4
MOVD dst_base+0(FP), R8
MOVD dst_len+8(FP), R9
MOVD R8, R10
ADD R9, R10, R10
MOVD src_base+24(FP), R11
MOVD src_len+32(FP), R12
MOVD R11, R13
ADD R12, R13, R13
// d += length
// s += length
ADD R4, R7, R7
ADD R4, R6, R6
B loop
tagLit60Plus:
// !!! This fragment does the
//
// s += x - 58; if uint(s) > uint(len(src)) { etc }
//
// checks. In the asm version, we code it once instead of once per switch case.
ADD R4, R6, R6
SUB $58, R6, R6
MOVD R6, R3
SUB R11, R3, R3
CMP R12, R3
BGT errCorrupt
// case x == 60:
MOVW $61, R1
CMPW R1, R4
BEQ tagLit61
BGT tagLit62Plus
// x = uint32(src[s-1])
MOVBU -1(R6), R4
B doLit
tagLit61:
// case x == 61:
// x = uint32(src[s-2]) | uint32(src[s-1])<<8
MOVHU -2(R6), R4
B doLit
tagLit62Plus:
CMPW $62, R4
BHI tagLit63
// case x == 62:
// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
MOVHU -3(R6), R4
MOVBU -1(R6), R3
ORR R3<<16, R4
B doLit
tagLit63:
// case x == 63:
// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
MOVWU -4(R6), R4
B doLit
// The code above handles literal tags.
// ----------------------------------------
// The code below handles copy tags.
tagCopy4:
// case tagCopy4:
// s += 5
ADD $5, R6, R6
// if uint(s) > uint(len(src)) { etc }
MOVD R6, R3
SUB R11, R3, R3
CMP R12, R3
BGT errCorrupt
// length = 1 + int(src[s-5])>>2
MOVD $1, R1
ADD R4>>2, R1, R4
// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
MOVWU -4(R6), R5
B doCopy
tagCopy2:
// case tagCopy2:
// s += 3
ADD $3, R6, R6
// if uint(s) > uint(len(src)) { etc }
MOVD R6, R3
SUB R11, R3, R3
CMP R12, R3
BGT errCorrupt
// length = 1 + int(src[s-3])>>2
MOVD $1, R1
ADD R4>>2, R1, R4
// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
MOVHU -2(R6), R5
B doCopy
tagCopy:
// We have a copy tag. We assume that:
// - R3 == src[s] & 0x03
// - R4 == src[s]
CMP $2, R3
BEQ tagCopy2
BGT tagCopy4
// case tagCopy1:
// s += 2
ADD $2, R6, R6
// if uint(s) > uint(len(src)) { etc }
MOVD R6, R3
SUB R11, R3, R3
CMP R12, R3
BGT errCorrupt
// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
MOVD R4, R5
AND $0xe0, R5
MOVBU -1(R6), R3
ORR R5<<3, R3, R5
// length = 4 + int(src[s-2])>>2&0x7
MOVD $7, R1
AND R4>>2, R1, R4
ADD $4, R4, R4
doCopy:
// This is the end of the outer "switch", when we have a copy tag.
//
// We assume that:
// - R4 == length && R4 > 0
// - R5 == offset
// if offset <= 0 { etc }
MOVD $0, R1
CMP R1, R5
BLE errCorrupt
// if d < offset { etc }
MOVD R7, R3
SUB R8, R3, R3
CMP R5, R3
BLT errCorrupt
// if length > len(dst)-d { etc }
MOVD R10, R3
SUB R7, R3, R3
CMP R3, R4
BGT errCorrupt
// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
//
// Set:
// - R14 = len(dst)-d
// - R15 = &dst[d-offset]
MOVD R10, R14
SUB R7, R14, R14
MOVD R7, R15
SUB R5, R15, R15
// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
//
// First, try using two 8-byte load/stores, similar to the doLit technique
// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
// and not one 16-byte load/store, and the first store has to be before the
// second load, due to the overlap if offset is in the range [8, 16).
//
// if length > 16 || offset < 8 || len(dst)-d < 16 {
// goto slowForwardCopy
// }
// copy 16 bytes
// d += length
CMP $16, R4
BGT slowForwardCopy
CMP $8, R5
BLT slowForwardCopy
CMP $16, R14
BLT slowForwardCopy
MOVD 0(R15), R2
MOVD R2, 0(R7)
MOVD 8(R15), R3
MOVD R3, 8(R7)
ADD R4, R7, R7
B loop
slowForwardCopy:
// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
// can still try 8-byte load stores, provided we can overrun up to 10 extra
// bytes. As above, the overrun will be fixed up by subsequent iterations
// of the outermost loop.
//
// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
// commentary says:
//
// ----
//
// The main part of this loop is a simple copy of eight bytes at a time
// until we've copied (at least) the requested amount of bytes. However,
// if d and d-offset are less than eight bytes apart (indicating a
// repeating pattern of length < 8), we first need to expand the pattern in
// order to get the correct results. For instance, if the buffer looks like
// this, with the eight-byte <d-offset> and <d> patterns marked as
// intervals:
//
// abxxxxxxxxxxxx
// [------] d-offset
// [------] d
//
// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
// once, after which we can move <d> two bytes without moving <d-offset>:
//
// ababxxxxxxxxxx
// [------] d-offset
// [------] d
//
// and repeat the exercise until the two no longer overlap.
//
// This allows us to do very well in the special case of one single byte
// repeated many times, without taking a big hit for more general cases.
//
// The worst case of extra writing past the end of the match occurs when
// offset == 1 and length == 1; the last copy will read from byte positions
// [0..7] and write to [4..11], whereas it was only supposed to write to
// position 1. Thus, ten excess bytes.
//
// ----
//
// That "10 byte overrun" worst case is confirmed by Go's
// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
// and finishSlowForwardCopy algorithm.
//
// if length > len(dst)-d-10 {
// goto verySlowForwardCopy
// }
SUB $10, R14, R14
CMP R14, R4
BGT verySlowForwardCopy
makeOffsetAtLeast8:
// !!! As above, expand the pattern so that offset >= 8 and we can use
// 8-byte load/stores.
//
// for offset < 8 {
// copy 8 bytes from dst[d-offset:] to dst[d:]
// length -= offset
// d += offset
// offset += offset
// // The two previous lines together means that d-offset, and therefore
// // R15, is unchanged.
// }
CMP $8, R5
BGE fixUpSlowForwardCopy
MOVD (R15), R3
MOVD R3, (R7)
SUB R5, R4, R4
ADD R5, R7, R7
ADD R5, R5, R5
B makeOffsetAtLeast8
fixUpSlowForwardCopy:
// !!! Add length (which might be negative now) to d (implied by R7 being
// &dst[d]) so that d ends up at the right place when we jump back to the
// top of the loop. Before we do that, though, we save R7 to R2 so that, if
// length is positive, copying the remaining length bytes will write to the
// right place.
MOVD R7, R2
ADD R4, R7, R7
finishSlowForwardCopy:
// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
// length means that we overrun, but as above, that will be fixed up by
// subsequent iterations of the outermost loop.
MOVD $0, R1
CMP R1, R4
BLE loop
MOVD (R15), R3
MOVD R3, (R2)
ADD $8, R15, R15
ADD $8, R2, R2
SUB $8, R4, R4
B finishSlowForwardCopy
verySlowForwardCopy:
// verySlowForwardCopy is a simple implementation of forward copy. In C
// parlance, this is a do/while loop instead of a while loop, since we know
// that length > 0. In Go syntax:
//
// for {
// dst[d] = dst[d - offset]
// d++
// length--
// if length == 0 {
// break
// }
// }
MOVB (R15), R3
MOVB R3, (R7)
ADD $1, R15, R15
ADD $1, R7, R7
SUB $1, R4, R4
CBNZ R4, verySlowForwardCopy
B loop
// The code above handles copy tags.
// ----------------------------------------
end:
// This is the end of the "for s < len(src)".
//
// if d != len(dst) { etc }
CMP R10, R7
BNE errCorrupt
// return 0
MOVD $0, ret+48(FP)
RET
errCorrupt:
// return decodeErrCodeCorrupt
MOVD $1, R2
MOVD R2, ret+48(FP)
RET

View file

@ -1,15 +0,0 @@
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
// +build amd64 arm64
package snappy
// decode has the same semantics as in decode_other.go.
//
//go:noescape
func decode(dst, src []byte) int

View file

@ -1,730 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
#include "textflag.h"
// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a
// Go toolchain regression. See https://github.com/golang/go/issues/15426 and
// https://github.com/golang/snappy/issues/29
//
// As a workaround, the package was built with a known good assembler, and
// those instructions were disassembled by "objdump -d" to yield the
// 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
// style comments, in AT&T asm syntax. Note that rsp here is a physical
// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm).
// The instructions were then encoded as "BYTE $0x.." sequences, which assemble
// fine on Go 1.6.
// The asm code generally follows the pure Go code in encode_other.go, except
// where marked with a "!!!".
// ----------------------------------------------------------------------------
// func emitLiteral(dst, lit []byte) int
//
// All local variables fit into registers. The register allocation:
// - AX len(lit)
// - BX n
// - DX return value
// - DI &dst[i]
// - R10 &lit[0]
//
// The 24 bytes of stack space is to call runtime·memmove.
//
// The unusual register allocation of local variables, such as R10 for the
// source pointer, matches the allocation used at the call site in encodeBlock,
// which makes it easier to manually inline this function.
TEXT ·emitLiteral(SB), NOSPLIT, $24-56
MOVQ dst_base+0(FP), DI
MOVQ lit_base+24(FP), R10
MOVQ lit_len+32(FP), AX
MOVQ AX, DX
MOVL AX, BX
SUBL $1, BX
CMPL BX, $60
JLT oneByte
CMPL BX, $256
JLT twoBytes
threeBytes:
MOVB $0xf4, 0(DI)
MOVW BX, 1(DI)
ADDQ $3, DI
ADDQ $3, DX
JMP memmove
twoBytes:
MOVB $0xf0, 0(DI)
MOVB BX, 1(DI)
ADDQ $2, DI
ADDQ $2, DX
JMP memmove
oneByte:
SHLB $2, BX
MOVB BX, 0(DI)
ADDQ $1, DI
ADDQ $1, DX
memmove:
MOVQ DX, ret+48(FP)
// copy(dst[i:], lit)
//
// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
// DI, R10 and AX as arguments.
MOVQ DI, 0(SP)
MOVQ R10, 8(SP)
MOVQ AX, 16(SP)
CALL runtime·memmove(SB)
RET
// ----------------------------------------------------------------------------
// func emitCopy(dst []byte, offset, length int) int
//
// All local variables fit into registers. The register allocation:
// - AX length
// - SI &dst[0]
// - DI &dst[i]
// - R11 offset
//
// The unusual register allocation of local variables, such as R11 for the
// offset, matches the allocation used at the call site in encodeBlock, which
// makes it easier to manually inline this function.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
MOVQ dst_base+0(FP), DI
MOVQ DI, SI
MOVQ offset+24(FP), R11
MOVQ length+32(FP), AX
loop0:
// for length >= 68 { etc }
CMPL AX, $68
JLT step1
// Emit a length 64 copy, encoded as 3 bytes.
MOVB $0xfe, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
SUBL $64, AX
JMP loop0
step1:
// if length > 64 { etc }
CMPL AX, $64
JLE step2
// Emit a length 60 copy, encoded as 3 bytes.
MOVB $0xee, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
SUBL $60, AX
step2:
// if length >= 12 || offset >= 2048 { goto step3 }
CMPL AX, $12
JGE step3
CMPL R11, $2048
JGE step3
// Emit the remaining copy, encoded as 2 bytes.
MOVB R11, 1(DI)
SHRL $8, R11
SHLB $5, R11
SUBB $4, AX
SHLB $2, AX
ORB AX, R11
ORB $1, R11
MOVB R11, 0(DI)
ADDQ $2, DI
// Return the number of bytes written.
SUBQ SI, DI
MOVQ DI, ret+40(FP)
RET
step3:
// Emit the remaining copy, encoded as 3 bytes.
SUBL $1, AX
SHLB $2, AX
ORB $2, AX
MOVB AX, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
// Return the number of bytes written.
SUBQ SI, DI
MOVQ DI, ret+40(FP)
RET
// ----------------------------------------------------------------------------
// func extendMatch(src []byte, i, j int) int
//
// All local variables fit into registers. The register allocation:
// - DX &src[0]
// - SI &src[j]
// - R13 &src[len(src) - 8]
// - R14 &src[len(src)]
// - R15 &src[i]
//
// The unusual register allocation of local variables, such as R15 for a source
// pointer, matches the allocation used at the call site in encodeBlock, which
// makes it easier to manually inline this function.
TEXT ·extendMatch(SB), NOSPLIT, $0-48
MOVQ src_base+0(FP), DX
MOVQ src_len+8(FP), R14
MOVQ i+24(FP), R15
MOVQ j+32(FP), SI
ADDQ DX, R14
ADDQ DX, R15
ADDQ DX, SI
MOVQ R14, R13
SUBQ $8, R13
cmp8:
// As long as we are 8 or more bytes before the end of src, we can load and
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
CMPQ SI, R13
JA cmp1
MOVQ (R15), AX
MOVQ (SI), BX
CMPQ AX, BX
JNE bsf
ADDQ $8, R15
ADDQ $8, SI
JMP cmp8
bsf:
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
// the index of the first byte that differs. The BSF instruction finds the
// least significant 1 bit, the amd64 architecture is little-endian, and
// the shift by 3 converts a bit index to a byte index.
XORQ AX, BX
BSFQ BX, BX
SHRQ $3, BX
ADDQ BX, SI
// Convert from &src[ret] to ret.
SUBQ DX, SI
MOVQ SI, ret+40(FP)
RET
cmp1:
// In src's tail, compare 1 byte at a time.
CMPQ SI, R14
JAE extendMatchEnd
MOVB (R15), AX
MOVB (SI), BX
CMPB AX, BX
JNE extendMatchEnd
ADDQ $1, R15
ADDQ $1, SI
JMP cmp1
extendMatchEnd:
// Convert from &src[ret] to ret.
SUBQ DX, SI
MOVQ SI, ret+40(FP)
RET
// ----------------------------------------------------------------------------
// func encodeBlock(dst, src []byte) (d int)
//
// All local variables fit into registers, other than "var table". The register
// allocation:
// - AX . .
// - BX . .
// - CX 56 shift (note that amd64 shifts by non-immediates must use CX).
// - DX 64 &src[0], tableSize
// - SI 72 &src[s]
// - DI 80 &dst[d]
// - R9 88 sLimit
// - R10 . &src[nextEmit]
// - R11 96 prevHash, currHash, nextHash, offset
// - R12 104 &src[base], skip
// - R13 . &src[nextS], &src[len(src) - 8]
// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x
// - R15 112 candidate
//
// The second column (56, 64, etc) is the stack offset to spill the registers
// when calling other functions. We could pack this slightly tighter, but it's
// simpler to have a dedicated spill map independent of the function called.
//
// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill
// local variables (registers) during calls gives 32768 + 56 + 64 = 32888.
TEXT ·encodeBlock(SB), 0, $32888-56
MOVQ dst_base+0(FP), DI
MOVQ src_base+24(FP), SI
MOVQ src_len+32(FP), R14
// shift, tableSize := uint32(32-8), 1<<8
MOVQ $24, CX
MOVQ $256, DX
calcShift:
// for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
// shift--
// }
CMPQ DX, $16384
JGE varTable
CMPQ DX, R14
JGE varTable
SUBQ $1, CX
SHLQ $1, DX
JMP calcShift
varTable:
// var table [maxTableSize]uint16
//
// In the asm code, unlike the Go code, we can zero-initialize only the
// first tableSize elements. Each uint16 element is 2 bytes and each MOVOU
// writes 16 bytes, so we can do only tableSize/8 writes instead of the
// 2048 writes that would zero-initialize all of table's 32768 bytes.
SHRQ $3, DX
LEAQ table-32768(SP), BX
PXOR X0, X0
memclr:
MOVOU X0, 0(BX)
ADDQ $16, BX
SUBQ $1, DX
JNZ memclr
// !!! DX = &src[0]
MOVQ SI, DX
// sLimit := len(src) - inputMargin
MOVQ R14, R9
SUBQ $15, R9
// !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't
// change for the rest of the function.
MOVQ CX, 56(SP)
MOVQ DX, 64(SP)
MOVQ R9, 88(SP)
// nextEmit := 0
MOVQ DX, R10
// s := 1
ADDQ $1, SI
// nextHash := hash(load32(src, s), shift)
MOVL 0(SI), R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
outer:
// for { etc }
// skip := 32
MOVQ $32, R12
// nextS := s
MOVQ SI, R13
// candidate := 0
MOVQ $0, R15
inner0:
// for { etc }
// s := nextS
MOVQ R13, SI
// bytesBetweenHashLookups := skip >> 5
MOVQ R12, R14
SHRQ $5, R14
// nextS = s + bytesBetweenHashLookups
ADDQ R14, R13
// skip += bytesBetweenHashLookups
ADDQ R14, R12
// if nextS > sLimit { goto emitRemainder }
MOVQ R13, AX
SUBQ DX, AX
CMPQ AX, R9
JA emitRemainder
// candidate = int(table[nextHash])
// XXX: MOVWQZX table-32768(SP)(R11*2), R15
// XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
BYTE $0x4e
BYTE $0x0f
BYTE $0xb7
BYTE $0x7c
BYTE $0x5c
BYTE $0x78
// table[nextHash] = uint16(s)
MOVQ SI, AX
SUBQ DX, AX
// XXX: MOVW AX, table-32768(SP)(R11*2)
// XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
BYTE $0x66
BYTE $0x42
BYTE $0x89
BYTE $0x44
BYTE $0x5c
BYTE $0x78
// nextHash = hash(load32(src, nextS), shift)
MOVL 0(R13), R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
// if load32(src, s) != load32(src, candidate) { continue } break
MOVL 0(SI), AX
MOVL (DX)(R15*1), BX
CMPL AX, BX
JNE inner0
fourByteMatch:
// As per the encode_other.go code:
//
// A 4-byte match has been found. We'll later see etc.
// !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
// on inputMargin in encode.go.
MOVQ SI, AX
SUBQ R10, AX
CMPQ AX, $16
JLE emitLiteralFastPath
// ----------------------------------------
// Begin inline of the emitLiteral call.
//
// d += emitLiteral(dst[d:], src[nextEmit:s])
MOVL AX, BX
SUBL $1, BX
CMPL BX, $60
JLT inlineEmitLiteralOneByte
CMPL BX, $256
JLT inlineEmitLiteralTwoBytes
inlineEmitLiteralThreeBytes:
MOVB $0xf4, 0(DI)
MOVW BX, 1(DI)
ADDQ $3, DI
JMP inlineEmitLiteralMemmove
inlineEmitLiteralTwoBytes:
MOVB $0xf0, 0(DI)
MOVB BX, 1(DI)
ADDQ $2, DI
JMP inlineEmitLiteralMemmove
inlineEmitLiteralOneByte:
SHLB $2, BX
MOVB BX, 0(DI)
ADDQ $1, DI
inlineEmitLiteralMemmove:
// Spill local variables (registers) onto the stack; call; unspill.
//
// copy(dst[i:], lit)
//
// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
// DI, R10 and AX as arguments.
MOVQ DI, 0(SP)
MOVQ R10, 8(SP)
MOVQ AX, 16(SP)
ADDQ AX, DI // Finish the "d +=" part of "d += emitLiteral(etc)".
MOVQ SI, 72(SP)
MOVQ DI, 80(SP)
MOVQ R15, 112(SP)
CALL runtime·memmove(SB)
MOVQ 56(SP), CX
MOVQ 64(SP), DX
MOVQ 72(SP), SI
MOVQ 80(SP), DI
MOVQ 88(SP), R9
MOVQ 112(SP), R15
JMP inner1
inlineEmitLiteralEnd:
// End inline of the emitLiteral call.
// ----------------------------------------
emitLiteralFastPath:
// !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
MOVB AX, BX
SUBB $1, BX
SHLB $2, BX
MOVB BX, (DI)
ADDQ $1, DI
// !!! Implement the copy from lit to dst as a 16-byte load and store.
// (Encode's documentation says that dst and src must not overlap.)
//
// This always copies 16 bytes, instead of only len(lit) bytes, but that's
// OK. Subsequent iterations will fix up the overrun.
//
// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
// 16-byte loads and stores. This technique probably wouldn't be as
// effective on architectures that are fussier about alignment.
MOVOU 0(R10), X0
MOVOU X0, 0(DI)
ADDQ AX, DI
inner1:
// for { etc }
// base := s
MOVQ SI, R12
// !!! offset := base - candidate
MOVQ R12, R11
SUBQ R15, R11
SUBQ DX, R11
// ----------------------------------------
// Begin inline of the extendMatch call.
//
// s = extendMatch(src, candidate+4, s+4)
// !!! R14 = &src[len(src)]
MOVQ src_len+32(FP), R14
ADDQ DX, R14
// !!! R13 = &src[len(src) - 8]
MOVQ R14, R13
SUBQ $8, R13
// !!! R15 = &src[candidate + 4]
ADDQ $4, R15
ADDQ DX, R15
// !!! s += 4
ADDQ $4, SI
inlineExtendMatchCmp8:
// As long as we are 8 or more bytes before the end of src, we can load and
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
CMPQ SI, R13
JA inlineExtendMatchCmp1
MOVQ (R15), AX
MOVQ (SI), BX
CMPQ AX, BX
JNE inlineExtendMatchBSF
ADDQ $8, R15
ADDQ $8, SI
JMP inlineExtendMatchCmp8
inlineExtendMatchBSF:
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
// the index of the first byte that differs. The BSF instruction finds the
// least significant 1 bit, the amd64 architecture is little-endian, and
// the shift by 3 converts a bit index to a byte index.
XORQ AX, BX
BSFQ BX, BX
SHRQ $3, BX
ADDQ BX, SI
JMP inlineExtendMatchEnd
inlineExtendMatchCmp1:
// In src's tail, compare 1 byte at a time.
CMPQ SI, R14
JAE inlineExtendMatchEnd
MOVB (R15), AX
MOVB (SI), BX
CMPB AX, BX
JNE inlineExtendMatchEnd
ADDQ $1, R15
ADDQ $1, SI
JMP inlineExtendMatchCmp1
inlineExtendMatchEnd:
// End inline of the extendMatch call.
// ----------------------------------------
// ----------------------------------------
// Begin inline of the emitCopy call.
//
// d += emitCopy(dst[d:], base-candidate, s-base)
// !!! length := s - base
MOVQ SI, AX
SUBQ R12, AX
inlineEmitCopyLoop0:
// for length >= 68 { etc }
CMPL AX, $68
JLT inlineEmitCopyStep1
// Emit a length 64 copy, encoded as 3 bytes.
MOVB $0xfe, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
SUBL $64, AX
JMP inlineEmitCopyLoop0
inlineEmitCopyStep1:
// if length > 64 { etc }
CMPL AX, $64
JLE inlineEmitCopyStep2
// Emit a length 60 copy, encoded as 3 bytes.
MOVB $0xee, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
SUBL $60, AX
inlineEmitCopyStep2:
// if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
CMPL AX, $12
JGE inlineEmitCopyStep3
CMPL R11, $2048
JGE inlineEmitCopyStep3
// Emit the remaining copy, encoded as 2 bytes.
MOVB R11, 1(DI)
SHRL $8, R11
SHLB $5, R11
SUBB $4, AX
SHLB $2, AX
ORB AX, R11
ORB $1, R11
MOVB R11, 0(DI)
ADDQ $2, DI
JMP inlineEmitCopyEnd
inlineEmitCopyStep3:
// Emit the remaining copy, encoded as 3 bytes.
SUBL $1, AX
SHLB $2, AX
ORB $2, AX
MOVB AX, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
inlineEmitCopyEnd:
// End inline of the emitCopy call.
// ----------------------------------------
// nextEmit = s
MOVQ SI, R10
// if s >= sLimit { goto emitRemainder }
MOVQ SI, AX
SUBQ DX, AX
CMPQ AX, R9
JAE emitRemainder
// As per the encode_other.go code:
//
// We could immediately etc.
// x := load64(src, s-1)
MOVQ -1(SI), R14
// prevHash := hash(uint32(x>>0), shift)
MOVL R14, R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
// table[prevHash] = uint16(s-1)
MOVQ SI, AX
SUBQ DX, AX
SUBQ $1, AX
// XXX: MOVW AX, table-32768(SP)(R11*2)
// XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
BYTE $0x66
BYTE $0x42
BYTE $0x89
BYTE $0x44
BYTE $0x5c
BYTE $0x78
// currHash := hash(uint32(x>>8), shift)
SHRQ $8, R14
MOVL R14, R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
// candidate = int(table[currHash])
// XXX: MOVWQZX table-32768(SP)(R11*2), R15
// XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
BYTE $0x4e
BYTE $0x0f
BYTE $0xb7
BYTE $0x7c
BYTE $0x5c
BYTE $0x78
// table[currHash] = uint16(s)
ADDQ $1, AX
// XXX: MOVW AX, table-32768(SP)(R11*2)
// XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
BYTE $0x66
BYTE $0x42
BYTE $0x89
BYTE $0x44
BYTE $0x5c
BYTE $0x78
// if uint32(x>>8) == load32(src, candidate) { continue }
MOVL (DX)(R15*1), BX
CMPL R14, BX
JEQ inner1
// nextHash = hash(uint32(x>>16), shift)
SHRQ $8, R14
MOVL R14, R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
// s++
ADDQ $1, SI
// break out of the inner1 for loop, i.e. continue the outer loop.
JMP outer
emitRemainder:
// if nextEmit < len(src) { etc }
MOVQ src_len+32(FP), AX
ADDQ DX, AX
CMPQ R10, AX
JEQ encodeBlockEnd
// d += emitLiteral(dst[d:], src[nextEmit:])
//
// Push args.
MOVQ DI, 0(SP)
MOVQ $0, 8(SP) // Unnecessary, as the callee ignores it, but conservative.
MOVQ $0, 16(SP) // Unnecessary, as the callee ignores it, but conservative.
MOVQ R10, 24(SP)
SUBQ R10, AX
MOVQ AX, 32(SP)
MOVQ AX, 40(SP) // Unnecessary, as the callee ignores it, but conservative.
// Spill local variables (registers) onto the stack; call; unspill.
MOVQ DI, 80(SP)
CALL ·emitLiteral(SB)
MOVQ 80(SP), DI
// Finish the "d +=" part of "d += emitLiteral(etc)".
ADDQ 48(SP), DI
encodeBlockEnd:
MOVQ dst_base+0(FP), AX
SUBQ AX, DI
MOVQ DI, d+48(FP)
RET

View file

@ -1,722 +0,0 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
#include "textflag.h"
// The asm code generally follows the pure Go code in encode_other.go, except
// where marked with a "!!!".
// ----------------------------------------------------------------------------
// func emitLiteral(dst, lit []byte) int
//
// All local variables fit into registers. The register allocation:
// - R3 len(lit)
// - R4 n
// - R6 return value
// - R8 &dst[i]
// - R10 &lit[0]
//
// The 32 bytes of stack space is to call runtime·memmove.
//
// The unusual register allocation of local variables, such as R10 for the
// source pointer, matches the allocation used at the call site in encodeBlock,
// which makes it easier to manually inline this function.
TEXT ·emitLiteral(SB), NOSPLIT, $32-56
MOVD dst_base+0(FP), R8
MOVD lit_base+24(FP), R10
MOVD lit_len+32(FP), R3
MOVD R3, R6
MOVW R3, R4
SUBW $1, R4, R4
CMPW $60, R4
BLT oneByte
CMPW $256, R4
BLT twoBytes
threeBytes:
MOVD $0xf4, R2
MOVB R2, 0(R8)
MOVW R4, 1(R8)
ADD $3, R8, R8
ADD $3, R6, R6
B memmove
twoBytes:
MOVD $0xf0, R2
MOVB R2, 0(R8)
MOVB R4, 1(R8)
ADD $2, R8, R8
ADD $2, R6, R6
B memmove
oneByte:
LSLW $2, R4, R4
MOVB R4, 0(R8)
ADD $1, R8, R8
ADD $1, R6, R6
memmove:
MOVD R6, ret+48(FP)
// copy(dst[i:], lit)
//
// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
// R8, R10 and R3 as arguments.
MOVD R8, 8(RSP)
MOVD R10, 16(RSP)
MOVD R3, 24(RSP)
CALL runtime·memmove(SB)
RET
// ----------------------------------------------------------------------------
// func emitCopy(dst []byte, offset, length int) int
//
// All local variables fit into registers. The register allocation:
// - R3 length
// - R7 &dst[0]
// - R8 &dst[i]
// - R11 offset
//
// The unusual register allocation of local variables, such as R11 for the
// offset, matches the allocation used at the call site in encodeBlock, which
// makes it easier to manually inline this function.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
MOVD dst_base+0(FP), R8
MOVD R8, R7
MOVD offset+24(FP), R11
MOVD length+32(FP), R3
loop0:
// for length >= 68 { etc }
CMPW $68, R3
BLT step1
// Emit a length 64 copy, encoded as 3 bytes.
MOVD $0xfe, R2
MOVB R2, 0(R8)
MOVW R11, 1(R8)
ADD $3, R8, R8
SUB $64, R3, R3
B loop0
step1:
// if length > 64 { etc }
CMP $64, R3
BLE step2
// Emit a length 60 copy, encoded as 3 bytes.
MOVD $0xee, R2
MOVB R2, 0(R8)
MOVW R11, 1(R8)
ADD $3, R8, R8
SUB $60, R3, R3
step2:
// if length >= 12 || offset >= 2048 { goto step3 }
CMP $12, R3
BGE step3
CMPW $2048, R11
BGE step3
// Emit the remaining copy, encoded as 2 bytes.
MOVB R11, 1(R8)
LSRW $3, R11, R11
AND $0xe0, R11, R11
SUB $4, R3, R3
LSLW $2, R3
AND $0xff, R3, R3
ORRW R3, R11, R11
ORRW $1, R11, R11
MOVB R11, 0(R8)
ADD $2, R8, R8
// Return the number of bytes written.
SUB R7, R8, R8
MOVD R8, ret+40(FP)
RET
step3:
// Emit the remaining copy, encoded as 3 bytes.
SUB $1, R3, R3
AND $0xff, R3, R3
LSLW $2, R3, R3
ORRW $2, R3, R3
MOVB R3, 0(R8)
MOVW R11, 1(R8)
ADD $3, R8, R8
// Return the number of bytes written.
SUB R7, R8, R8
MOVD R8, ret+40(FP)
RET
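Likewise, a hedged Go sketch of the copy encoding above (0xfe is 63<<2|2 and 0xee is 59<<2|2, i.e. tagCopy2 elements of length 64 and 60; emitCopyGo is an illustrative name):

// emitCopyGo sketches the copy encoding: long copies are chopped into
// 64-byte (plus at most one 60-byte) tagCopy2 elements, and a short, nearby
// copy can use the 2-byte tagCopy1 form. It returns the bytes written.
func emitCopyGo(dst []byte, offset, length int) int {
	i := 0
	// Emit length-64 copies while 68+ bytes remain, so the final element
	// always has length >= 4.
	for length >= 68 {
		dst[i+0] = 63<<2 | 0x02 // 0xfe
		dst[i+1] = uint8(offset)
		dst[i+2] = uint8(offset >> 8)
		i += 3
		length -= 64
	}
	if length > 64 {
		dst[i+0] = 59<<2 | 0x02 // 0xee: length-60 copy
		dst[i+1] = uint8(offset)
		dst[i+2] = uint8(offset >> 8)
		i += 3
		length -= 60
	}
	if length >= 12 || offset >= 2048 {
		// 3-byte tagCopy2 element.
		dst[i+0] = uint8(length-1)<<2 | 0x02
		dst[i+1] = uint8(offset)
		dst[i+2] = uint8(offset >> 8)
		return i + 3
	}
	// 2-byte tagCopy1 element: 4 <= length <= 11 and offset < 2048.
	dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | 0x01
	dst[i+1] = uint8(offset)
	return i + 2
}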
// ----------------------------------------------------------------------------
// func extendMatch(src []byte, i, j int) int
//
// All local variables fit into registers. The register allocation:
// - R6 &src[0]
// - R7 &src[j]
// - R13 &src[len(src) - 8]
// - R14 &src[len(src)]
// - R15 &src[i]
//
// The unusual register allocation of local variables, such as R15 for a source
// pointer, matches the allocation used at the call site in encodeBlock, which
// makes it easier to manually inline this function.
TEXT ·extendMatch(SB), NOSPLIT, $0-48
MOVD src_base+0(FP), R6
MOVD src_len+8(FP), R14
MOVD i+24(FP), R15
MOVD j+32(FP), R7
ADD R6, R14, R14
ADD R6, R15, R15
ADD R6, R7, R7
MOVD R14, R13
SUB $8, R13, R13
cmp8:
// As long as we are 8 or more bytes before the end of src, we can load and
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
CMP R13, R7
BHI cmp1
MOVD (R15), R3
MOVD (R7), R4
CMP R4, R3
BNE bsf
ADD $8, R15, R15
ADD $8, R7, R7
B cmp8
bsf:
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
// the index of the first byte that differs.
// RBIT reverses the bit order, then CLZ counts the leading zeros, the
// combination of which finds the least significant bit which is set.
// The arm64 architecture is little-endian, and the shift by 3 converts
// a bit index to a byte index.
EOR R3, R4, R4
RBIT R4, R4
CLZ R4, R4
ADD R4>>3, R7, R7
// Convert from &src[ret] to ret.
SUB R6, R7, R7
MOVD R7, ret+40(FP)
RET
cmp1:
// In src's tail, compare 1 byte at a time.
CMP R7, R14
BLS extendMatchEnd
MOVB (R15), R3
MOVB (R7), R4
CMP R4, R3
BNE extendMatchEnd
ADD $1, R15, R15
ADD $1, R7, R7
B cmp1
extendMatchEnd:
// Convert from &src[ret] to ret.
SUB R6, R7, R7
MOVD R7, ret+40(FP)
RET
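The 8-bytes-at-a-time loop and the RBIT+CLZ trick translate to Go roughly as below (a sketch assuming little-endian byte order, with bits.TrailingZeros64 standing in for the RBIT+CLZ pair; extendMatchGo is our name):

package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

// extendMatchGo mirrors the loop above: compare 8 bytes at a time while at
// least 8 bytes remain after j, locate the first differing byte via the
// trailing zero count of the XOR, then finish byte by byte in the tail.
func extendMatchGo(src []byte, i, j int) int {
	for j+8 <= len(src) {
		x := binary.LittleEndian.Uint64(src[i:])
		y := binary.LittleEndian.Uint64(src[j:])
		if x != y {
			return j + bits.TrailingZeros64(x^y)/8
		}
		i, j = i+8, j+8
	}
	for j < len(src) && src[i] == src[j] {
		i, j = i+1, j+1
	}
	return j
}

func main() {
	src := []byte("abcabcabcabcXbcabc")
	fmt.Println(extendMatchGo(src, 0, 3)) // extends the match starting at j=3
}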
// ----------------------------------------------------------------------------
// func encodeBlock(dst, src []byte) (d int)
//
// All local variables fit into registers, other than "var table". The register
// allocation:
// - R3 . .
// - R4 . .
// - R5 64 shift
// - R6 72 &src[0], tableSize
// - R7 80 &src[s]
// - R8 88 &dst[d]
// - R9 96 sLimit
// - R10 . &src[nextEmit]
// - R11 104 prevHash, currHash, nextHash, offset
// - R12 112 &src[base], skip
// - R13 . &src[nextS], &src[len(src) - 8]
// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x
// - R15 120 candidate
// - R16 . hash constant, 0x1e35a7bd
// - R17 . &table
// - . 128 table
//
// The second column (64, 72, etc) is the stack offset to spill the registers
// when calling other functions. We could pack this slightly tighter, but it's
// simpler to have a dedicated spill map independent of the function called.
//
// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
// extra 64 bytes, to call other functions, and an extra 64 bytes, to spill
// local variables (registers) during calls gives 32768 + 64 + 64 = 32896.
TEXT ·encodeBlock(SB), 0, $32896-56
MOVD dst_base+0(FP), R8
MOVD src_base+24(FP), R7
MOVD src_len+32(FP), R14
// shift, tableSize := uint32(32-8), 1<<8
MOVD $24, R5
MOVD $256, R6
MOVW $0xa7bd, R16
MOVKW $(0x1e35<<16), R16
calcShift:
// for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
// shift--
// }
MOVD $16384, R2
CMP R2, R6
BGE varTable
CMP R14, R6
BGE varTable
SUB $1, R5, R5
LSL $1, R6, R6
B calcShift
varTable:
// var table [maxTableSize]uint16
//
// In the asm code, unlike the Go code, we can zero-initialize only the
// first tableSize elements. Each uint16 element is 2 bytes and each
// iteration writes 64 bytes, so we can do only tableSize/32 writes
// instead of the 2048 writes that would zero-initialize all of table's
// 32768 bytes. This clear could overrun the first tableSize elements, but
// it won't overrun the allocated stack size.
ADD $128, RSP, R17
MOVD R17, R4
// !!! R6 = &src[tableSize]
ADD R6<<1, R17, R6
memclr:
STP.P (ZR, ZR), 64(R4)
STP (ZR, ZR), -48(R4)
STP (ZR, ZR), -32(R4)
STP (ZR, ZR), -16(R4)
CMP R4, R6
BHI memclr
// !!! R6 = &src[0]
MOVD R7, R6
// sLimit := len(src) - inputMargin
MOVD R14, R9
SUB $15, R9, R9
// !!! Pre-emptively spill R5, R6 and R9 to the stack. Their values don't
// change for the rest of the function.
MOVD R5, 64(RSP)
MOVD R6, 72(RSP)
MOVD R9, 96(RSP)
// nextEmit := 0
MOVD R6, R10
// s := 1
ADD $1, R7, R7
// nextHash := hash(load32(src, s), shift)
MOVW 0(R7), R11
MULW R16, R11, R11
LSRW R5, R11, R11
outer:
// for { etc }
// skip := 32
MOVD $32, R12
// nextS := s
MOVD R7, R13
// candidate := 0
MOVD $0, R15
inner0:
// for { etc }
// s := nextS
MOVD R13, R7
// bytesBetweenHashLookups := skip >> 5
MOVD R12, R14
LSR $5, R14, R14
// nextS = s + bytesBetweenHashLookups
ADD R14, R13, R13
// skip += bytesBetweenHashLookups
ADD R14, R12, R12
// if nextS > sLimit { goto emitRemainder }
MOVD R13, R3
SUB R6, R3, R3
CMP R9, R3
BHI emitRemainder
// candidate = int(table[nextHash])
MOVHU 0(R17)(R11<<1), R15
// table[nextHash] = uint16(s)
MOVD R7, R3
SUB R6, R3, R3
MOVH R3, 0(R17)(R11<<1)
// nextHash = hash(load32(src, nextS), shift)
MOVW 0(R13), R11
MULW R16, R11
LSRW R5, R11, R11
// if load32(src, s) != load32(src, candidate) { continue } break
MOVW 0(R7), R3
MOVW (R6)(R15*1), R4
CMPW R4, R3
BNE inner0
fourByteMatch:
// As per the encode_other.go code:
//
// A 4-byte match has been found. We'll later see etc.
// !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
// on inputMargin in encode.go.
MOVD R7, R3
SUB R10, R3, R3
CMP $16, R3
BLE emitLiteralFastPath
// ----------------------------------------
// Begin inline of the emitLiteral call.
//
// d += emitLiteral(dst[d:], src[nextEmit:s])
MOVW R3, R4
SUBW $1, R4, R4
MOVW $60, R2
CMPW R2, R4
BLT inlineEmitLiteralOneByte
MOVW $256, R2
CMPW R2, R4
BLT inlineEmitLiteralTwoBytes
inlineEmitLiteralThreeBytes:
MOVD $0xf4, R1
MOVB R1, 0(R8)
MOVW R4, 1(R8)
ADD $3, R8, R8
B inlineEmitLiteralMemmove
inlineEmitLiteralTwoBytes:
MOVD $0xf0, R1
MOVB R1, 0(R8)
MOVB R4, 1(R8)
ADD $2, R8, R8
B inlineEmitLiteralMemmove
inlineEmitLiteralOneByte:
LSLW $2, R4, R4
MOVB R4, 0(R8)
ADD $1, R8, R8
inlineEmitLiteralMemmove:
// Spill local variables (registers) onto the stack; call; unspill.
//
// copy(dst[i:], lit)
//
// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
// R8, R10 and R3 as arguments.
MOVD R8, 8(RSP)
MOVD R10, 16(RSP)
MOVD R3, 24(RSP)
// Finish the "d +=" part of "d += emitLiteral(etc)".
ADD R3, R8, R8
MOVD R7, 80(RSP)
MOVD R8, 88(RSP)
MOVD R15, 120(RSP)
CALL runtime·memmove(SB)
MOVD 64(RSP), R5
MOVD 72(RSP), R6
MOVD 80(RSP), R7
MOVD 88(RSP), R8
MOVD 96(RSP), R9
MOVD 120(RSP), R15
ADD $128, RSP, R17
MOVW $0xa7bd, R16
MOVKW $(0x1e35<<16), R16
B inner1
inlineEmitLiteralEnd:
// End inline of the emitLiteral call.
// ----------------------------------------
emitLiteralFastPath:
// !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
MOVB R3, R4
SUBW $1, R4, R4
AND $0xff, R4, R4
LSLW $2, R4, R4
MOVB R4, (R8)
ADD $1, R8, R8
// !!! Implement the copy from lit to dst as a 16-byte load and store.
// (Encode's documentation says that dst and src must not overlap.)
//
// This always copies 16 bytes, instead of only len(lit) bytes, but that's
// OK. Subsequent iterations will fix up the overrun.
//
// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
// 16-byte loads and stores. This technique probably wouldn't be as
// effective on architectures that are fussier about alignment.
LDP 0(R10), (R0, R1)
STP (R0, R1), 0(R8)
ADD R3, R8, R8
inner1:
// for { etc }
// base := s
MOVD R7, R12
// !!! offset := base - candidate
MOVD R12, R11
SUB R15, R11, R11
SUB R6, R11, R11
// ----------------------------------------
// Begin inline of the extendMatch call.
//
// s = extendMatch(src, candidate+4, s+4)
// !!! R14 = &src[len(src)]
MOVD src_len+32(FP), R14
ADD R6, R14, R14
// !!! R13 = &src[len(src) - 8]
MOVD R14, R13
SUB $8, R13, R13
// !!! R15 = &src[candidate + 4]
ADD $4, R15, R15
ADD R6, R15, R15
// !!! s += 4
ADD $4, R7, R7
inlineExtendMatchCmp8:
// As long as we are 8 or more bytes before the end of src, we can load and
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
CMP R13, R7
BHI inlineExtendMatchCmp1
MOVD (R15), R3
MOVD (R7), R4
CMP R4, R3
BNE inlineExtendMatchBSF
ADD $8, R15, R15
ADD $8, R7, R7
B inlineExtendMatchCmp8
inlineExtendMatchBSF:
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
// the index of the first byte that differs.
// RBIT reverses the bit order, then CLZ counts the leading zeros, the
// combination of which finds the least significant bit which is set.
// The arm64 architecture is little-endian, and the shift by 3 converts
// a bit index to a byte index.
EOR R3, R4, R4
RBIT R4, R4
CLZ R4, R4
ADD R4>>3, R7, R7
B inlineExtendMatchEnd
inlineExtendMatchCmp1:
// In src's tail, compare 1 byte at a time.
CMP R7, R14
BLS inlineExtendMatchEnd
MOVB (R15), R3
MOVB (R7), R4
CMP R4, R3
BNE inlineExtendMatchEnd
ADD $1, R15, R15
ADD $1, R7, R7
B inlineExtendMatchCmp1
inlineExtendMatchEnd:
// End inline of the extendMatch call.
// ----------------------------------------
// ----------------------------------------
// Begin inline of the emitCopy call.
//
// d += emitCopy(dst[d:], base-candidate, s-base)
// !!! length := s - base
MOVD R7, R3
SUB R12, R3, R3
inlineEmitCopyLoop0:
// for length >= 68 { etc }
MOVW $68, R2
CMPW R2, R3
BLT inlineEmitCopyStep1
// Emit a length 64 copy, encoded as 3 bytes.
MOVD $0xfe, R1
MOVB R1, 0(R8)
MOVW R11, 1(R8)
ADD $3, R8, R8
SUBW $64, R3, R3
B inlineEmitCopyLoop0
inlineEmitCopyStep1:
// if length > 64 { etc }
MOVW $64, R2
CMPW R2, R3
BLE inlineEmitCopyStep2
// Emit a length 60 copy, encoded as 3 bytes.
MOVD $0xee, R1
MOVB R1, 0(R8)
MOVW R11, 1(R8)
ADD $3, R8, R8
SUBW $60, R3, R3
inlineEmitCopyStep2:
// if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
MOVW $12, R2
CMPW R2, R3
BGE inlineEmitCopyStep3
MOVW $2048, R2
CMPW R2, R11
BGE inlineEmitCopyStep3
// Emit the remaining copy, encoded as 2 bytes.
MOVB R11, 1(R8)
LSRW $8, R11, R11
LSLW $5, R11, R11
SUBW $4, R3, R3
AND $0xff, R3, R3
LSLW $2, R3, R3
ORRW R3, R11, R11
ORRW $1, R11, R11
MOVB R11, 0(R8)
ADD $2, R8, R8
B inlineEmitCopyEnd
inlineEmitCopyStep3:
// Emit the remaining copy, encoded as 3 bytes.
SUBW $1, R3, R3
LSLW $2, R3, R3
ORRW $2, R3, R3
MOVB R3, 0(R8)
MOVW R11, 1(R8)
ADD $3, R8, R8
inlineEmitCopyEnd:
// End inline of the emitCopy call.
// ----------------------------------------
// nextEmit = s
MOVD R7, R10
// if s >= sLimit { goto emitRemainder }
MOVD R7, R3
SUB R6, R3, R3
CMP R3, R9
BLS emitRemainder
// As per the encode_other.go code:
//
// We could immediately etc.
// x := load64(src, s-1)
MOVD -1(R7), R14
// prevHash := hash(uint32(x>>0), shift)
MOVW R14, R11
MULW R16, R11, R11
LSRW R5, R11, R11
// table[prevHash] = uint16(s-1)
MOVD R7, R3
SUB R6, R3, R3
SUB $1, R3, R3
MOVHU R3, 0(R17)(R11<<1)
// currHash := hash(uint32(x>>8), shift)
LSR $8, R14, R14
MOVW R14, R11
MULW R16, R11, R11
LSRW R5, R11, R11
// candidate = int(table[currHash])
MOVHU 0(R17)(R11<<1), R15
// table[currHash] = uint16(s)
ADD $1, R3, R3
MOVHU R3, 0(R17)(R11<<1)
// if uint32(x>>8) == load32(src, candidate) { continue }
MOVW (R6)(R15*1), R4
CMPW R4, R14
BEQ inner1
// nextHash = hash(uint32(x>>16), shift)
LSR $8, R14, R14
MOVW R14, R11
MULW R16, R11, R11
LSRW R5, R11, R11
// s++
ADD $1, R7, R7
// break out of the inner1 for loop, i.e. continue the outer loop.
B outer
emitRemainder:
// if nextEmit < len(src) { etc }
MOVD src_len+32(FP), R3
ADD R6, R3, R3
CMP R3, R10
BEQ encodeBlockEnd
// d += emitLiteral(dst[d:], src[nextEmit:])
//
// Push args.
MOVD R8, 8(RSP)
MOVD $0, 16(RSP) // Unnecessary, as the callee ignores it, but conservative.
MOVD $0, 24(RSP) // Unnecessary, as the callee ignores it, but conservative.
MOVD R10, 32(RSP)
SUB R10, R3, R3
MOVD R3, 40(RSP)
MOVD R3, 48(RSP) // Unnecessary, as the callee ignores it, but conservative.
// Spill local variables (registers) onto the stack; call; unspill.
MOVD R8, 88(RSP)
CALL ·emitLiteral(SB)
MOVD 88(RSP), R8
// Finish the "d +=" part of "d += emitLiteral(etc)".
MOVD 56(RSP), R1
ADD R1, R8, R8
encodeBlockEnd:
MOVD dst_base+0(FP), R3
SUB R3, R8, R8
MOVD R8, d+48(FP)
RET
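The hash and adaptive table sizing used throughout encodeBlock, as a Go sketch (the multiplier 0x1e35a7bd and the 32-8 / 256 / 16384 values come straight from the assembly; the helper names are ours):

const maxTableSize = 1 << 14 // 16384 uint16 slots, 32768 bytes of table

// hash maps a 4-byte window of src to a table index; a larger shift means a
// smaller table.
func hash(u, shift uint32) uint32 {
	return (u * 0x1e35a7bd) >> shift
}

// tableParams mirrors the calcShift loop: start with shift = 32-8 and a
// 256-entry table, doubling the table (and decrementing shift) until it
// reaches maxTableSize or covers len(src).
func tableParams(srcLen int) (shift uint32, tableSize int) {
	shift, tableSize = 32-8, 1<<8
	for ; tableSize < maxTableSize && tableSize < srcLen; tableSize *= 2 {
		shift--
	}
	return shift, tableSize
}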


@ -1,30 +0,0 @@
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
// +build amd64 arm64
package snappy
// emitLiteral has the same semantics as in encode_other.go.
//
//go:noescape
func emitLiteral(dst, lit []byte) int
// emitCopy has the same semantics as in encode_other.go.
//
//go:noescape
func emitCopy(dst []byte, offset, length int) int
// extendMatch has the same semantics as in encode_other.go.
//
//go:noescape
func extendMatch(src []byte, i, j int) int
// encodeBlock has the same semantics as in encode_other.go.
//
//go:noescape
func encodeBlock(dst, src []byte) (d int)
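These stubs are internal to the package; callers reach them through snappy's public block API. A minimal usage sketch, assuming the usual golang/snappy Encode/Decode signatures:

package main

import (
	"fmt"

	"github.com/golang/snappy"
)

func main() {
	src := []byte("snappy snappy snappy snappy")

	// Encode compresses src into a freshly sized buffer (dst == nil).
	compressed := snappy.Encode(nil, src)

	// Decode reverses it and returns an error on corrupt input.
	decompressed, err := snappy.Decode(nil, compressed)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%d -> %d bytes, round-trip ok: %v\n",
		len(src), len(compressed), string(decompressed) == string(src))
}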


@ -315,7 +315,7 @@ func (tf Transform) Option() Option { return tf.trans }
// pops the address from the stack. Thus, when traversing into a pointer from
// reflect.Ptr, reflect.Slice element, or reflect.Map, we can detect cycles
// by checking whether the pointer has already been visited. The cycle detection
// uses a seperate stack for the x and y values.
// uses a separate stack for the x and y values.
//
// If a cycle is detected we need to determine whether the two pointers
// should be considered equal. The definition of equality chosen by Equal


@ -7,6 +7,7 @@ package cmp
import (
"bytes"
"fmt"
"math"
"reflect"
"strconv"
"strings"
@ -96,15 +97,16 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
}
// Auto-detect the type of the data.
var isLinedText, isText, isBinary bool
var sx, sy string
var ssx, ssy []string
var isString, isMostlyText, isPureLinedText, isBinary bool
switch {
case t.Kind() == reflect.String:
sx, sy = vx.String(), vy.String()
isText = true // Initial estimate, verify later
isString = true
case t.Kind() == reflect.Slice && t.Elem() == reflect.TypeOf(byte(0)):
sx, sy = string(vx.Bytes()), string(vy.Bytes())
isBinary = true // Initial estimate, verify later
isString = true
case t.Kind() == reflect.Array:
// Arrays need to be addressable for slice operations to work.
vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem()
@ -112,13 +114,12 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
vy2.Set(vy)
vx, vy = vx2, vy2
}
if isText || isBinary {
var numLines, lastLineIdx, maxLineLen int
isBinary = !utf8.ValidString(sx) || !utf8.ValidString(sy)
if isString {
var numTotalRunes, numValidRunes, numLines, lastLineIdx, maxLineLen int
for i, r := range sx + sy {
if !(unicode.IsPrint(r) || unicode.IsSpace(r)) || r == utf8.RuneError {
isBinary = true
break
numTotalRunes++
if (unicode.IsPrint(r) || unicode.IsSpace(r)) && r != utf8.RuneError {
numValidRunes++
}
if r == '\n' {
if maxLineLen < i-lastLineIdx {
@ -128,8 +129,26 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
numLines++
}
}
isText = !isBinary
isLinedText = isText && numLines >= 4 && maxLineLen <= 1024
isPureText := numValidRunes == numTotalRunes
isMostlyText = float64(numValidRunes) > math.Floor(0.90*float64(numTotalRunes))
isPureLinedText = isPureText && numLines >= 4 && maxLineLen <= 1024
isBinary = !isMostlyText
// Avoid diffing by lines if it produces a significantly more complex
// edit script than diffing by bytes.
if isPureLinedText {
ssx = strings.Split(sx, "\n")
ssy = strings.Split(sy, "\n")
esLines := diff.Difference(len(ssx), len(ssy), func(ix, iy int) diff.Result {
return diff.BoolResult(ssx[ix] == ssy[iy])
})
esBytes := diff.Difference(len(sx), len(sy), func(ix, iy int) diff.Result {
return diff.BoolResult(sx[ix] == sy[iy])
})
efficiencyLines := float64(esLines.Dist()) / float64(len(esLines))
efficiencyBytes := float64(esBytes.Dist()) / float64(len(esBytes))
isPureLinedText = efficiencyLines < 4*efficiencyBytes
}
}
// Format the string into printable records.
@ -138,9 +157,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
switch {
// If the text appears to be multi-lined text,
// then perform differencing across individual lines.
case isLinedText:
ssx := strings.Split(sx, "\n")
ssy := strings.Split(sy, "\n")
case isPureLinedText:
list = opts.formatDiffSlice(
reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line",
func(v reflect.Value, d diffMode) textRecord {
@ -229,7 +246,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
// If the text appears to be single-lined text,
// then perform differencing in approximately fixed-sized chunks.
// The output is printed as quoted strings.
case isText:
case isMostlyText:
list = opts.formatDiffSlice(
reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte",
func(v reflect.Value, d diffMode) textRecord {
@ -237,7 +254,6 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
return textRecord{Diff: d, Value: textLine(s)}
},
)
delim = ""
// If the text appears to be binary data,
// then perform differencing in approximately fixed-sized chunks.
@ -299,7 +315,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
// Wrap the output with appropriate type information.
var out textNode = &textWrap{Prefix: "{", Value: list, Suffix: "}"}
if !isText {
if !isMostlyText {
// The "{...}" byte-sequence literal is not valid Go syntax for strings.
// Emit the type for extra clarity (e.g. "string{...}").
if t.Kind() == reflect.String {
@ -338,8 +354,11 @@ func (opts formatOptions) formatDiffSlice(
vx, vy reflect.Value, chunkSize int, name string,
makeRec func(reflect.Value, diffMode) textRecord,
) (list textList) {
es := diff.Difference(vx.Len(), vy.Len(), func(ix int, iy int) diff.Result {
return diff.BoolResult(vx.Index(ix).Interface() == vy.Index(iy).Interface())
eq := func(ix, iy int) bool {
return vx.Index(ix).Interface() == vy.Index(iy).Interface()
}
es := diff.Difference(vx.Len(), vy.Len(), func(ix, iy int) diff.Result {
return diff.BoolResult(eq(ix, iy))
})
appendChunks := func(v reflect.Value, d diffMode) int {
@ -364,6 +383,7 @@ func (opts formatOptions) formatDiffSlice(
groups := coalesceAdjacentEdits(name, es)
groups = coalesceInterveningIdentical(groups, chunkSize/4)
groups = cleanupSurroundingIdentical(groups, eq)
maxGroup := diffStats{Name: name}
for i, ds := range groups {
if maxLen >= 0 && numDiffs >= maxLen {
@ -416,25 +436,36 @@ func (opts formatOptions) formatDiffSlice(
// coalesceAdjacentEdits coalesces the list of edits into groups of adjacent
// equal or unequal counts.
//
// Example:
//
// Input: "..XXY...Y"
// Output: [
// {NumIdentical: 2},
// {NumRemoved: 2, NumInserted: 1},
// {NumIdentical: 3},
// {NumInserted: 1},
// ]
//
func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) {
var prevCase int // Arbitrary index into which case last occurred
lastStats := func(i int) *diffStats {
if prevCase != i {
var prevMode byte
lastStats := func(mode byte) *diffStats {
if prevMode != mode {
groups = append(groups, diffStats{Name: name})
prevCase = i
prevMode = mode
}
return &groups[len(groups)-1]
}
for _, e := range es {
switch e {
case diff.Identity:
lastStats(1).NumIdentical++
lastStats('=').NumIdentical++
case diff.UniqueX:
lastStats(2).NumRemoved++
lastStats('!').NumRemoved++
case diff.UniqueY:
lastStats(2).NumInserted++
lastStats('!').NumInserted++
case diff.Modified:
lastStats(2).NumModified++
lastStats('!').NumModified++
}
}
return groups
@ -444,6 +475,35 @@ func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats)
// equal groups into adjacent unequal groups that currently result in a
// dual inserted/removed printout. This acts as a high-pass filter to smooth
// out high-frequency changes within the windowSize.
//
// Example:
//
// WindowSize: 16,
// Input: [
// {NumIdentical: 61}, // group 0
// {NumRemoved: 3, NumInserted: 1}, // group 1
// {NumIdentical: 6}, // ├── coalesce
// {NumInserted: 2}, // ├── coalesce
// {NumIdentical: 1}, // ├── coalesce
// {NumRemoved: 9}, // └── coalesce
// {NumIdentical: 64}, // group 2
// {NumRemoved: 3, NumInserted: 1}, // group 3
// {NumIdentical: 6}, // ├── coalesce
// {NumInserted: 2}, // ├── coalesce
// {NumIdentical: 1}, // ├── coalesce
// {NumRemoved: 7}, // ├── coalesce
// {NumIdentical: 1}, // ├── coalesce
// {NumRemoved: 2}, // └── coalesce
// {NumIdentical: 63}, // group 4
// ]
// Output: [
// {NumIdentical: 61},
// {NumIdentical: 7, NumRemoved: 12, NumInserted: 3},
// {NumIdentical: 64},
// {NumIdentical: 8, NumRemoved: 12, NumInserted: 3},
// {NumIdentical: 63},
// ]
//
func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats {
groups, groupsOrig := groups[:0], groups
for i, ds := range groupsOrig {
@ -463,3 +523,91 @@ func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStat
}
return groups
}
// cleanupSurroundingIdentical scans through all unequal groups, and
// moves any leading sequence of equal elements to the preceding equal group and
// moves any trailing sequence of equal elements to the succeeding equal group.
//
// This is necessary since coalesceInterveningIdentical may coalesce edit groups
// together such that leading/trailing spans of equal elements becomes possible.
// Note that this can occur even with an optimal diffing algorithm.
//
// Example:
//
// Input: [
// {NumIdentical: 61},
// {NumIdentical: 1 , NumRemoved: 11, NumInserted: 2}, // assume 3 leading identical elements
// {NumIdentical: 67},
// {NumIdentical: 7, NumRemoved: 12, NumInserted: 3}, // assume 10 trailing identical elements
// {NumIdentical: 54},
// ]
// Output: [
// {NumIdentical: 64}, // incremented by 3
// {NumRemoved: 9},
// {NumIdentical: 67},
// {NumRemoved: 9},
// {NumIdentical: 64}, // incremented by 10
// ]
//
func cleanupSurroundingIdentical(groups []diffStats, eq func(i, j int) bool) []diffStats {
var ix, iy int // indexes into sequence x and y
for i, ds := range groups {
// Handle equal group.
if ds.NumDiff() == 0 {
ix += ds.NumIdentical
iy += ds.NumIdentical
continue
}
// Handle unequal group.
nx := ds.NumIdentical + ds.NumRemoved + ds.NumModified
ny := ds.NumIdentical + ds.NumInserted + ds.NumModified
var numLeadingIdentical, numTrailingIdentical int
for i := 0; i < nx && i < ny && eq(ix+i, iy+i); i++ {
numLeadingIdentical++
}
for i := 0; i < nx && i < ny && eq(ix+nx-1-i, iy+ny-1-i); i++ {
numTrailingIdentical++
}
if numIdentical := numLeadingIdentical + numTrailingIdentical; numIdentical > 0 {
if numLeadingIdentical > 0 {
// Remove leading identical span from this group and
// insert it into the preceding group.
if i-1 >= 0 {
groups[i-1].NumIdentical += numLeadingIdentical
} else {
// No preceding group exists, so prepend a new group,
// but do so after we finish iterating over all groups.
defer func() {
groups = append([]diffStats{{Name: groups[0].Name, NumIdentical: numLeadingIdentical}}, groups...)
}()
}
// Increment indexes since the preceding group would have handled this.
ix += numLeadingIdentical
iy += numLeadingIdentical
}
if numTrailingIdentical > 0 {
// Remove trailing identical span from this group and
// insert it into the succeeding group.
if i+1 < len(groups) {
groups[i+1].NumIdentical += numTrailingIdentical
} else {
// No succeeding group exists, so append a new group,
// but do so after we finish iterating over all groups.
defer func() {
groups = append(groups, diffStats{Name: groups[len(groups)-1].Name, NumIdentical: numTrailingIdentical})
}()
}
// Do not increment indexes since the succeeding group will handle this.
}
// Update this group since some identical elements were removed.
nx -= numIdentical
ny -= numIdentical
groups[i] = diffStats{Name: ds.Name, NumRemoved: nx, NumInserted: ny}
}
ix += nx
iy += ny
}
return groups
}
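The changes above only alter how differences are rendered; a minimal sketch of driving the multi-line string path through the public API (standard go-cmp usage; the exact output shape depends on the heuristics patched above):

package main

import (
	"fmt"

	"github.com/google/go-cmp/cmp"
)

func main() {
	x := "alpha\nbeta\ngamma\ndelta\n"
	y := "alpha\nbeta\nGAMMA\ndelta\n"

	// With four or more lines of mostly printable text, FormatDiffSlice now
	// prefers a line-by-line diff unless it is far less efficient than a
	// byte-by-byte diff.
	if diff := cmp.Diff(x, y); diff != "" {
		fmt.Printf("strings differ (-x +y):\n%s", diff)
	}
}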

vendor/github.com/klauspost/compress/.gitattributes generated vendored Normal file

@ -0,0 +1,2 @@
* -text
*.bin -text -diff

vendor/github.com/klauspost/compress/.gitignore generated vendored Normal file

@ -0,0 +1,25 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof
/s2/cmd/_s2sx/sfx-exe

vendor/github.com/klauspost/compress/.goreleaser.yml generated vendored Normal file

@ -0,0 +1,141 @@
# This is an example goreleaser.yaml file with some sane defaults.
# Make sure to check the documentation at http://goreleaser.com
before:
hooks:
- ./gen.sh
- go install mvdan.cc/garble@latest
builds:
-
id: "s2c"
binary: s2c
main: ./s2/cmd/s2c/main.go
flags:
- -trimpath
env:
- CGO_ENABLED=0
goos:
- aix
- linux
- freebsd
- netbsd
- windows
- darwin
goarch:
- 386
- amd64
- arm
- arm64
- ppc64
- ppc64le
- mips64
- mips64le
goarm:
- 7
gobinary: garble
-
id: "s2d"
binary: s2d
main: ./s2/cmd/s2d/main.go
flags:
- -trimpath
env:
- CGO_ENABLED=0
goos:
- aix
- linux
- freebsd
- netbsd
- windows
- darwin
goarch:
- 386
- amd64
- arm
- arm64
- ppc64
- ppc64le
- mips64
- mips64le
goarm:
- 7
gobinary: garble
-
id: "s2sx"
binary: s2sx
main: ./s2/cmd/_s2sx/main.go
flags:
- -modfile=s2sx.mod
- -trimpath
env:
- CGO_ENABLED=0
goos:
- aix
- linux
- freebsd
- netbsd
- windows
- darwin
goarch:
- 386
- amd64
- arm
- arm64
- ppc64
- ppc64le
- mips64
- mips64le
goarm:
- 7
gobinary: garble
archives:
-
id: s2-binaries
name_template: "s2-{{ .Os }}_{{ .Arch }}_{{ .Version }}"
replacements:
aix: AIX
darwin: OSX
linux: Linux
windows: Windows
386: i386
amd64: x86_64
freebsd: FreeBSD
netbsd: NetBSD
format_overrides:
- goos: windows
format: zip
files:
- unpack/*
- s2/LICENSE
- s2/README.md
checksum:
name_template: 'checksums.txt'
snapshot:
name_template: "{{ .Tag }}-next"
changelog:
sort: asc
filters:
exclude:
- '^doc:'
- '^docs:'
- '^test:'
- '^tests:'
- '^Update\sREADME.md'
nfpms:
-
file_name_template: "s2_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
vendor: Klaus Post
homepage: https://github.com/klauspost/compress
maintainer: Klaus Post <klauspost@gmail.com>
description: S2 Compression Tool
license: BSD 3-Clause
formats:
- deb
- rpm
replacements:
darwin: Darwin
linux: Linux
freebsd: FreeBSD
amd64: x86_64


@ -26,3 +26,279 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
------------------
Files: gzhttp/*
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2016-2017 The New York Times Company
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
------------------
Files: s2/cmd/internal/readahead/*
The MIT License (MIT)
Copyright (c) 2015 Klaus Post
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
---------------------
Files: snappy/*
Files: internal/snapref/*
Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-----------------
Files: s2/cmd/internal/filepathx/*
Copyright 2016 The filepathx Authors
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

vendor/github.com/klauspost/compress/README.md generated vendored Normal file

@ -0,0 +1,452 @@
# compress
This package provides various compression algorithms.
* [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and decompression in pure Go.
* [S2](https://github.com/klauspost/compress/tree/master/s2#s2-compression) is a high performance replacement for Snappy.
* Optimized [deflate](https://godoc.org/github.com/klauspost/compress/flate) packages which can be used as a drop-in replacement for [gzip](https://godoc.org/github.com/klauspost/compress/gzip), [zip](https://godoc.org/github.com/klauspost/compress/zip) and [zlib](https://godoc.org/github.com/klauspost/compress/zlib) (see the drop-in sketch after this list).
* [snappy](https://github.com/klauspost/compress/tree/master/snappy) is a drop-in replacement for `github.com/golang/snappy` offering better compression and concurrent streams.
* [huff0](https://github.com/klauspost/compress/tree/master/huff0) and [FSE](https://github.com/klauspost/compress/tree/master/fse) implementations for raw entropy encoding.
* [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp) Provides client and server wrappers for handling gzipped requests efficiently.
* [pgzip](https://github.com/klauspost/pgzip) is a separate package that provides a very fast parallel gzip implementation.
* [fuzz package](https://github.com/klauspost/compress-fuzz) for fuzz testing all compressors/decompressors here.
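A minimal sketch of the drop-in use mentioned in the list above: swap the standard library import for the klauspost package and the calling code stays the same (the payload below is only an illustration):

package main

import (
	"bytes"
	"fmt"
	"io"

	// Drop-in replacement for the standard library's compress/gzip.
	"github.com/klauspost/compress/gzip"
)

func main() {
	var buf bytes.Buffer

	// Compress with the same API as compress/gzip.
	zw := gzip.NewWriter(&buf)
	if _, err := zw.Write([]byte("hello, compression")); err != nil {
		panic(err)
	}
	if err := zw.Close(); err != nil {
		panic(err)
	}

	// Decompress.
	zr, err := gzip.NewReader(&buf)
	if err != nil {
		panic(err)
	}
	defer zr.Close()

	out, err := io.ReadAll(zr)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out))
}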
[![Go Reference](https://pkg.go.dev/badge/klauspost/compress.svg)](https://pkg.go.dev/github.com/klauspost/compress?tab=subdirectories)
[![Go](https://github.com/klauspost/compress/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/compress/actions/workflows/go.yml)
[![Sourcegraph Badge](https://sourcegraph.com/github.com/klauspost/compress/-/badge.svg)](https://sourcegraph.com/github.com/klauspost/compress?badge)
# changelog
* Jan 11, 2022 (v1.14.1)
* s2: Add stream index in [#462](https://github.com/klauspost/compress/pull/462)
* flate: Speed and efficiency improvements in [#439](https://github.com/klauspost/compress/pull/439) [#461](https://github.com/klauspost/compress/pull/461) [#455](https://github.com/klauspost/compress/pull/455) [#452](https://github.com/klauspost/compress/pull/452) [#458](https://github.com/klauspost/compress/pull/458)
* zstd: Performance improvement in [#420]( https://github.com/klauspost/compress/pull/420) [#456](https://github.com/klauspost/compress/pull/456) [#437](https://github.com/klauspost/compress/pull/437) [#467](https://github.com/klauspost/compress/pull/467) [#468](https://github.com/klauspost/compress/pull/468)
* zstd: add arm64 xxhash assembly in [#464](https://github.com/klauspost/compress/pull/464)
* Add garble-obfuscated binaries for s2 in [#445](https://github.com/klauspost/compress/pull/445)
* Aug 30, 2021 (v1.13.5)
* gz/zlib/flate: Alias stdlib errors [#425](https://github.com/klauspost/compress/pull/425)
* s2: Add block support to commandline tools [#413](https://github.com/klauspost/compress/pull/413)
* zstd: pooledZipWriter should return Writers to the same pool [#426](https://github.com/klauspost/compress/pull/426)
* Removed golang/snappy as external dependency for tests [#421](https://github.com/klauspost/compress/pull/421)
* Aug 12, 2021 (v1.13.4)
* Add [snappy replacement package](https://github.com/klauspost/compress/tree/master/snappy).
* zstd: Fix incorrect encoding in "best" mode [#415](https://github.com/klauspost/compress/pull/415)
* Aug 3, 2021 (v1.13.3)
* zstd: Improve Best compression [#404](https://github.com/klauspost/compress/pull/404)
* zstd: Fix WriteTo error forwarding [#411](https://github.com/klauspost/compress/pull/411)
* gzhttp: Return http.HandlerFunc instead of http.Handler. Unlikely breaking change. [#406](https://github.com/klauspost/compress/pull/406)
* s2sx: Fix max size error [#399](https://github.com/klauspost/compress/pull/399)
* zstd: Add optional stream content size on reset [#401](https://github.com/klauspost/compress/pull/401)
* zstd: use SpeedBestCompression for level >= 10 [#410](https://github.com/klauspost/compress/pull/410)
* Jun 14, 2021 (v1.13.1)
* s2: Add full Snappy output support [#396](https://github.com/klauspost/compress/pull/396)
* zstd: Add configurable [Decoder window](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithDecoderMaxWindow) size [#394](https://github.com/klauspost/compress/pull/394)
* gzhttp: Add header to skip compression [#389](https://github.com/klauspost/compress/pull/389)
* s2: Improve speed with bigger output margin [#395](https://github.com/klauspost/compress/pull/395)
* Jun 3, 2021 (v1.13.0)
* Added [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp#gzip-handler) which allows wrapping HTTP servers and clients with GZIP compressors.
* zstd: Detect short invalid signatures [#382](https://github.com/klauspost/compress/pull/382)
* zstd: Spawn decoder goroutine only if needed. [#380](https://github.com/klauspost/compress/pull/380)
* May 25, 2021 (v1.12.3)
* deflate: Better/faster Huffman encoding [#374](https://github.com/klauspost/compress/pull/374)
* deflate: Allocate less for history. [#375](https://github.com/klauspost/compress/pull/375)
* zstd: Forward read errors [#373](https://github.com/klauspost/compress/pull/373)
* Apr 27, 2021 (v1.12.2)
* zstd: Improve better/best compression [#360](https://github.com/klauspost/compress/pull/360) [#364](https://github.com/klauspost/compress/pull/364) [#365](https://github.com/klauspost/compress/pull/365)
* zstd: Add helpers to compress/decompress zstd inside zip files [#363](https://github.com/klauspost/compress/pull/363)
* deflate: Improve level 5+6 compression [#367](https://github.com/klauspost/compress/pull/367)
* s2: Improve better/best compression [#358](https://github.com/klauspost/compress/pull/358) [#359](https://github.com/klauspost/compress/pull/358)
* s2: Load after checking src limit on amd64. [#362](https://github.com/klauspost/compress/pull/362)
* s2sx: Limit max executable size [#368](https://github.com/klauspost/compress/pull/368)
* Apr 14, 2021 (v1.12.1)
* snappy package removed. Upstream added as dependency.
* s2: Better compression in "best" mode [#353](https://github.com/klauspost/compress/pull/353)
* s2sx: Add stdin input and detect pre-compressed from signature [#352](https://github.com/klauspost/compress/pull/352)
* s2c/s2d: Add http as possible input [#348](https://github.com/klauspost/compress/pull/348)
* s2c/s2d/s2sx: Always truncate when writing files [#352](https://github.com/klauspost/compress/pull/352)
* zstd: Reduce memory usage further when using [WithLowerEncoderMem](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithLowerEncoderMem) [#346](https://github.com/klauspost/compress/pull/346)
* s2: Fix potential problem with amd64 assembly and profilers [#349](https://github.com/klauspost/compress/pull/349)
<details>
<summary>See changes prior to v1.12.1</summary>
* Mar 26, 2021 (v1.11.13)
* zstd: Big speedup on small dictionary encodes [#344](https://github.com/klauspost/compress/pull/344) [#345](https://github.com/klauspost/compress/pull/345)
* zstd: Add [WithLowerEncoderMem](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithLowerEncoderMem) encoder option [#336](https://github.com/klauspost/compress/pull/336)
* deflate: Improve entropy compression [#338](https://github.com/klauspost/compress/pull/338)
* s2: Clean up and minor performance improvement in best [#341](https://github.com/klauspost/compress/pull/341)
* Mar 5, 2021 (v1.11.12)
* s2: Add `s2sx` binary that creates [self extracting archives](https://github.com/klauspost/compress/tree/master/s2#s2sx-self-extracting-archives).
* s2: Speed up decompression on non-assembly platforms [#328](https://github.com/klauspost/compress/pull/328)
* Mar 1, 2021 (v1.11.9)
* s2: Add ARM64 decompression assembly. Around 2x output speed. [#324](https://github.com/klauspost/compress/pull/324)
* s2: Improve "better" speed and efficiency. [#325](https://github.com/klauspost/compress/pull/325)
* s2: Fix binaries.
* Feb 25, 2021 (v1.11.8)
* s2: Fixed occasional out-of-bounds write on amd64. Upgrade recommended.
* s2: Add AMD64 assembly for better mode. 25-50% faster. [#315](https://github.com/klauspost/compress/pull/315)
* s2: Less upfront decoder allocation. [#322](https://github.com/klauspost/compress/pull/322)
* zstd: Faster "compression" of incompressible data. [#314](https://github.com/klauspost/compress/pull/314)
* zip: Fix zip64 headers. [#313](https://github.com/klauspost/compress/pull/313)
* Jan 14, 2021 (v1.11.7)
* Use Bytes() interface to get bytes across packages. [#309](https://github.com/klauspost/compress/pull/309)
* s2: Add 'best' compression option. [#310](https://github.com/klauspost/compress/pull/310)
* s2: Add ReaderMaxBlockSize, changes `s2.NewReader` signature to include varargs. [#311](https://github.com/klauspost/compress/pull/311)
* s2: Fix crash on small better buffers. [#308](https://github.com/klauspost/compress/pull/308)
* s2: Clean up decoder. [#312](https://github.com/klauspost/compress/pull/312)
* Jan 7, 2021 (v1.11.6)
* zstd: Make decoder allocations smaller [#306](https://github.com/klauspost/compress/pull/306)
* zstd: Free Decoder resources when Reset is called with a nil io.Reader [#305](https://github.com/klauspost/compress/pull/305)
* Dec 20, 2020 (v1.11.4)
* zstd: Add Best compression mode [#304](https://github.com/klauspost/compress/pull/304)
* Add header decoder [#299](https://github.com/klauspost/compress/pull/299)
* s2: Add uncompressed stream option [#297](https://github.com/klauspost/compress/pull/297)
* Simplify/speed up small blocks with known max size. [#300](https://github.com/klauspost/compress/pull/300)
* zstd: Always reset literal dict encoder [#303](https://github.com/klauspost/compress/pull/303)
* Nov 15, 2020 (v1.11.3)
* inflate: 10-15% faster decompression [#293](https://github.com/klauspost/compress/pull/293)
* zstd: Tweak DecodeAll default allocation [#295](https://github.com/klauspost/compress/pull/295)
* Oct 11, 2020 (v1.11.2)
* s2: Fix out of bounds read in "better" block compression [#291](https://github.com/klauspost/compress/pull/291)
* Oct 1, 2020 (v1.11.1)
* zstd: Set allLitEntropy true in default configuration [#286](https://github.com/klauspost/compress/pull/286)
* Sept 8, 2020 (v1.11.0)
* zstd: Add experimental compression [dictionaries](https://github.com/klauspost/compress/tree/master/zstd#dictionaries) [#281](https://github.com/klauspost/compress/pull/281)
* zstd: Fix mixed Write and ReadFrom calls [#282](https://github.com/klauspost/compress/pull/282)
* inflate/gz: Limit variable shifts, ~5% faster decompression [#274](https://github.com/klauspost/compress/pull/274)
</details>
<details>
<summary>See changes prior to v1.11.0</summary>
* July 8, 2020 (v1.10.11)
* zstd: Fix extra block when compressing with ReadFrom. [#278](https://github.com/klauspost/compress/pull/278)
* huff0: Also populate compression table when reading decoding table. [#275](https://github.com/klauspost/compress/pull/275)
* June 23, 2020 (v1.10.10)
* zstd: Skip entropy compression in fastest mode when no matches. [#270](https://github.com/klauspost/compress/pull/270)
* June 16, 2020 (v1.10.9):
* zstd: API change for specifying dictionaries. See [#268](https://github.com/klauspost/compress/pull/268)
* zip: update CreateHeaderRaw to handle zip64 fields. [#266](https://github.com/klauspost/compress/pull/266)
* Fuzzit tests removed. The service has been purchased and is no longer available.
* June 5, 2020 (v1.10.8):
* 1.15x faster zstd block decompression. [#265](https://github.com/klauspost/compress/pull/265)
* June 1, 2020 (v1.10.7):
* Added zstd decompression [dictionary support](https://github.com/klauspost/compress/tree/master/zstd#dictionaries)
* Increase zstd decompression speed up to 1.19x. [#259](https://github.com/klauspost/compress/pull/259)
* Remove internal reset call in zstd compression and reduce allocations. [#263](https://github.com/klauspost/compress/pull/263)
* May 21, 2020: (v1.10.6)
* zstd: Reduce allocations while decoding. [#258](https://github.com/klauspost/compress/pull/258), [#252](https://github.com/klauspost/compress/pull/252)
* zstd: Stricter decompression checks.
* April 12, 2020: (v1.10.5)
* s2-commands: Flush output when receiving SIGINT. [#239](https://github.com/klauspost/compress/pull/239)
* Apr 8, 2020: (v1.10.4)
* zstd: Minor/special case optimizations. [#251](https://github.com/klauspost/compress/pull/251), [#250](https://github.com/klauspost/compress/pull/250), [#249](https://github.com/klauspost/compress/pull/249), [#247](https://github.com/klauspost/compress/pull/247)
* Mar 11, 2020: (v1.10.3)
* s2: Use S2 encoder in pure Go mode for Snappy output as well. [#245](https://github.com/klauspost/compress/pull/245)
* s2: Fix pure Go block encoder. [#244](https://github.com/klauspost/compress/pull/244)
* zstd: Added "better compression" mode. [#240](https://github.com/klauspost/compress/pull/240)
* zstd: Improve speed of fastest compression mode by 5-10% [#241](https://github.com/klauspost/compress/pull/241)
* zstd: Skip creating encoders when not needed. [#238](https://github.com/klauspost/compress/pull/238)
* Feb 27, 2020: (v1.10.2)
* Close to 50% speedup in inflate (gzip/zip decompression). [#236](https://github.com/klauspost/compress/pull/236) [#234](https://github.com/klauspost/compress/pull/234) [#232](https://github.com/klauspost/compress/pull/232)
* Reduce deflate level 1-6 memory usage up to 59%. [#227](https://github.com/klauspost/compress/pull/227)
* Feb 18, 2020: (v1.10.1)
* Fix zstd crash when resetting multiple times without sending data. [#226](https://github.com/klauspost/compress/pull/226)
* deflate: Fix dictionary use on level 1-6. [#224](https://github.com/klauspost/compress/pull/224)
* Remove deflate writer reference when closing. [#224](https://github.com/klauspost/compress/pull/224)
* Feb 4, 2020: (v1.10.0)
* Add optional dictionary to [stateless deflate](https://pkg.go.dev/github.com/klauspost/compress/flate?tab=doc#StatelessDeflate). Breaking change, send `nil` for previous behaviour. [#216](https://github.com/klauspost/compress/pull/216)
* Fix buffer overflow on repeated small block deflate. [#218](https://github.com/klauspost/compress/pull/218)
* Allow copying content from an existing ZIP file without decompressing+compressing. [#214](https://github.com/klauspost/compress/pull/214)
* Added [S2](https://github.com/klauspost/compress/tree/master/s2#s2-compression) AMD64 assembler and various optimizations. Stream speed >10GB/s. [#186](https://github.com/klauspost/compress/pull/186)
</details>
<details>
<summary>See changes prior to v1.10.0</summary>
* Jan 20, 2020 (v1.9.8) Optimize gzip/deflate with better size estimates and faster table generation. [#207](https://github.com/klauspost/compress/pull/207) by [luyu6056](https://github.com/luyu6056), [#206](https://github.com/klauspost/compress/pull/206).
* Jan 11, 2020: S2 Encode/Decode will use provided buffer if capacity is big enough. [#204](https://github.com/klauspost/compress/pull/204)
* Jan 5, 2020: (v1.9.7) Fix another zstd regression in v1.9.5 - v1.9.6 removed.
* Jan 4, 2020: (v1.9.6) Regression in v1.9.5 fixed causing corrupt zstd encodes in rare cases.
* Jan 4, 2020: Faster IO in [s2c + s2d commandline tools](https://github.com/klauspost/compress/tree/master/s2#commandline-tools) compression/decompression. [#192](https://github.com/klauspost/compress/pull/192)
* Dec 29, 2019: Removed v1.9.5 since fuzz tests showed a compatibility problem with the reference zstandard decoder.
* Dec 29, 2019: (v1.9.5) zstd: 10-20% faster block compression. [#199](https://github.com/klauspost/compress/pull/199)
* Dec 29, 2019: [zip](https://godoc.org/github.com/klauspost/compress/zip) package updated with latest Go features
* Dec 29, 2019: zstd: Single segment flag conditions tweaked. [#197](https://github.com/klauspost/compress/pull/197)
* Dec 18, 2019: s2: Faster compression when ReadFrom is used. [#198](https://github.com/klauspost/compress/pull/198)
* Dec 10, 2019: s2: Fix repeat length output when just above the 16MB limit.
* Dec 10, 2019: zstd: Add function to get decoder as io.ReadCloser. [#191](https://github.com/klauspost/compress/pull/191)
* Dec 3, 2019: (v1.9.4) S2: limit max repeat length. [#188](https://github.com/klauspost/compress/pull/188)
* Dec 3, 2019: Add [WithNoEntropyCompression](https://godoc.org/github.com/klauspost/compress/zstd#WithNoEntropyCompression) to zstd [#187](https://github.com/klauspost/compress/pull/187)
* Dec 3, 2019: Reduce memory use for tests. Check for leaked goroutines.
* Nov 28, 2019 (v1.9.3) Less allocations in stateless deflate.
* Nov 28, 2019: 5-20% Faster huff0 decode. Impacts zstd as well. [#184](https://github.com/klauspost/compress/pull/184)
* Nov 12, 2019 (v1.9.2) Added [Stateless Compression](#stateless-compression) for gzip/deflate.
* Nov 12, 2019: Fixed zstd decompression of large single blocks. [#180](https://github.com/klauspost/compress/pull/180)
* Nov 11, 2019: Set default [s2c](https://github.com/klauspost/compress/tree/master/s2#commandline-tools) block size to 4MB.
* Nov 11, 2019: Reduce inflate memory use by 1KB.
* Nov 10, 2019: Less allocations in deflate bit writer.
* Nov 10, 2019: Fix inconsistent error returned by zstd decoder.
* Oct 28, 2019 (v1.9.1) zstd: Fix crash when compressing blocks. [#174](https://github.com/klauspost/compress/pull/174)
* Oct 24, 2019 (v1.9.0) zstd: Fix rare data corruption [#173](https://github.com/klauspost/compress/pull/173)
* Oct 24, 2019 zstd: Fix huff0 out of buffer write [#171](https://github.com/klauspost/compress/pull/171) and always return errors [#172](https://github.com/klauspost/compress/pull/172)
* Oct 10, 2019: Big deflate rewrite, 30-40% faster with better compression [#105](https://github.com/klauspost/compress/pull/105)
</details>
<details>
<summary>See changes prior to v1.9.0</summary>
* Oct 10, 2019: (v1.8.6) zstd: Allow partial reads to get flushed data. [#169](https://github.com/klauspost/compress/pull/169)
* Oct 3, 2019: Fix inconsistent results on broken zstd streams.
* Sep 25, 2019: Added `-rm` (remove source files) and `-q` (no output except errors) to `s2c` and `s2d` [commands](https://github.com/klauspost/compress/tree/master/s2#commandline-tools)
* Sep 16, 2019: (v1.8.4) Add `s2c` and `s2d` [commandline tools](https://github.com/klauspost/compress/tree/master/s2#commandline-tools).
* Sep 10, 2019: (v1.8.3) Fix s2 decoder [Skip](https://godoc.org/github.com/klauspost/compress/s2#Reader.Skip).
* Sep 7, 2019: zstd: Added [WithWindowSize](https://godoc.org/github.com/klauspost/compress/zstd#WithWindowSize), contributed by [ianwilkes](https://github.com/ianwilkes).
* Sep 5, 2019: (v1.8.2) Add [WithZeroFrames](https://godoc.org/github.com/klauspost/compress/zstd#WithZeroFrames) which adds full zero payload block encoding option.
* Sep 5, 2019: Lazy initialization of zstandard predefined en/decoder tables.
* Aug 26, 2019: (v1.8.1) S2: 1-2% compression increase in "better" compression mode.
* Aug 26, 2019: zstd: Check maximum size of Huffman 1X compressed literals while decoding.
* Aug 24, 2019: (v1.8.0) Added [S2 compression](https://github.com/klauspost/compress/tree/master/s2#s2-compression), a high performance replacement for Snappy.
* Aug 21, 2019: (v1.7.6) Fixed minor issues found by fuzzer. One could lead to zstd not decompressing.
* Aug 18, 2019: Add [fuzzit](https://fuzzit.dev/) continuous fuzzing.
* Aug 14, 2019: zstd: Skip incompressible data 2x faster. [#147](https://github.com/klauspost/compress/pull/147)
* Aug 4, 2019 (v1.7.5): Better literal compression. [#146](https://github.com/klauspost/compress/pull/146)
* Aug 4, 2019: Faster zstd compression. [#143](https://github.com/klauspost/compress/pull/143) [#144](https://github.com/klauspost/compress/pull/144)
* Aug 4, 2019: Faster zstd decompression. [#145](https://github.com/klauspost/compress/pull/145) [#143](https://github.com/klauspost/compress/pull/143) [#142](https://github.com/klauspost/compress/pull/142)
* July 15, 2019 (v1.7.4): Fix double EOF block in rare cases on zstd encoder.
* July 15, 2019 (v1.7.3): Minor speedup/compression increase in default zstd encoder.
* July 14, 2019: zstd decoder: Fix decompression error on multiple uses with mixed content.
* July 7, 2019 (v1.7.2): Snappy update, zstd decoder potential race fix.
* June 17, 2019: zstd decompression bugfix.
* June 17, 2019: fix 32 bit builds.
* June 17, 2019: Easier use in modules (less dependencies).
* June 9, 2019: New stronger "default" [zstd](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression mode. Matches zstd default compression ratio.
* June 5, 2019: 20-40% throughput in [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and better compression.
* June 5, 2019: deflate/gzip compression: Reduce memory usage of lower compression levels.
* June 2, 2019: Added [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression!
* May 25, 2019: deflate/gzip: 10% faster bit writer, mostly visible in lower levels.
* Apr 22, 2019: [zstd](https://github.com/klauspost/compress/tree/master/zstd#zstd) decompression added.
* Aug 1, 2018: Added [huff0 README](https://github.com/klauspost/compress/tree/master/huff0#huff0-entropy-compression).
* Jul 8, 2018: Added [Performance Update 2018](#performance-update-2018) below.
* Jun 23, 2018: Merged [Go 1.11 inflate optimizations](https://go-review.googlesource.com/c/go/+/102235). Go 1.9 is now required. Backwards compatible version tagged with [v1.3.0](https://github.com/klauspost/compress/releases/tag/v1.3.0).
* Apr 2, 2018: Added [huff0](https://godoc.org/github.com/klauspost/compress/huff0) en/decoder. Experimental for now, API may change.
* Mar 4, 2018: Added [FSE Entropy](https://godoc.org/github.com/klauspost/compress/fse) en/decoder. Experimental for now, API may change.
* Nov 3, 2017: Add compression [Estimate](https://godoc.org/github.com/klauspost/compress#Estimate) function.
* May 28, 2017: Reduce allocations when resetting decoder.
* Apr 02, 2017: Change back to official crc32, since changes were merged in Go 1.7.
* Jan 14, 2017: Reduce stack pressure due to array copies. See [Issue #18625](https://github.com/golang/go/issues/18625).
* Oct 25, 2016: Levels 2-4 have been rewritten and now offer significantly better performance than before.
* Oct 20, 2016: Port zlib changes from Go 1.7 to fix zlib writer issue. Please update.
* Oct 16, 2016: Go 1.7 changes merged. Apples to apples this package is a few percent faster, but has a significantly better balance between speed and compression per level.
* Mar 24, 2016: Always attempt Huffman encoding on level 4-7. This improves base 64 encoded data compression.
* Mar 24, 2016: Small speedup for level 1-3.
* Feb 19, 2016: Faster bit writer, level -2 is 15% faster, level 1 is 4% faster.
* Feb 19, 2016: Handle small payloads faster in level 1-3.
* Feb 19, 2016: Added faster level 2 + 3 compression modes.
* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progression in terms of compression. New default level is 5.
* Feb 14, 2016: Snappy: Merge upstream changes.
* Feb 14, 2016: Snappy: Fix aggressive skipping.
* Feb 14, 2016: Snappy: Update benchmark.
* Feb 13, 2016: Deflate: Fixed assembler problem that could lead to sub-optimal compression.
* Feb 12, 2016: Snappy: Added AMD64 SSE 4.2 optimizations to matching, which makes easy-to-compress material run faster. Typical speedup is around 25%.
* Feb 9, 2016: Added Snappy package fork. This version is 5-7% faster, much more on hard to compress content.
* Jan 30, 2016: Optimize level 1 to 3 by not considering static dictionary or storing uncompressed. ~4-5% speedup.
* Jan 16, 2016: Optimization on deflate level 1,2,3 compression.
* Jan 8 2016: Merge [CL 18317](https://go-review.googlesource.com/#/c/18317): fix reading, writing of zip64 archives.
* Dec 8 2015: Make level 1 and -2 deterministic even if write size differs.
* Dec 8 2015: Split encoding functions, so hashing and matching can potentially be inlined. 1-3% faster on AMD64. 5% faster on other platforms.
* Dec 8 2015: Fixed rare [one-byte out-of-bounds read](https://github.com/klauspost/compress/issues/20). Please update!
* Nov 23 2015: Optimization on token writer. ~2-4% faster. Contributed by [@dsnet](https://github.com/dsnet).
* Nov 20 2015: Small optimization to bit writer on 64 bit systems.
* Nov 17 2015: Fixed out-of-bound errors if the underlying Writer returned an error. See [#15](https://github.com/klauspost/compress/issues/15).
* Nov 12 2015: Added [io.WriterTo](https://golang.org/pkg/io/#WriterTo) support to gzip/inflate.
* Nov 11 2015: Merged [CL 16669](https://go-review.googlesource.com/#/c/16669/4): archive/zip: enable overriding (de)compressors per file
* Oct 15 2015: Added skipping on uncompressible data. Random data speed up >5x.
</details>
# deflate usage
* [High Throughput Benchmark](http://blog.klauspost.com/go-gzipdeflate-benchmarks/).
* [Small Payload/Webserver Benchmarks](http://blog.klauspost.com/gzip-performance-for-go-webservers/).
* [Linear Time Compression](http://blog.klauspost.com/constant-time-gzipzip-compression/).
* [Re-balancing Deflate Compression Levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/)
The packages are drop-in replacements for standard libraries. Simply replace the import path to use them:
| old import | new import | Documentation
|--------------------|-----------------------------------------|--------------------|
| `compress/gzip` | `github.com/klauspost/compress/gzip` | [gzip](https://pkg.go.dev/github.com/klauspost/compress/gzip?tab=doc)
| `compress/zlib` | `github.com/klauspost/compress/zlib` | [zlib](https://pkg.go.dev/github.com/klauspost/compress/zlib?tab=doc)
| `archive/zip` | `github.com/klauspost/compress/zip` | [zip](https://pkg.go.dev/github.com/klauspost/compress/zip?tab=doc)
| `compress/flate` | `github.com/klauspost/compress/flate` | [flate](https://pkg.go.dev/github.com/klauspost/compress/flate?tab=doc)
* Optimized [deflate](https://godoc.org/github.com/klauspost/compress/flate) packages which can be used as a drop-in replacement for [gzip](https://godoc.org/github.com/klauspost/compress/gzip), [zip](https://godoc.org/github.com/klauspost/compress/zip) and [zlib](https://godoc.org/github.com/klauspost/compress/zlib).
You may also be interested in [pgzip](https://github.com/klauspost/pgzip), a drop-in replacement for gzip that supports multithreaded compression of big files, and the optimized [crc32](https://github.com/klauspost/crc32) package used by these packages.
The packages contain the same functionality as the standard library, so you can use the standard godoc for reference: [gzip](http://golang.org/pkg/compress/gzip/), [zip](http://golang.org/pkg/archive/zip/), [zlib](http://golang.org/pkg/compress/zlib/), [flate](http://golang.org/pkg/compress/flate/).
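For example, switching an existing call site only requires changing the import path; the snippet below is an illustrative sketch (not taken from the README) using the gzip replacement:
```
// Illustrative sketch: only the import path differs from the standard library.
package main

import (
	"bytes"
	"io"
	"log"

	gzip "github.com/klauspost/compress/gzip" // was "compress/gzip"
)

func main() {
	var buf bytes.Buffer
	zw := gzip.NewWriter(&buf)
	if _, err := io.WriteString(zw, "hello, gzip"); err != nil {
		log.Fatal(err)
	}
	if err := zw.Close(); err != nil {
		log.Fatal(err)
	}
	log.Printf("compressed to %d bytes", buf.Len())
}
```
The same pattern applies to the zlib, zip and flate replacements, since their APIs mirror the standard library.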
Currently there is only a minor speedup on decompression (mostly CRC32 calculation).
Memory usage is typically 1MB for a Writer; stdlib is in the same range.
If you expect to have a lot of concurrently allocated Writers, consider using
the stateless compression described below.
# Stateless compression
This package offers stateless compression as a special option for gzip/deflate.
It will do compression but without maintaining any state between Write calls.
This means there will be no memory kept between Write calls, but compression and speed will be suboptimal.
This is only relevant in cases where you expect to run many thousands of compressors concurrently,
but with very little activity. This is *not* intended for regular web servers serving individual requests.
Because of this, the size of actual Write calls will affect output size.
In gzip, specify level `-3` / `gzip.StatelessCompression` to enable.
For direct deflate use, NewStatelessWriter and StatelessDeflate are available. See the [documentation](https://godoc.org/github.com/klauspost/compress/flate#NewStatelessWriter).
A `bufio.Writer` can of course be used to control write sizes. For example, to use a 4KB buffer:
```
// replace 'ioutil.Discard' with your output.
gzw, err := gzip.NewWriterLevel(ioutil.Discard, gzip.StatelessCompression)
if err != nil {
	return err
}
defer gzw.Close()
w := bufio.NewWriterSize(gzw, 4096)
defer w.Flush()
// Write to 'w'
```
This will only use up to 4KB in memory when the writer is idle.
Compression is almost always worse than the fastest compression level
and each write will allocate (a little) memory.
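For the direct deflate route mentioned above, a rough sketch is shown below. It assumes `NewStatelessWriter` simply wraps an `io.Writer` as the linked flate documentation describes; treat it as an outline rather than a definitive example:
```
// Rough sketch: stateless deflate without the gzip framing.
// Assumes flate.NewStatelessWriter(dst io.Writer) returns an io.WriteCloser,
// as described in the package documentation linked above.
package main

import (
	"bytes"
	"log"

	"github.com/klauspost/compress/flate"
)

func main() {
	var buf bytes.Buffer
	w := flate.NewStatelessWriter(&buf) // no state kept between Write calls
	if _, err := w.Write([]byte("stateless deflate example")); err != nil {
		log.Fatal(err)
	}
	if err := w.Close(); err != nil {
		log.Fatal(err)
	}
	log.Printf("compressed to %d bytes", buf.Len())
}
```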
# Performance Update 2018
It has been a while since we have been looking at the speed of this package compared to the standard library, so I thought I would re-do my tests and give some overall recommendations based on the current state. All benchmarks have been performed with Go 1.10 on my Desktop Intel(R) Core(TM) i7-2600 CPU @3.40GHz. Since I last ran the tests, I have gotten more RAM, which means tests with big files are no longer limited by my SSD.
The raw results are in my [updated spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing). Due to cgo changes and upstream updates I could not get the cgo version of gzip to compile. Instead I included the [zstd](https://github.com/datadog/zstd) cgo implementation. If I get cgo gzip to work again, I might replace the results in the sheet.
The columns to take note of are: *MB/s* - the throughput. *Reduction* - the data size reduction in percent of the original. *Rel Speed* relative speed compared to the standard library at the same level. *Smaller* - how many percent smaller is the compressed output compared to stdlib. Negative means the output was bigger. *Loss* means the loss (or gain) in compression as a percentage difference of the input.
The `gzstd` (standard library gzip) and `gzkp` (this package's gzip) only use one CPU core. [`pgzip`](https://github.com/klauspost/pgzip) and [`bgzf`](https://github.com/biogo/hts/tree/master/bgzf) use all 4 cores. [`zstd`](https://github.com/DataDog/zstd) uses one core, and is a beast (but not Go, yet).
## Overall differences.
There appears to be a roughly 5-10% speed advantage over the standard library when comparing at similar compression levels.
The biggest difference you will see is the result of [re-balancing](https://blog.klauspost.com/rebalancing-deflate-compression-levels/) the compression levels. I wanted my library to give a smoother transition between the compression levels than the standard library.
This package attempts to provide that smoother transition, where "1" takes a lot of shortcuts, "5" is the reasonable trade-off, "9" means "give me the best compression", and the values in between give something reasonable in between. The standard library has big differences in levels 1-4, while levels 5-9 offer no significant gains - often spending a lot more time than can be justified by the achieved compression.
There are links to all the test data in the [spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing) in the top left field on each tab.
## Web Content
This test set aims to emulate typical use in a web server. The test-set is 4GB data in 53k files, and is a mixture of (mostly) HTML, JS, CSS.
Since levels 1 and 9 use close to the same code in both packages, their results are quite close. But looking at the levels in between, the differences are quite big.
Looking at level 6, this package is 88% faster, but will output about 6% more data. For a web server, this means you can serve 88% more data, but have to pay for 6% more bandwidth. You can draw your own conclusions on what would be the most expensive for your case.
## Object files
This test is for typical data files stored on a server. In this case it is a collection of Go precompiled objects. They are very compressible.
The picture is similar to the web content, but with small differences since this data is very compressible. Levels 2-3 offer good speed, but sacrifice quite a bit of compression.
The standard library seems suboptimal on levels 3 and 4 - offering both worse compression and worse speed than levels 6 and 7 of this package, respectively.
## Highly Compressible File
This is a JSON file with very high redundancy. The reduction starts at 95% on level 1, so in real life terms we are dealing with something like a highly redundant stream of data, etc.
It is definitely visible that we are dealing with specialized content here, so the results are very scattered. This package does not do very well at levels 1-4, but picks up significantly at level 5, with levels 7 and 8 offering great speed for the achieved compression.
So if you know your content is extremely compressible, you might want to go slightly higher than the defaults. The standard library has a huge gap between levels 3 and 4 in terms of speed (a 2.75x slowdown), so it offers little "middle ground".
## Medium-High Compressible
This is a pretty common test corpus: [enwik9](http://mattmahoney.net/dc/textdata.html). It contains the first 10^9 bytes of the English Wikipedia dump on Mar. 3, 2006. This is a very good test of typical text based compression and more data heavy streams.
We see a similar picture here as in "Web Content". On equal levels some compression is sacrificed for more speed. Level 5 seems to be the best trade-off between speed and size, beating stdlib level 3 in both.
## Medium Compressible
I will combine two test sets, one [10GB file set](http://mattmahoney.net/dc/10gb.html) and a VM disk image (~8GB). Both contain different data types and represent a typical backup scenario.
The most notable thing is how quickly the standard library drops to very low compression speeds around level 5-6 without any big gains in compression. Since this type of data is fairly common, this does not seem like good behavior.
## Un-compressible Content
This is mainly a test of how good the algorithms are at detecting un-compressible input. The standard library only offers this feature with very conservative settings at level 1. Obviously there is no reason for the algorithms to try to compress input that cannot be compressed. The only downside is that it might skip some compressible data on false detections.
## Huffman only compression
This compression library adds a special compression level, named `HuffmanOnly`, which allows near linear time compression. This is done by completely disabling matching of previous data, and only reducing the number of bits used to represent each character.
This means that frequently used characters, like 'e' and ' ' (space) in text, are represented with the fewest bits, and rare characters like '¤' take more bits to represent. For more information see [wikipedia](https://en.wikipedia.org/wiki/Huffman_coding) or this nice [video](https://youtu.be/ZdooBTdW5bM).
Since this type of compression has much less variance, the compression speed is mostly unaffected by the input data, and is usually more than *180MB/s* for a single core.
The downside is that the compression ratio is usually considerably worse than even the fastest conventional compression. The compression ratio can never be better than 8:1 (12.5%).
The linear time compression can be used as a "better than nothing" mode, where you cannot risk the encoder slowing down on some content. For comparison, the size of the "Twain" text is *233460 bytes* (+29% vs. level 1) and encode speed is 144MB/s (4.5x level 1). So in this case you trade a 30% size increase for a 4x speedup.
For more information see my blog post on [Fast Linear Time Compression](http://blog.klauspost.com/constant-time-gzipzip-compression/).
This is implemented on Go 1.7 as "Huffman Only" mode, though not exposed for gzip.
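As a small illustration (not part of the README), the mode can be selected through the flate drop-in package, assuming it mirrors the standard library's `flate.HuffmanOnly` constant:
```
// Illustrative sketch: Huffman-only compression via the flate drop-in package.
// Matching of previous data is disabled, so speed is nearly independent of the
// input, at the cost of a ratio that can never exceed 8:1.
package main

import (
	"bytes"
	"log"

	"github.com/klauspost/compress/flate"
)

func main() {
	data := bytes.Repeat([]byte("some mostly text-like data "), 100)

	var buf bytes.Buffer
	fw, err := flate.NewWriter(&buf, flate.HuffmanOnly)
	if err != nil {
		log.Fatal(err)
	}
	if _, err := fw.Write(data); err != nil {
		log.Fatal(err)
	}
	if err := fw.Close(); err != nil {
		log.Fatal(err)
	}
	log.Printf("%d input bytes -> %d compressed bytes", len(data), buf.Len())
}
```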
# Other packages
Here are other packages of good quality and pure Go (no cgo wrappers or autoconverted code):
* [github.com/pierrec/lz4](https://github.com/pierrec/lz4) - strong multithreaded LZ4 compression.
* [github.com/cosnicolaou/pbzip2](https://github.com/cosnicolaou/pbzip2) - multithreaded bzip2 decompression.
* [github.com/dsnet/compress](https://github.com/dsnet/compress) - brotli decompression, bzip2 writer.
# license
This code is licensed under the same conditions as the original Go code. See LICENSE file.
vendor/github.com/klauspost/compress/compressible.go generated vendored Normal file
@ -0,0 +1,85 @@
package compress
import "math"
// Estimate returns a normalized compressibility estimate of block b.
// Values close to zero are likely uncompressible.
// Values above 0.1 are likely to be compressible.
// Values above 0.5 are very compressible.
// Very small lengths will return 0.
func Estimate(b []byte) float64 {
if len(b) < 16 {
return 0
}
// Correctly predicted order 1
hits := 0
lastMatch := false
var o1 [256]byte
var hist [256]int
c1 := byte(0)
for _, c := range b {
if c == o1[c1] {
// We only count a hit if there was two correct predictions in a row.
if lastMatch {
hits++
}
lastMatch = true
} else {
lastMatch = false
}
o1[c1] = c
c1 = c
hist[c]++
}
// Use x^0.6 to give better spread
prediction := math.Pow(float64(hits)/float64(len(b)), 0.6)
// Calculate histogram distribution
variance := float64(0)
avg := float64(len(b)) / 256
for _, v := range hist {
Δ := float64(v) - avg
variance += Δ * Δ
}
stddev := math.Sqrt(float64(variance)) / float64(len(b))
exp := math.Sqrt(1 / float64(len(b)))
// Subtract expected stddev
stddev -= exp
if stddev < 0 {
stddev = 0
}
stddev *= 1 + exp
// Use x^0.4 to give better spread
entropy := math.Pow(stddev, 0.4)
// 50/50 weight between prediction and histogram distribution
return math.Pow((prediction+entropy)/2, 0.9)
}
// ShannonEntropyBits returns the number of bits minimum required to represent
// an entropy encoding of the input bytes.
// https://en.wiktionary.org/wiki/Shannon_entropy
func ShannonEntropyBits(b []byte) int {
if len(b) == 0 {
return 0
}
var hist [256]int
for _, c := range b {
hist[c]++
}
shannon := float64(0)
invTotal := 1.0 / float64(len(b))
for _, v := range hist[:] {
if v > 0 {
n := float64(v)
shannon += math.Ceil(-math.Log2(n*invTotal) * n)
}
}
return int(math.Ceil(shannon))
}
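For illustration only (not part of the vendored file), a minimal sketch of calling the two helpers above from the root `compress` package might look like this:
```
// Illustrative sketch: estimating compressibility with the helpers above.
package main

import (
	"bytes"
	"fmt"

	"github.com/klauspost/compress"
)

func main() {
	redundant := bytes.Repeat([]byte("abcd"), 1024)
	fmt.Printf("estimate: %.3f, shannon bits: %d\n",
		compress.Estimate(redundant), compress.ShannonEntropyBits(redundant))
}
```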
vendor/github.com/klauspost/compress/gen.sh generated vendored Normal file
@ -0,0 +1,4 @@
#!/bin/sh
cd s2/cmd/_s2sx/ || exit 1
go generate .
@ -161,6 +161,70 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)
return s.Out, false, nil
}
// EstimateSizes will estimate the data sizes
func EstimateSizes(in []byte, s *Scratch) (tableSz, dataSz, reuseSz int, err error) {
s, err = s.prepare(in)
if err != nil {
return 0, 0, 0, err
}
// Create histogram, if none was provided.
tableSz, dataSz, reuseSz = -1, -1, -1
maxCount := s.maxCount
var canReuse = false
if maxCount == 0 {
maxCount, canReuse = s.countSimple(in)
} else {
canReuse = s.canUseTable(s.prevTable)
}
// We want the output size to be less than this:
wantSize := len(in)
if s.WantLogLess > 0 {
wantSize -= wantSize >> s.WantLogLess
}
// Reset for next run.
s.clearCount = true
s.maxCount = 0
if maxCount >= len(in) {
if maxCount > len(in) {
return 0, 0, 0, fmt.Errorf("maxCount (%d) > length (%d)", maxCount, len(in))
}
if len(in) == 1 {
return 0, 0, 0, ErrIncompressible
}
// One symbol, use RLE
return 0, 0, 0, ErrUseRLE
}
if maxCount == 1 || maxCount < (len(in)>>7) {
// Each symbol present maximum once or too well distributed.
return 0, 0, 0, ErrIncompressible
}
// Calculate new table.
err = s.buildCTable()
if err != nil {
return 0, 0, 0, err
}
if false && !s.canUseTable(s.cTable) {
panic("invalid table generated")
}
tableSz, err = s.cTable.estTableSize(s)
if err != nil {
return 0, 0, 0, err
}
if canReuse {
reuseSz = s.prevTable.estimateSize(s.count[:s.symbolLen])
}
dataSz = s.cTable.estimateSize(s.count[:s.symbolLen])
// Restore
return tableSz, dataSz, reuseSz, nil
}
func (s *Scratch) compress1X(src []byte) ([]byte, error) {
return s.compress1xDo(s.Out, src)
}
@ -20,7 +20,7 @@ type dEntrySingle struct {
// double-symbols decoding
type dEntryDouble struct {
seq uint16
seq [4]byte
nBits uint8
len uint8
}
@ -344,35 +344,241 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
var buf [256]byte
var off uint8
shift := (8 - d.actualTableLog) & 7
switch d.actualTableLog {
case 8:
const shift = 8 - 8
for br.off >= 4 {
br.fillFast()
v := dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+0] = uint8(v.entry >> 8)
//fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
for br.off >= 4 {
br.fillFast()
v := dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+0] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+1] = uint8(v.entry >> 8)
v = dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+1] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+2] = uint8(v.entry >> 8)
v = dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+2] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+3] = uint8(v.entry >> 8)
v = dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+3] = uint8(v.entry >> 8)
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:]...)
}
dst = append(dst, buf[:]...)
}
case 7:
const shift = 8 - 7
for br.off >= 4 {
br.fillFast()
v := dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+0] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+1] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+2] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+3] = uint8(v.entry >> 8)
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:]...)
}
}
case 6:
const shift = 8 - 6
for br.off >= 4 {
br.fillFast()
v := dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+0] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+1] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+2] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+3] = uint8(v.entry >> 8)
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:]...)
}
}
case 5:
const shift = 8 - 5
for br.off >= 4 {
br.fillFast()
v := dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+0] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+1] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+2] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+3] = uint8(v.entry >> 8)
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:]...)
}
}
case 4:
const shift = 8 - 4
for br.off >= 4 {
br.fillFast()
v := dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+0] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+1] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+2] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+3] = uint8(v.entry >> 8)
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:]...)
}
}
case 3:
const shift = 8 - 3
for br.off >= 4 {
br.fillFast()
v := dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+0] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+1] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+2] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+3] = uint8(v.entry >> 8)
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:]...)
}
}
case 2:
const shift = 8 - 2
for br.off >= 4 {
br.fillFast()
v := dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+0] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+1] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+2] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+3] = uint8(v.entry >> 8)
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:]...)
}
}
case 1:
const shift = 8 - 1
for br.off >= 4 {
br.fillFast()
v := dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+0] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+1] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+2] = uint8(v.entry >> 8)
v = dt[uint8(br.value>>(56+shift))]
br.advance(uint8(v.entry))
buf[off+3] = uint8(v.entry >> 8)
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:]...)
}
}
default:
return nil, fmt.Errorf("invalid tablelog: %d", d.actualTableLog)
}
if len(dst)+int(off) > maxDecodedSize {
@ -383,6 +589,8 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
// br < 4, so uint8 is fine
bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
shift := (8 - d.actualTableLog) & 7
for bitsLeft > 0 {
if br.bitsRead >= 64-8 {
for br.off > 0 {
@ -423,24 +631,24 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
var buf [256]byte
var off uint8
const shift = 0
const shift = 56
//fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
for br.off >= 4 {
br.fillFast()
v := dt[br.peekByteFast()>>shift]
v := dt[uint8(br.value>>shift)]
br.advance(uint8(v.entry))
buf[off+0] = uint8(v.entry >> 8)
v = dt[br.peekByteFast()>>shift]
v = dt[uint8(br.value>>shift)]
br.advance(uint8(v.entry))
buf[off+1] = uint8(v.entry >> 8)
v = dt[br.peekByteFast()>>shift]
v = dt[uint8(br.value>>shift)]
br.advance(uint8(v.entry))
buf[off+2] = uint8(v.entry >> 8)
v = dt[br.peekByteFast()>>shift]
v = dt[uint8(br.value>>shift)]
br.advance(uint8(v.entry))
buf[off+3] = uint8(v.entry >> 8)
@ -474,7 +682,7 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
v := dt[br.peekByteFast()>>shift]
v := dt[br.peekByteFast()]
nBits := uint8(v.entry)
br.advance(nBits)
bitsLeft -= int8(nBits)
@ -545,23 +753,21 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
br[stream2].fillFast()
val := br[stream].peekBitsFast(d.actualTableLog)
v := single[val&tlMask]
br[stream].advance(uint8(v.entry))
buf[off+bufoff*stream] = uint8(v.entry >> 8)
val2 := br[stream2].peekBitsFast(d.actualTableLog)
v := single[val&tlMask]
v2 := single[val2&tlMask]
br[stream].advance(uint8(v.entry))
br[stream2].advance(uint8(v2.entry))
buf[off+bufoff*stream] = uint8(v.entry >> 8)
buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
val = br[stream].peekBitsFast(d.actualTableLog)
v = single[val&tlMask]
br[stream].advance(uint8(v.entry))
buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
val2 = br[stream2].peekBitsFast(d.actualTableLog)
v = single[val&tlMask]
v2 = single[val2&tlMask]
br[stream].advance(uint8(v.entry))
br[stream2].advance(uint8(v2.entry))
buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
}
@ -572,23 +778,21 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
br[stream2].fillFast()
val := br[stream].peekBitsFast(d.actualTableLog)
v := single[val&tlMask]
br[stream].advance(uint8(v.entry))
buf[off+bufoff*stream] = uint8(v.entry >> 8)
val2 := br[stream2].peekBitsFast(d.actualTableLog)
v := single[val&tlMask]
v2 := single[val2&tlMask]
br[stream].advance(uint8(v.entry))
br[stream2].advance(uint8(v2.entry))
buf[off+bufoff*stream] = uint8(v.entry >> 8)
buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
val = br[stream].peekBitsFast(d.actualTableLog)
v = single[val&tlMask]
br[stream].advance(uint8(v.entry))
buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
val2 = br[stream2].peekBitsFast(d.actualTableLog)
v = single[val&tlMask]
v2 = single[val2&tlMask]
br[stream].advance(uint8(v.entry))
br[stream2].advance(uint8(v2.entry))
buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
}
@ -706,10 +910,9 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
out := dst
dstEvery := (dstSize + 3) / 4
shift := (8 - d.actualTableLog) & 7
shift := (56 + (8 - d.actualTableLog)) & 63
const tlSize = 1 << 8
const tlMask = tlSize - 1
single := d.dt.single[:tlSize]
// Use temp table to avoid bound checks/append penalty.
@ -728,79 +931,91 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
// Interleave 2 decodes.
const stream = 0
const stream2 = 1
br[stream].fillFast()
br[stream2].fillFast()
br1 := &br[stream]
br2 := &br[stream2]
br1.fillFast()
br2.fillFast()
v := single[br[stream].peekByteFast()>>shift].entry
v := single[uint8(br1.value>>shift)].entry
v2 := single[uint8(br2.value>>shift)].entry
br1.bitsRead += uint8(v)
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
buf[off+bufoff*stream] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 := single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br1.value>>shift)].entry
v2 = single[uint8(br2.value>>shift)].entry
br1.bitsRead += uint8(v)
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
buf[off+bufoff*stream+1] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br1.value>>shift)].entry
v2 = single[uint8(br2.value>>shift)].entry
br1.bitsRead += uint8(v)
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
buf[off+bufoff*stream+2] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+3] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
v = single[uint8(br1.value>>shift)].entry
v2 = single[uint8(br2.value>>shift)].entry
br1.bitsRead += uint8(v)
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
buf[off+bufoff*stream+3] = uint8(v >> 8)
}
{
const stream = 2
const stream2 = 3
br[stream].fillFast()
br[stream2].fillFast()
br1 := &br[stream]
br2 := &br[stream2]
br1.fillFast()
br2.fillFast()
v := single[br[stream].peekByteFast()>>shift].entry
v := single[uint8(br1.value>>shift)].entry
v2 := single[uint8(br2.value>>shift)].entry
br1.bitsRead += uint8(v)
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
buf[off+bufoff*stream] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 := single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br1.value>>shift)].entry
v2 = single[uint8(br2.value>>shift)].entry
br1.bitsRead += uint8(v)
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
buf[off+bufoff*stream+1] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br1.value>>shift)].entry
v2 = single[uint8(br2.value>>shift)].entry
br1.bitsRead += uint8(v)
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
buf[off+bufoff*stream+2] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+3] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
v = single[uint8(br1.value>>shift)].entry
v2 = single[uint8(br2.value>>shift)].entry
br1.bitsRead += uint8(v)
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
buf[off+bufoff*stream+3] = uint8(v >> 8)
}
off += 4
@ -866,7 +1081,7 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
}
// Read value and increment offset.
v := single[br.peekByteFast()>>shift].entry
v := single[uint8(br.value>>shift)].entry
nBits := uint8(v)
br.advance(nBits)
bitsLeft -= int(nBits)
@ -914,7 +1129,7 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
out := dst
dstEvery := (dstSize + 3) / 4
const shift = 0
const shift = 56
const tlSize = 1 << 8
const tlMask = tlSize - 1
single := d.dt.single[:tlSize]
@ -938,37 +1153,41 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
br[stream].fillFast()
br[stream2].fillFast()
v := single[br[stream].peekByteFast()>>shift].entry
v := single[uint8(br[stream].value>>shift)].entry
v2 := single[uint8(br[stream2].value>>shift)].entry
br[stream].bitsRead += uint8(v)
br[stream].value <<= v & 63
br[stream2].bitsRead += uint8(v2)
br[stream2].value <<= v2 & 63
buf[off+bufoff*stream] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 := single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br[stream].value>>shift)].entry
v2 = single[uint8(br[stream2].value>>shift)].entry
br[stream].bitsRead += uint8(v)
br[stream].value <<= v & 63
br[stream2].bitsRead += uint8(v2)
br[stream2].value <<= v2 & 63
buf[off+bufoff*stream+1] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br[stream].value>>shift)].entry
v2 = single[uint8(br[stream2].value>>shift)].entry
br[stream].bitsRead += uint8(v)
br[stream].value <<= v & 63
br[stream2].bitsRead += uint8(v2)
br[stream2].value <<= v2 & 63
buf[off+bufoff*stream+2] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br[stream].value>>shift)].entry
v2 = single[uint8(br[stream2].value>>shift)].entry
br[stream].bitsRead += uint8(v)
br[stream].value <<= v & 63
br[stream2].bitsRead += uint8(v2)
br[stream2].value <<= v2 & 63
buf[off+bufoff*stream+3] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
}
{
@ -977,37 +1196,41 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
br[stream].fillFast()
br[stream2].fillFast()
v := single[br[stream].peekByteFast()>>shift].entry
v := single[uint8(br[stream].value>>shift)].entry
v2 := single[uint8(br[stream2].value>>shift)].entry
br[stream].bitsRead += uint8(v)
br[stream].value <<= v & 63
br[stream2].bitsRead += uint8(v2)
br[stream2].value <<= v2 & 63
buf[off+bufoff*stream] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 := single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br[stream].value>>shift)].entry
v2 = single[uint8(br[stream2].value>>shift)].entry
br[stream].bitsRead += uint8(v)
br[stream].value <<= v & 63
br[stream2].bitsRead += uint8(v2)
br[stream2].value <<= v2 & 63
buf[off+bufoff*stream+1] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br[stream].value>>shift)].entry
v2 = single[uint8(br[stream2].value>>shift)].entry
br[stream].bitsRead += uint8(v)
br[stream].value <<= v & 63
br[stream2].bitsRead += uint8(v2)
br[stream2].value <<= v2 & 63
buf[off+bufoff*stream+2] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br[stream].value>>shift)].entry
v2 = single[uint8(br[stream2].value>>shift)].entry
br[stream].bitsRead += uint8(v)
br[stream].value <<= v & 63
br[stream2].bitsRead += uint8(v2)
br[stream2].value <<= v2 & 63
buf[off+bufoff*stream+3] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
}
off += 4
@ -1073,7 +1296,7 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
}
// Read value and increment offset.
v := single[br.peekByteFast()>>shift].entry
v := single[br.peekByteFast()].entry
nBits := uint8(v)
br.advance(nBits)
bitsLeft -= int(nBits)
@ -245,6 +245,68 @@ func (c cTable) write(s *Scratch) error {
return nil
}
func (c cTable) estTableSize(s *Scratch) (sz int, err error) {
var (
// precomputed conversion table
bitsToWeight [tableLogMax + 1]byte
huffLog = s.actualTableLog
// last weight is not saved.
maxSymbolValue = uint8(s.symbolLen - 1)
huffWeight = s.huffWeight[:256]
)
const (
maxFSETableLog = 6
)
// convert to weight
bitsToWeight[0] = 0
for n := uint8(1); n < huffLog+1; n++ {
bitsToWeight[n] = huffLog + 1 - n
}
// Acquire histogram for FSE.
hist := s.fse.Histogram()
hist = hist[:256]
for i := range hist[:16] {
hist[i] = 0
}
for n := uint8(0); n < maxSymbolValue; n++ {
v := bitsToWeight[c[n].nBits] & 15
huffWeight[n] = v
hist[v]++
}
// FSE compress if feasible.
if maxSymbolValue >= 2 {
huffMaxCnt := uint32(0)
huffMax := uint8(0)
for i, v := range hist[:16] {
if v == 0 {
continue
}
huffMax = byte(i)
if v > huffMaxCnt {
huffMaxCnt = v
}
}
s.fse.HistogramFinished(huffMax, int(huffMaxCnt))
s.fse.TableLog = maxFSETableLog
b, err := fse.Compress(huffWeight[:maxSymbolValue], s.fse)
if err == nil && len(b) < int(s.symbolLen>>1) {
sz += 1 + len(b)
return sz, nil
}
// Unable to compress (RLE/uncompressible)
}
// write raw values as 4-bits (max : 15)
if maxSymbolValue > (256 - 128) {
// should not happen : likely means source cannot be compressed
return 0, ErrIncompressible
}
// special case, pack weights 4 bits/weight.
sz += 1 + int(maxSymbolValue/2)
return sz, nil
}
// estimateSize returns the estimated size in bytes of the input represented in the
// histogram supplied.
func (c cTable) estimateSize(hist []uint32) int {
@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package snappy
package snapref
import (
"encoding/binary"
@ -118,32 +118,23 @@ func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) {
return true
}
// Read satisfies the io.Reader interface.
func (r *Reader) Read(p []byte) (int, error) {
if r.err != nil {
return 0, r.err
}
for {
if r.i < r.j {
n := copy(p, r.decoded[r.i:r.j])
r.i += n
return n, nil
}
func (r *Reader) fill() error {
for r.i >= r.j {
if !r.readFull(r.buf[:4], true) {
return 0, r.err
return r.err
}
chunkType := r.buf[0]
if !r.readHeader {
if chunkType != chunkTypeStreamIdentifier {
r.err = ErrCorrupt
return 0, r.err
return r.err
}
r.readHeader = true
}
chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
if chunkLen > len(r.buf) {
r.err = ErrUnsupported
return 0, r.err
return r.err
}
// The chunk types are specified at
@ -153,11 +144,11 @@ func (r *Reader) Read(p []byte) (int, error) {
// Section 4.2. Compressed data (chunk type 0x00).
if chunkLen < checksumSize {
r.err = ErrCorrupt
return 0, r.err
return r.err
}
buf := r.buf[:chunkLen]
if !r.readFull(buf, false) {
return 0, r.err
return r.err
}
checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
buf = buf[checksumSize:]
@ -165,19 +156,19 @@ func (r *Reader) Read(p []byte) (int, error) {
n, err := DecodedLen(buf)
if err != nil {
r.err = err
return 0, r.err
return r.err
}
if n > len(r.decoded) {
r.err = ErrCorrupt
return 0, r.err
return r.err
}
if _, err := Decode(r.decoded, buf); err != nil {
r.err = err
return 0, r.err
return r.err
}
if crc(r.decoded[:n]) != checksum {
r.err = ErrCorrupt
return 0, r.err
return r.err
}
r.i, r.j = 0, n
continue
@ -186,25 +177,25 @@ func (r *Reader) Read(p []byte) (int, error) {
// Section 4.3. Uncompressed data (chunk type 0x01).
if chunkLen < checksumSize {
r.err = ErrCorrupt
return 0, r.err
return r.err
}
buf := r.buf[:checksumSize]
if !r.readFull(buf, false) {
return 0, r.err
return r.err
}
checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
// Read directly into r.decoded instead of via r.buf.
n := chunkLen - checksumSize
if n > len(r.decoded) {
r.err = ErrCorrupt
return 0, r.err
return r.err
}
if !r.readFull(r.decoded[:n], false) {
return 0, r.err
return r.err
}
if crc(r.decoded[:n]) != checksum {
r.err = ErrCorrupt
return 0, r.err
return r.err
}
r.i, r.j = 0, n
continue
@ -213,15 +204,15 @@ func (r *Reader) Read(p []byte) (int, error) {
// Section 4.1. Stream identifier (chunk type 0xff).
if chunkLen != len(magicBody) {
r.err = ErrCorrupt
return 0, r.err
return r.err
}
if !r.readFull(r.buf[:len(magicBody)], false) {
return 0, r.err
return r.err
}
for i := 0; i < len(magicBody); i++ {
if r.buf[i] != magicBody[i] {
r.err = ErrCorrupt
return 0, r.err
return r.err
}
}
continue
@ -230,12 +221,44 @@ func (r *Reader) Read(p []byte) (int, error) {
if chunkType <= 0x7f {
// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
r.err = ErrUnsupported
return 0, r.err
return r.err
}
// Section 4.4 Padding (chunk type 0xfe).
// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
if !r.readFull(r.buf[:chunkLen], false) {
return 0, r.err
return r.err
}
}
return nil
}
// Read satisfies the io.Reader interface.
func (r *Reader) Read(p []byte) (int, error) {
if r.err != nil {
return 0, r.err
}
if err := r.fill(); err != nil {
return 0, err
}
n := copy(p, r.decoded[r.i:r.j])
r.i += n
return n, nil
}
// ReadByte satisfies the io.ByteReader interface.
func (r *Reader) ReadByte() (byte, error) {
if r.err != nil {
return 0, r.err
}
if err := r.fill(); err != nil {
return 0, err
}
c := r.decoded[r.i]
r.i++
return c, nil
}
@ -2,9 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64,!arm64 appengine !gc noasm
package snappy
package snapref
// decode writes the decoding of src to dst. It assumes that the varint-encoded
// length of the decompressed bytes has already been read, and that len(dst)
@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package snappy
package snapref
import (
"encoding/binary"
@ -2,9 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64,!arm64 appengine !gc noasm
package snappy
package snapref
func load32(b []byte, i int) uint32 {
b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package snappy implements the Snappy compression format. It aims for very
// Package snapref implements the Snappy compression format. It aims for very
// high speeds and reasonable compression.
//
// There are actually two Snappy formats: block and stream. They are related,
@ -17,7 +17,7 @@
//
// The canonical, C++ implementation is at https://github.com/google/snappy and
// it only implements the block format.
package snappy // import "github.com/golang/snappy"
package snapref
import (
"hash/crc32"
vendor/github.com/klauspost/compress/s2sx.mod generated vendored Normal file
@ -0,0 +1,4 @@
module github.com/klauspost/compress
go 1.16
vendor/github.com/klauspost/compress/s2sx.sum generated vendored Normal file
@ -152,7 +152,7 @@ file out level insize outsize millis mb/s
silesia.tar zskp 1 211947520 73101992 643 313.87
silesia.tar zskp 2 211947520 67504318 969 208.38
silesia.tar zskp 3 211947520 64595893 2007 100.68
silesia.tar zskp 4 211947520 60995370 7691 26.28
silesia.tar zskp 4 211947520 60995370 8825 22.90
cgo zstd:
silesia.tar zstd 1 211947520 73605392 543 371.56
@ -162,7 +162,7 @@ silesia.tar zstd 9 211947520 60212393 5063 39.92
gzip, stdlib/this package:
silesia.tar gzstd 1 211947520 80007735 1654 122.21
silesia.tar gzkp 1 211947520 80369488 1168 173.06
silesia.tar gzkp 1 211947520 80136201 1152 175.45
GOB stream of binary data. Highly compressible.
https://files.klauspost.com/compress/gob-stream.7z
@ -171,13 +171,15 @@ file out level insize outsize millis mb/s
gob-stream zskp 1 1911399616 235022249 3088 590.30
gob-stream zskp 2 1911399616 205669791 3786 481.34
gob-stream zskp 3 1911399616 175034659 9636 189.17
gob-stream zskp 4 1911399616 167273881 29337 62.13
gob-stream zskp 4 1911399616 165609838 50369 36.19
gob-stream zstd 1 1911399616 249810424 2637 691.26
gob-stream zstd 3 1911399616 208192146 3490 522.31
gob-stream zstd 6 1911399616 193632038 6687 272.56
gob-stream zstd 9 1911399616 177620386 16175 112.70
gob-stream gzstd 1 1911399616 357382641 10251 177.82
gob-stream gzkp 1 1911399616 362156523 5695 320.08
gob-stream gzkp 1 1911399616 359753026 5438 335.20
The test data for the Large Text Compression Benchmark is the first
10^9 bytes of the English Wikipedia dump on Mar. 3, 2006.
@ -187,11 +189,13 @@ file out level insize outsize millis mb/s
enwik9 zskp 1 1000000000 343848582 3609 264.18
enwik9 zskp 2 1000000000 317276632 5746 165.97
enwik9 zskp 3 1000000000 292243069 12162 78.41
enwik9 zskp 4 1000000000 275241169 36430 26.18
enwik9 zskp 4 1000000000 262183768 82837 11.51
enwik9 zstd 1 1000000000 358072021 3110 306.65
enwik9 zstd 3 1000000000 313734672 4784 199.35
enwik9 zstd 6 1000000000 295138875 10290 92.68
enwik9 zstd 9 1000000000 278348700 28549 33.40
enwik9 gzstd 1 1000000000 382578136 9604 99.30
enwik9 gzkp 1 1000000000 383825945 6544 145.73
@ -202,13 +206,15 @@ file out level insize outsize millis mb/s
github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40
github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96
github-june-2days-2019.json zskp 3 6273951764 524340691 34043 175.75
github-june-2days-2019.json zskp 4 6273951764 503314661 93811 63.78
github-june-2days-2019.json zskp 4 6273951764 470320075 170190 35.16
github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00
github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57
github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18
github-june-2days-2019.json zstd 9 6273951764 601974523 52413 114.16
github-june-2days-2019.json gzstd 1 6273951764 1164400847 29948 199.79
github-june-2days-2019.json gzkp 1 6273951764 1128755542 19236 311.03
github-june-2days-2019.json gzkp 1 6273951764 1125417694 21788 274.61
VM Image, Linux mint with a few installed applications:
https://files.klauspost.com/compress/rawstudio-mint14.7z
@ -217,13 +223,15 @@ file out level insize outsize millis mb/s
rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84
rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07
rawstudio-mint14.tar zskp 3 8558382592 3158085214 77675 105.08
rawstudio-mint14.tar zskp 4 8558382592 3020370044 404956 20.16
rawstudio-mint14.tar zskp 4 8558382592 2965110639 857750 9.52
rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27
rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92
rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77
rawstudio-mint14.tar zstd 9 8558382592 3160778861 140946 57.91
rawstudio-mint14.tar gzstd 1 8558382592 3926257486 57722 141.40
rawstudio-mint14.tar gzkp 1 8558382592 3970463184 41749 195.49
rawstudio-mint14.tar gzkp 1 8558382592 3962605659 45113 180.92
CSV data:
https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst
@ -232,13 +240,15 @@ file out level insize outsize millis mb/s
nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35
nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44
nyc-taxi-data-10M.csv zskp 3 3325605752 530289687 25239 125.66
nyc-taxi-data-10M.csv zskp 4 3325605752 490907191 65939 48.10
nyc-taxi-data-10M.csv zskp 4 3325605752 476268884 135958 23.33
nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18
nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07
nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27
nyc-taxi-data-10M.csv zstd 9 3325605752 517554797 64565 49.12
nyc-taxi-data-10M.csv gzstd 1 3325605752 928656485 23876 132.83
nyc-taxi-data-10M.csv gzkp 1 3325605752 924718719 16388 193.53
nyc-taxi-data-10M.csv gzkp 1 3325605752 922257165 16780 189.00
```
## Decompressor
@ -50,16 +50,23 @@ func (b *bitReader) getBits(n uint8) int {
if n == 0 /*|| b.bitsRead >= 64 */ {
return 0
}
return b.getBitsFast(n)
return int(b.get32BitsFast(n))
}
// getBitsFast requires that at least one bit is requested every time.
// get32BitsFast requires that at least one bit is requested every time.
// There are no checks if the buffer is filled.
func (b *bitReader) getBitsFast(n uint8) int {
func (b *bitReader) get32BitsFast(n uint8) uint32 {
const regMask = 64 - 1
v := uint32((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
b.bitsRead += n
return int(v)
return v
}
func (b *bitReader) get16BitsFast(n uint8) uint16 {
const regMask = 64 - 1
v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
b.bitsRead += n
return v
}
// fillFast() will make sure at least 32 bits are available.
@ -38,7 +38,7 @@ func (b *bitWriter) addBits16NC(value uint16, bits uint8) {
b.nBits += bits
}
// addBits32NC will add up to 32 bits.
// addBits32NC will add up to 31 bits.
// It will not check if there is space for them,
// so the caller must ensure that it has flushed recently.
func (b *bitWriter) addBits32NC(value uint32, bits uint8) {
@ -46,6 +46,26 @@ func (b *bitWriter) addBits32NC(value uint32, bits uint8) {
b.nBits += bits
}
// addBits64NC will add up to 64 bits.
// There must be space for 32 bits.
func (b *bitWriter) addBits64NC(value uint64, bits uint8) {
if bits <= 31 {
b.addBits32Clean(uint32(value), bits)
return
}
b.addBits32Clean(uint32(value), 32)
b.flush32()
b.addBits32Clean(uint32(value>>32), bits-32)
}
// addBits32Clean will add up to 32 bits.
// It will not check if there is space for them.
// The input must not contain more bits than specified.
func (b *bitWriter) addBits32Clean(value uint32, bits uint8) {
b.bitContainer |= uint64(value) << (b.nBits & 63)
b.nBits += bits
}
// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
@ -76,12 +76,11 @@ type blockDec struct {
// Window size of the block.
WindowSize uint64
history chan *history
input chan struct{}
result chan decodeOutput
sequenceBuf []seq
err error
decWG sync.WaitGroup
history chan *history
input chan struct{}
result chan decodeOutput
err error
decWG sync.WaitGroup
// Frame to use for singlethreaded decoding.
// Should not be used by the decoder itself since parent may be another frame.
@ -144,7 +143,7 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
}
cSize = 1
case blockTypeCompressed:
if debug {
if debugDecoder {
println("Data size on stream:", cSize)
}
b.RLESize = 0
@ -153,7 +152,7 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
maxSize = int(windowSize)
}
if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize {
if debug {
if debugDecoder {
printf("compressed block too big: csize:%d block: %+v\n", uint64(cSize), b)
}
return ErrCompressedSizeTooBig
@ -168,10 +167,10 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
// Read block data.
if cap(b.dataStorage) < cSize {
if b.lowMem {
if b.lowMem || cSize > maxCompressedBlockSize {
b.dataStorage = make([]byte, 0, cSize)
} else {
b.dataStorage = make([]byte, 0, maxBlockSize)
b.dataStorage = make([]byte, 0, maxCompressedBlockSize)
}
}
if cap(b.dst) <= maxSize {
@ -179,7 +178,7 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
}
b.data, err = br.readBig(cSize, b.dataStorage)
if err != nil {
if debug {
if debugDecoder {
println("Reading block:", err, "(", cSize, ")", len(b.data))
printf("%T", br)
}
@ -249,7 +248,7 @@ func (b *blockDec) startDecoder() {
b: b.dst,
err: err,
}
if debug {
if debugDecoder {
println("Decompressed to", len(b.dst), "bytes, error:", err)
}
b.result <- o
@ -264,7 +263,7 @@ func (b *blockDec) startDecoder() {
default:
panic("Invalid block type")
}
if debug {
if debugDecoder {
println("blockDec: Finished block")
}
}
@ -297,7 +296,7 @@ func (b *blockDec) decodeBuf(hist *history) error {
b.dst = hist.b
hist.b = nil
err := b.decodeCompressed(hist)
if debug {
if debugDecoder {
println("Decompressed to total", len(b.dst), "bytes, hash:", xxhash.Sum64(b.dst), "error:", err)
}
hist.b = b.dst
@ -390,7 +389,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
in = in[5:]
}
}
if debug {
if debugDecoder {
println("literals type:", litType, "litRegenSize:", litRegenSize, "litCompSize:", litCompSize, "sizeFormat:", sizeFormat, "4X:", fourStreams)
}
var literals []byte
@ -428,7 +427,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
literals[i] = v
}
in = in[1:]
if debug {
if debugDecoder {
printf("Found %d RLE compressed literals\n", litRegenSize)
}
case literalsBlockTreeless:
@ -439,7 +438,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
// Store compressed literals, so we defer decoding until we get history.
literals = in[:litCompSize]
in = in[litCompSize:]
if debug {
if debugDecoder {
printf("Found %d compressed literals\n", litCompSize)
}
case literalsBlockCompressed:
@ -481,7 +480,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
if len(literals) != litRegenSize {
return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
}
if debug {
if debugDecoder {
printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize)
}
}
@ -512,18 +511,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
nSeqs = 0x7f00 + int(in[1]) + (int(in[2]) << 8)
in = in[3:]
}
// Allocate sequences
if cap(b.sequenceBuf) < nSeqs {
if b.lowMem {
b.sequenceBuf = make([]seq, nSeqs)
} else {
// Allocate max
b.sequenceBuf = make([]seq, nSeqs, maxSequences)
}
} else {
// Reuse buffer
b.sequenceBuf = b.sequenceBuf[:nSeqs]
}
var seqs = &sequenceDecs{}
if nSeqs > 0 {
if len(in) < 1 {
@ -532,12 +520,12 @@ func (b *blockDec) decodeCompressed(hist *history) error {
br := byteReader{b: in, off: 0}
compMode := br.Uint8()
br.advance(1)
if debug {
if debugDecoder {
printf("Compression modes: 0b%b", compMode)
}
for i := uint(0); i < 3; i++ {
mode := seqCompMode((compMode >> (6 - i*2)) & 3)
if debug {
if debugDecoder {
println("Table", tableIndex(i), "is", mode)
}
var seq *sequenceDec
@ -568,7 +556,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
}
dec.setRLE(symb)
seq.fse = dec
if debug {
if debugDecoder {
printf("RLE set to %+v, code: %v", symb, v)
}
case compModeFSE:
@ -584,7 +572,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
println("Transform table error:", err)
return err
}
if debug {
if debugDecoder {
println("Read table ok", "symbolLen:", dec.symbolLen)
}
seq.fse = dec
@ -652,7 +640,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
if huff != nil {
hist.huffTree = huff
}
if debug {
if debugDecoder {
println("Final literals:", len(literals), "hash:", xxhash.Sum64(literals), "and", nSeqs, "sequences.")
}
@ -669,7 +657,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
if err != nil {
return err
}
if debug {
if debugDecoder {
println("History merged ok")
}
br := &bitReader{}
@ -728,7 +716,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
}
hist.append(b.dst)
hist.recentOffsets = seqs.prevOffset
if debug {
if debugDecoder {
println("Finished block with literals:", len(literals), "and", nSeqs, "sequences.")
}

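Most of the churn in this file is mechanical: the single `debug` constant has been split into `debugDecoder` and `debugEncoder`, so decode-side and encode-side tracing can be toggled independently. Because these are boolean constants, the guarded `println` calls are removed entirely by the compiler when the flag is false. A trivial sketch of the pattern (the function name is illustrative):

```go
const debugDecoder = false // compile-time switch; flip to true to trace the decode path

func traceBlock(n int) {
	if debugDecoder { // dead code when the constant is false; costs nothing in release builds
		println("decoded block of", n, "bytes")
	}
}
```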
View file

@ -51,7 +51,7 @@ func (b *blockEnc) init() {
if cap(b.literals) < maxCompressedBlockSize {
b.literals = make([]byte, 0, maxCompressedBlockSize)
}
const defSeqs = 200
const defSeqs = 2000
if cap(b.sequences) < defSeqs {
b.sequences = make([]seq, 0, defSeqs)
}
@ -156,7 +156,7 @@ func (h *literalsHeader) setSize(regenLen int) {
switch {
case inBits < 5:
lh |= (uint64(regenLen) << 3) | (1 << 60)
if debug {
if debugEncoder {
got := int(lh>>3) & 0xff
if got != regenLen {
panic(fmt.Sprint("litRegenSize = ", regenLen, "(want) != ", got, "(got)"))
@ -184,7 +184,7 @@ func (h *literalsHeader) setSizes(compLen, inLen int, single bool) {
lh |= 1 << 2
}
lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
if debug {
if debugEncoder {
const mmask = (1 << 24) - 1
n := (lh >> 4) & mmask
if int(n&1023) != inLen {
@ -312,7 +312,7 @@ func (b *blockEnc) encodeRaw(a []byte) {
bh.setType(blockTypeRaw)
b.output = bh.appendTo(b.output[:0])
b.output = append(b.output, a...)
if debug {
if debugEncoder {
println("Adding RAW block, length", len(a), "last:", b.last)
}
}
@ -325,7 +325,7 @@ func (b *blockEnc) encodeRawTo(dst, src []byte) []byte {
bh.setType(blockTypeRaw)
dst = bh.appendTo(dst)
dst = append(dst, src...)
if debug {
if debugEncoder {
println("Adding RAW block, length", len(src), "last:", b.last)
}
return dst
@ -339,7 +339,7 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
// Don't compress extremely small blocks
if len(lits) < 8 || (len(lits) < 32 && b.dictLitEnc == nil) || raw {
if debug {
if debugEncoder {
println("Adding RAW block, length", len(lits), "last:", b.last)
}
bh.setType(blockTypeRaw)
@ -371,7 +371,7 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
switch err {
case huff0.ErrIncompressible:
if debug {
if debugEncoder {
println("Adding RAW block, length", len(lits), "last:", b.last)
}
bh.setType(blockTypeRaw)
@ -379,7 +379,7 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
b.output = append(b.output, lits...)
return nil
case huff0.ErrUseRLE:
if debug {
if debugEncoder {
println("Adding RLE block, length", len(lits))
}
bh.setType(blockTypeRLE)
@ -396,12 +396,12 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
bh.setType(blockTypeCompressed)
var lh literalsHeader
if reUsed {
if debug {
if debugEncoder {
println("Reused tree, compressed to", len(out))
}
lh.setType(literalsBlockTreeless)
} else {
if debug {
if debugEncoder {
println("New tree, compressed to", len(out), "tree size:", len(b.litEnc.OutTable))
}
lh.setType(literalsBlockCompressed)
@ -426,7 +426,7 @@ func fuzzFseEncoder(data []byte) int {
return 0
}
enc := fseEncoder{}
hist := enc.Histogram()[:256]
hist := enc.Histogram()
maxSym := uint8(0)
for i, v := range data {
v = v & 63
@ -517,7 +517,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
lh.setSize(len(b.literals))
b.output = lh.appendTo(b.output)
b.output = append(b.output, b.literals...)
if debug {
if debugEncoder {
println("Adding literals RAW, length", len(b.literals))
}
case huff0.ErrUseRLE:
@ -525,22 +525,22 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
lh.setSize(len(b.literals))
b.output = lh.appendTo(b.output)
b.output = append(b.output, b.literals[0])
if debug {
if debugEncoder {
println("Adding literals RLE")
}
case nil:
// Compressed litLen...
if reUsed {
if debug {
if debugEncoder {
println("reused tree")
}
lh.setType(literalsBlockTreeless)
} else {
if debug {
if debugEncoder {
println("new tree, size:", len(b.litEnc.OutTable))
}
lh.setType(literalsBlockCompressed)
if debug {
if debugEncoder {
_, _, err := huff0.ReadTable(out, nil)
if err != nil {
panic(err)
@ -548,18 +548,18 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
}
}
lh.setSizes(len(out), len(b.literals), single)
if debug {
if debugEncoder {
printf("Compressed %d literals to %d bytes", len(b.literals), len(out))
println("Adding literal header:", lh)
}
b.output = lh.appendTo(b.output)
b.output = append(b.output, out...)
b.litEnc.Reuse = huff0.ReusePolicyAllow
if debug {
if debugEncoder {
println("Adding literals compressed")
}
default:
if debug {
if debugEncoder {
println("Adding literals ERROR:", err)
}
return err
@ -577,7 +577,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
n := len(b.sequences) - 0x7f00
b.output = append(b.output, 255, uint8(n), uint8(n>>8))
}
if debug {
if debugEncoder {
println("Encoding", len(b.sequences), "sequences")
}
b.genCodes()
@ -611,17 +611,17 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
nSize = nSize + (nSize+2*8*16)>>4
switch {
case predefSize <= prevSize && predefSize <= nSize || forcePreDef:
if debug {
if debugEncoder {
println("Using predefined", predefSize>>3, "<=", nSize>>3)
}
return preDef, compModePredefined
case prevSize <= nSize:
if debug {
if debugEncoder {
println("Using previous", prevSize>>3, "<=", nSize>>3)
}
return prev, compModeRepeat
default:
if debug {
if debugEncoder {
println("Using new, predef", predefSize>>3, ". previous:", prevSize>>3, ">", nSize>>3, "header max:", cur.maxHeaderSize()>>3, "bytes")
println("tl:", cur.actualTableLog, "symbolLen:", cur.symbolLen, "norm:", cur.norm[:cur.symbolLen], "hist", cur.count[:cur.symbolLen])
}
@ -634,7 +634,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
if llEnc.useRLE {
mode |= uint8(compModeRLE) << 6
llEnc.setRLE(b.sequences[0].llCode)
if debug {
if debugEncoder {
println("llEnc.useRLE")
}
} else {
@ -645,7 +645,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
if ofEnc.useRLE {
mode |= uint8(compModeRLE) << 4
ofEnc.setRLE(b.sequences[0].ofCode)
if debug {
if debugEncoder {
println("ofEnc.useRLE")
}
} else {
@ -657,7 +657,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
if mlEnc.useRLE {
mode |= uint8(compModeRLE) << 2
mlEnc.setRLE(b.sequences[0].mlCode)
if debug {
if debugEncoder {
println("mlEnc.useRLE, code: ", b.sequences[0].mlCode, "value", b.sequences[0].matchLen)
}
} else {
@ -666,7 +666,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
mode |= uint8(m) << 2
}
b.output = append(b.output, mode)
if debug {
if debugEncoder {
printf("Compression modes: 0b%b", mode)
}
b.output, err = llEnc.writeCount(b.output)
@ -722,52 +722,53 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
println("Encoded seq", seq, s, "codes:", s.llCode, s.mlCode, s.ofCode, "states:", ll.state, ml.state, of.state, "bits:", llB, mlB, ofB)
}
seq--
if llEnc.maxBits+mlEnc.maxBits+ofEnc.maxBits <= 32 {
// No need to flush (common)
for seq >= 0 {
s = b.sequences[seq]
wr.flush32()
llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
// tablelog max is 8 for all.
of.encode(ofB)
ml.encode(mlB)
ll.encode(llB)
wr.flush32()
// Store sequences in reverse...
for seq >= 0 {
s = b.sequences[seq]
// We checked that all can stay within 32 bits
wr.addBits32NC(s.litLen, llB.outBits)
wr.addBits32NC(s.matchLen, mlB.outBits)
wr.addBits32NC(s.offset, ofB.outBits)
ofB := ofTT[s.ofCode]
wr.flush32() // tablelog max is below 8 for each, so it will fill max 24 bits.
//of.encode(ofB)
nbBitsOut := (uint32(of.state) + ofB.deltaNbBits) >> 16
dstState := int32(of.state>>(nbBitsOut&15)) + int32(ofB.deltaFindState)
wr.addBits16NC(of.state, uint8(nbBitsOut))
of.state = of.stateTable[dstState]
if debugSequences {
println("Encoded seq", seq, s)
}
// Accumulate extra bits.
outBits := ofB.outBits & 31
extraBits := uint64(s.offset & bitMask32[outBits])
extraBitsN := outBits
seq--
mlB := mlTT[s.mlCode]
//ml.encode(mlB)
nbBitsOut = (uint32(ml.state) + mlB.deltaNbBits) >> 16
dstState = int32(ml.state>>(nbBitsOut&15)) + int32(mlB.deltaFindState)
wr.addBits16NC(ml.state, uint8(nbBitsOut))
ml.state = ml.stateTable[dstState]
outBits = mlB.outBits & 31
extraBits = extraBits<<outBits | uint64(s.matchLen&bitMask32[outBits])
extraBitsN += outBits
llB := llTT[s.llCode]
//ll.encode(llB)
nbBitsOut = (uint32(ll.state) + llB.deltaNbBits) >> 16
dstState = int32(ll.state>>(nbBitsOut&15)) + int32(llB.deltaFindState)
wr.addBits16NC(ll.state, uint8(nbBitsOut))
ll.state = ll.stateTable[dstState]
outBits = llB.outBits & 31
extraBits = extraBits<<outBits | uint64(s.litLen&bitMask32[outBits])
extraBitsN += outBits
wr.flush32()
wr.addBits64NC(extraBits, extraBitsN)
if debugSequences {
println("Encoded seq", seq, s)
}
} else {
for seq >= 0 {
s = b.sequences[seq]
wr.flush32()
llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
// tablelog max is below 8 for each.
of.encode(ofB)
ml.encode(mlB)
ll.encode(llB)
wr.flush32()
// ml+ll = max 32 bits total
wr.addBits32NC(s.litLen, llB.outBits)
wr.addBits32NC(s.matchLen, mlB.outBits)
wr.flush32()
wr.addBits32NC(s.offset, ofB.outBits)
if debugSequences {
println("Encoded seq", seq, s)
}
seq--
}
seq--
}
ml.flush(mlEnc.actualTableLog)
of.flush(ofEnc.actualTableLog)
@ -786,7 +787,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
// Size is output minus block header.
bh.setSize(uint32(len(b.output)-bhOffset) - 3)
if debug {
if debugEncoder {
println("Rewriting block header", bh)
}
_ = bh.appendTo(b.output[bhOffset:bhOffset])
@ -801,14 +802,13 @@ func (b *blockEnc) genCodes() {
// nothing to do
return
}
if len(b.sequences) > math.MaxUint16 {
panic("can only encode up to 64K sequences")
}
// No bounds checks after here:
llH := b.coders.llEnc.Histogram()[:256]
ofH := b.coders.ofEnc.Histogram()[:256]
mlH := b.coders.mlEnc.Histogram()[:256]
llH := b.coders.llEnc.Histogram()
ofH := b.coders.ofEnc.Histogram()
mlH := b.coders.mlEnc.Histogram()
for i := range llH {
llH[i] = 0
}
@ -820,7 +820,8 @@ func (b *blockEnc) genCodes() {
}
var llMax, ofMax, mlMax uint8
for i, seq := range b.sequences {
for i := range b.sequences {
seq := &b.sequences[i]
v := llCode(seq.litLen)
seq.llCode = v
llH[v]++
@ -844,7 +845,6 @@ func (b *blockEnc) genCodes() {
panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen))
}
}
b.sequences[i] = seq
}
maxCount := func(a []uint32) int {
var max uint32

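The `genCodes` change above swaps `for i, seq := range b.sequences` plus a write-back for an index loop that takes a pointer to the element, so each sequence is updated in place instead of being copied twice. The same idiom in isolation (the types and code derivations here are placeholders):

```go
type sequence struct {
	litLen, matchLen uint32
	llCode, mlCode   uint8
}

func assignCodes(seqs []sequence) {
	for i := range seqs {
		s := &seqs[i]                       // mutate the slice element directly; no copy, no write-back
		s.llCode = uint8(s.litLen & 0x3f)   // placeholder code derivation
		s.mlCode = uint8(s.matchLen & 0x3f) // placeholder code derivation
	}
}
```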
View file

@ -91,7 +91,7 @@ func (r *readerWrapper) readSmall(n int) ([]byte, error) {
if err == io.EOF {
return nil, io.ErrUnexpectedEOF
}
if debug {
if debugDecoder {
println("readSmall: got", n2, "want", n, "err", err)
}
return nil, err

View file

@ -5,6 +5,7 @@ package zstd
import (
"bytes"
"encoding/binary"
"errors"
"io"
)
@ -15,18 +16,50 @@ const HeaderMaxSize = 14 + 3
// Header contains information about the first frame and block within that.
type Header struct {
// Window Size the window of data to keep while decoding.
// Will only be set if HasFCS is false.
WindowSize uint64
// SingleSegment specifies whether the data is to be decompressed into a
// single contiguous memory segment.
// It implies that WindowSize is invalid and that FrameContentSize is valid.
SingleSegment bool
// Frame content size.
// Expected size of the entire frame.
FrameContentSize uint64
// WindowSize is the window of data to keep while decoding.
// Will only be set if SingleSegment is false.
WindowSize uint64
// Dictionary ID.
// If 0, no dictionary.
DictionaryID uint32
// HasFCS specifies whether FrameContentSize has a valid value.
HasFCS bool
// FrameContentSize is the expected uncompressed size of the entire frame.
FrameContentSize uint64
// Skippable will be true if the frame is meant to be skipped.
// This implies that FirstBlock.OK is false.
Skippable bool
// SkippableID is the user-specific ID for the skippable frame.
// Valid values are between 0 and 15, inclusive.
SkippableID int
// SkippableSize is the length of the user data to skip following
// the header.
SkippableSize uint32
// HeaderSize is the raw size of the frame header.
//
// For normal frames, it includes the size of the magic number and
// the size of the header (per section 3.1.1.1).
// It does not include the size for any data blocks (section 3.1.1.2) nor
// the size for the trailing content checksum.
//
// For skippable frames, this counts the size of the magic number
// along with the size of the size field of the payload.
// It does not include the size of the skippable payload itself.
// The total frame size is the HeaderSize plus the SkippableSize.
HeaderSize int
// First block information.
FirstBlock struct {
// OK will be set if first block could be decoded.
@ -51,17 +84,9 @@ type Header struct {
CompressedSize int
}
// Skippable will be true if the frame is meant to be skipped.
// No other information will be populated.
Skippable bool
// If set there is a checksum present for the block content.
// The checksum field at the end is always 4 bytes long.
HasCheckSum bool
// If this is true FrameContentSize will have a valid value
HasFCS bool
SingleSegment bool
}
// Decode the header from the beginning of the stream.
@ -71,39 +96,46 @@ type Header struct {
// If there isn't enough input, io.ErrUnexpectedEOF is returned.
// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
func (h *Header) Decode(in []byte) error {
*h = Header{}
if len(in) < 4 {
return io.ErrUnexpectedEOF
}
h.HeaderSize += 4
b, in := in[:4], in[4:]
if !bytes.Equal(b, frameMagic) {
if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 {
return ErrMagicMismatch
}
*h = Header{Skippable: true}
if len(in) < 4 {
return io.ErrUnexpectedEOF
}
h.HeaderSize += 4
h.Skippable = true
h.SkippableID = int(b[0] & 0xf)
h.SkippableSize = binary.LittleEndian.Uint32(in)
return nil
}
if len(in) < 1 {
return io.ErrUnexpectedEOF
}
// Clear output
*h = Header{}
fhd, in := in[0], in[1:]
h.SingleSegment = fhd&(1<<5) != 0
h.HasCheckSum = fhd&(1<<2) != 0
if fhd&(1<<3) != 0 {
return errors.New("reserved bit set on frame header")
}
// Read Window_Descriptor
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
if len(in) < 1 {
return io.ErrUnexpectedEOF
}
fhd, in := in[0], in[1:]
h.HeaderSize++
h.SingleSegment = fhd&(1<<5) != 0
h.HasCheckSum = fhd&(1<<2) != 0
if fhd&(1<<3) != 0 {
return errors.New("reserved bit set on frame header")
}
if !h.SingleSegment {
if len(in) < 1 {
return io.ErrUnexpectedEOF
}
var wd byte
wd, in = in[0], in[1:]
h.HeaderSize++
windowLog := 10 + (wd >> 3)
windowBase := uint64(1) << windowLog
windowAdd := (windowBase / 8) * uint64(wd&0x7)
@ -120,9 +152,7 @@ func (h *Header) Decode(in []byte) error {
return io.ErrUnexpectedEOF
}
b, in = in[:size], in[size:]
if b == nil {
return io.ErrUnexpectedEOF
}
h.HeaderSize += int(size)
switch size {
case 1:
h.DictionaryID = uint32(b[0])
@ -152,9 +182,7 @@ func (h *Header) Decode(in []byte) error {
return io.ErrUnexpectedEOF
}
b, in = in[:fcsSize], in[fcsSize:]
if b == nil {
return io.ErrUnexpectedEOF
}
h.HeaderSize += int(fcsSize)
switch fcsSize {
case 1:
h.FrameContentSize = uint64(b[0])

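The reworked `Header` now exposes `SkippableID`, `SkippableSize` and `HeaderSize` alongside the existing fields, which makes it usable for cheap frame inspection without decoding. A hedged usage sketch against this version of the package (file name is illustrative):

```go
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Read just enough bytes for a frame header (HeaderMaxSize is 14+3).
	buf := make([]byte, zstd.HeaderMaxSize)
	f, err := os.Open("example.zst")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()
	n, _ := f.Read(buf)

	var h zstd.Header
	if err := h.Decode(buf[:n]); err != nil {
		log.Fatal(err)
	}
	if h.Skippable {
		fmt.Println("skippable frame, id:", h.SkippableID, "payload bytes:", h.SkippableSize)
		return
	}
	fmt.Println("window:", h.WindowSize, "has FCS:", h.HasFCS, "content size:", h.FrameContentSize)
}
```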
View file

@ -113,9 +113,6 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
// Returns the number of bytes written and any error that occurred.
// When the stream is done, io.EOF will be returned.
func (d *Decoder) Read(p []byte) (int, error) {
if d.stream == nil {
return 0, ErrDecoderNilInput
}
var n int
for {
if len(d.current.b) > 0 {
@ -138,7 +135,7 @@ func (d *Decoder) Read(p []byte) (int, error) {
}
}
if len(d.current.b) > 0 {
if debug {
if debugDecoder {
println("returning", n, "still bytes left:", len(d.current.b))
}
// Only return error at end of block
@ -147,7 +144,7 @@ func (d *Decoder) Read(p []byte) (int, error) {
if d.current.err != nil {
d.drainOutput()
}
if debug {
if debugDecoder {
println("returning", n, d.current.err, len(d.decoders))
}
return n, d.current.err
@ -167,20 +164,17 @@ func (d *Decoder) Reset(r io.Reader) error {
if r == nil {
d.current.err = ErrDecoderNilInput
if len(d.current.b) > 0 {
d.current.b = d.current.b[:0]
}
d.current.flushed = true
return nil
}
if d.stream == nil {
d.stream = make(chan decodeStream, 1)
d.streamWg.Add(1)
go d.startStreamDecoder(d.stream)
}
// If bytes buffer and < 1MB, do sync decoding anyway.
if bb, ok := r.(byter); ok && bb.Len() < 1<<20 {
// If bytes buffer and < 5MB, do sync decoding anyway.
if bb, ok := r.(byter); ok && bb.Len() < 5<<20 {
bb2 := bb
if debug {
if debugDecoder {
println("*bytes.Buffer detected, doing sync decode, len:", bb.Len())
}
b := bb2.Bytes()
@ -196,12 +190,18 @@ func (d *Decoder) Reset(r io.Reader) error {
d.current.b = dst
d.current.err = err
d.current.flushed = true
if debug {
if debugDecoder {
println("sync decode to", len(dst), "bytes, err:", err)
}
return nil
}
if d.stream == nil {
d.stream = make(chan decodeStream, 1)
d.streamWg.Add(1)
go d.startStreamDecoder(d.stream)
}
// Remove current block.
d.current.decodeOutput = decodeOutput{}
d.current.err = nil
@ -225,7 +225,7 @@ func (d *Decoder) drainOutput() {
d.current.cancel = nil
}
if d.current.d != nil {
if debug {
if debugDecoder {
printf("re-adding current decoder %p, decoders: %d", d.current.d, len(d.decoders))
}
d.decoders <- d.current.d
@ -238,7 +238,7 @@ func (d *Decoder) drainOutput() {
}
for v := range d.current.output {
if v.d != nil {
if debug {
if debugDecoder {
printf("re-adding decoder %p", v.d)
}
d.decoders <- v.d
@ -255,17 +255,15 @@ func (d *Decoder) drainOutput() {
// The return value n is the number of bytes written.
// Any error encountered during the write is also returned.
func (d *Decoder) WriteTo(w io.Writer) (int64, error) {
if d.stream == nil {
return 0, ErrDecoderNilInput
}
var n int64
for {
if len(d.current.b) > 0 {
n2, err2 := w.Write(d.current.b)
n += int64(n2)
if err2 != nil && d.current.err == nil {
if err2 != nil && (d.current.err == nil || d.current.err == io.EOF) {
d.current.err = err2
break
} else if n2 != len(d.current.b) {
d.current.err = io.ErrShortWrite
}
}
if d.current.err != nil {
@ -297,7 +295,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
block := <-d.decoders
frame := block.localFrame
defer func() {
if debug {
if debugDecoder {
printf("re-adding decoder: %p", block)
}
frame.rawInput = nil
@ -310,7 +308,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
frame.history.reset()
err := frame.reset(&frame.bBuf)
if err == io.EOF {
if debug {
if debugDecoder {
println("frame reset return EOF")
}
return dst, nil
@ -355,7 +353,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
return dst, err
}
if len(frame.bBuf) == 0 {
if debug {
if debugDecoder {
println("frame dbuf empty")
}
break
@ -371,7 +369,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
// if no data was available without blocking.
func (d *Decoder) nextBlock(blocking bool) (ok bool) {
if d.current.d != nil {
if debug {
if debugDecoder {
printf("re-adding current decoder %p", d.current.d)
}
d.decoders <- d.current.d
@ -391,7 +389,7 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
return false
}
}
if debug {
if debugDecoder {
println("got", len(d.current.b), "bytes, error:", d.current.err)
}
return true
@ -485,7 +483,7 @@ func (d *Decoder) startStreamDecoder(inStream chan decodeStream) {
defer d.streamWg.Done()
frame := newFrameDec(d.o)
for stream := range inStream {
if debug {
if debugDecoder {
println("got new stream")
}
br := readerWrapper{r: stream.r}
@ -493,7 +491,7 @@ func (d *Decoder) startStreamDecoder(inStream chan decodeStream) {
for {
frame.history.reset()
err := frame.reset(&br)
if debug && err != nil {
if debugDecoder && err != nil {
println("Frame decoder returned", err)
}
if err == nil && frame.DictionaryID != nil {
@ -510,7 +508,7 @@ func (d *Decoder) startStreamDecoder(inStream chan decodeStream) {
}
break
}
if debug {
if debugDecoder {
println("starting frame decoder")
}

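The `Reset` changes above (nil-input handling, synchronous decoding for buffers under 5MB, and starting the stream goroutine only when actually streaming) all serve the same use case: keeping one `Decoder` alive and pointing it at successive inputs. A hedged sketch of that reuse pattern, assuming the usual io, os and github.com/klauspost/compress/zstd imports:

```go
func decompressMany(paths []string) error {
	var d *zstd.Decoder
	for _, p := range paths {
		f, err := os.Open(p)
		if err != nil {
			return err
		}
		if d == nil {
			// First input: create the decoder.
			if d, err = zstd.NewReader(f); err != nil {
				f.Close()
				return err
			}
			defer d.Close()
		} else if err = d.Reset(f); err != nil { // reuse decoder state and goroutines
			f.Close()
			return err
		}
		if _, err = io.Copy(io.Discard, d); err != nil {
			f.Close()
			return err
		}
		f.Close()
	}
	return nil
}
```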
View file

@ -17,14 +17,16 @@ type decoderOptions struct {
lowMem bool
concurrent int
maxDecodedSize uint64
maxWindowSize uint64
dicts []dict
}
func (o *decoderOptions) setDefault() {
*o = decoderOptions{
// use less ram: true for now, but may change.
lowMem: true,
concurrent: runtime.GOMAXPROCS(0),
lowMem: true,
concurrent: runtime.GOMAXPROCS(0),
maxWindowSize: MaxWindowSize,
}
o.maxDecodedSize = 1 << 63
}
@ -52,7 +54,6 @@ func WithDecoderConcurrency(n int) DOption {
// WithDecoderMaxMemory allows setting a maximum decoded size for in-memory
// non-streaming operations or maximum window size for streaming operations.
// This can be used to control memory usage of potentially hostile content.
// For streaming operations, the maximum window size is capped at 1<<30 bytes.
// Maximum and default is 1 << 63 bytes.
func WithDecoderMaxMemory(n uint64) DOption {
return func(o *decoderOptions) error {
@ -81,3 +82,21 @@ func WithDecoderDicts(dicts ...[]byte) DOption {
return nil
}
}
// WithDecoderMaxWindow allows setting a maximum window size for decodes.
// This allows rejecting inputs that would otherwise cause excessive memory usage.
// The Decoder will likely allocate more memory based on the WithDecoderLowmem setting.
// If WithDecoderMaxMemory is set to a lower value, that will be used.
// Default is 512MB, Maximum is ~3.75 TB as per zstandard spec.
func WithDecoderMaxWindow(size uint64) DOption {
return func(o *decoderOptions) error {
if size < MinWindowSize {
return errors.New("WithMaxWindowSize must be at least 1KB, 1024 bytes")
}
if size > (1<<41)+7*(1<<38) {
return errors.New("WithMaxWindowSize must be less than (1<<41) + 7*(1<<38) ~ 3.75TB")
}
o.maxWindowSize = size
return nil
}
}

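The new `WithDecoderMaxWindow` option added here complements `WithDecoderMaxMemory`: the former bounds the window a hostile stream may request, the latter bounds total decoded output for in-memory operations. A hedged configuration sketch, inside a function that already has an `io.Reader` named `r`; the limits are arbitrary example values:

```go
d, err := zstd.NewReader(r,
	zstd.WithDecoderMaxWindow(8<<20),  // reject frames requesting windows above 8MB
	zstd.WithDecoderMaxMemory(64<<20), // cap in-memory decoded output at 64MB
	zstd.WithDecoderLowmem(true),      // prefer smaller allocations over speed
)
if err != nil {
	return err
}
defer d.Close()
```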
View file

@ -82,7 +82,7 @@ func loadDict(b []byte) (*dict, error) {
println("Transform table error:", err)
return err
}
if debug {
if debugDecoder || debugEncoder {
println("Read table ok", "symbolLen:", dec.symbolLen)
}
// Set decoders as predefined so they aren't reused.

View file

@ -38,8 +38,8 @@ func (e *fastBase) AppendCRC(dst []byte) []byte {
// WindowSize returns the window size of the encoder,
// or a window size small enough to contain the input size, if > 0.
func (e *fastBase) WindowSize(size int) int32 {
if size > 0 && size < int(e.maxMatchOff) {
func (e *fastBase) WindowSize(size int64) int32 {
if size > 0 && size < int64(e.maxMatchOff) {
b := int32(1) << uint(bits.Len(uint(size)))
// Keep minimum window.
if b < 1024 {
@ -108,11 +108,6 @@ func (e *fastBase) UseBlock(enc *blockEnc) {
e.blk = enc
}
func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 {
// Extend the match to be as long as possible.
return int32(matchLen(src[s:], src[t:]))
}
func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
if debugAsserts {
if s < 0 {
@ -131,9 +126,24 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
}
}
a := src[s:]
b := src[t:]
b = b[:len(a)]
end := int32((len(a) >> 3) << 3)
for i := int32(0); i < end; i += 8 {
if diff := load6432(a, i) ^ load6432(b, i); diff != 0 {
return i + int32(bits.TrailingZeros64(diff)>>3)
}
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:], src[t:]))
a = a[end:]
b = b[end:]
for i := range a {
if a[i] != b[i] {
return int32(i) + end
}
}
return int32(len(a)) + end
}
// Reset the encoding table.

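The inlined body of `matchlen` above compares eight bytes at a time and uses the trailing-zero count of the XOR to locate the first differing byte, falling back to a byte loop for the tail. The same technique as a standalone function (an independent sketch, not the package's helper):

```go
import (
	"encoding/binary"
	"math/bits"
)

// commonPrefixLen returns how many leading bytes a and b have in common.
// It assumes len(b) >= len(a).
func commonPrefixLen(a, b []byte) int {
	n := 0
	for ; len(a) >= 8; a, b = a[8:], b[8:] {
		if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
			// The lowest set bit of the XOR marks the first byte that differs.
			return n + bits.TrailingZeros64(diff)>>3
		}
		n += 8
	}
	for i := range a {
		if a[i] != b[i] {
			return n + i
		}
	}
	return n + len(a)
}
```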
View file

@ -5,22 +5,61 @@
package zstd
import (
"bytes"
"fmt"
"math/bits"
"github.com/klauspost/compress"
)
const (
bestLongTableBits = 20 // Bits used in the long match table
bestLongTableBits = 22 // Bits used in the long match table
bestLongTableSize = 1 << bestLongTableBits // Size of the table
bestLongLen = 8 // Bytes used for table hash
// Note: Increasing the short table bits or making the hash shorter
// can actually lead to compression degradation since it will 'steal' more from the
// long match table and match offsets are quite big.
// This greatly depends on the type of input.
bestShortTableBits = 16 // Bits used in the short match table
bestShortTableBits = 18 // Bits used in the short match table
bestShortTableSize = 1 << bestShortTableBits // Size of the table
bestShortLen = 4 // Bytes used for table hash
)
type match struct {
offset int32
s int32
length int32
rep int32
est int32
}
const highScore = 25000
// estBits will estimate output bits from predefined tables.
func (m *match) estBits(bitsPerByte int32) {
mlc := mlCode(uint32(m.length - zstdMinMatch))
var ofc uint8
if m.rep < 0 {
ofc = ofCode(uint32(m.s-m.offset) + 3)
} else {
ofc = ofCode(uint32(m.rep))
}
// Cost of encoding the match, excluding literals.
ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]
// Add cost of match encoding...
m.est = int32(ofTT.outBits + mlTT.outBits)
m.est += int32(ofTT.deltaNbBits>>16 + mlTT.deltaNbBits>>16)
// Subtract savings compared to literal encoding...
m.est -= (m.length * bitsPerByte) >> 10
if m.est > 0 {
// Unlikely gain..
m.length = 0
m.est = highScore
}
}
// bestFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
@ -109,6 +148,14 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
return
}
// Use this to estimate literal cost.
// Scaled by 10 bits.
bitsPerByte := int32((compress.ShannonEntropyBits(src) * 1024) / len(src))
// Huffman can never go < 1 bit/byte
if bitsPerByte < 1024 {
bitsPerByte = 1024
}
// Override src
src = e.hist
sLimit := int32(len(src)) - inputMargin
@ -132,7 +179,7 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
}
_ = addLiterals
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@ -145,51 +192,49 @@ encodeLoop:
panic("offset0 was 0")
}
type match struct {
offset int32
s int32
length int32
rep int32
}
matchAt := func(offset int32, s int32, first uint32, rep int32) match {
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
return match{offset: offset, s: s}
}
return match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
}
bestOf := func(a, b match) match {
aScore := b.s - a.s + a.length
bScore := a.s - b.s + b.length
if a.rep < 0 {
aScore = aScore - int32(bits.Len32(uint32(a.offset)))/8
}
if b.rep < 0 {
bScore = bScore - int32(bits.Len32(uint32(b.offset)))/8
}
if aScore >= bScore {
if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 {
return a
}
return b
}
const goodEnough = 100
nextHashL := hash8(cv, bestLongTableBits)
nextHashS := hash4x64(cv, bestShortTableBits)
nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
matchAt := func(offset int32, s int32, first uint32, rep int32) match {
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
return match{s: s, est: highScore}
}
if debugAsserts {
if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
}
}
m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
m.estBits(bitsPerByte)
return m
}
best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
if canRepeat && best.length < goodEnough {
best = bestOf(best, matchAt(s-offset1+1, s+1, uint32(cv>>8), 1))
best = bestOf(best, matchAt(s-offset2+1, s+1, uint32(cv>>8), 2))
best = bestOf(best, matchAt(s-offset3+1, s+1, uint32(cv>>8), 3))
cv32 := uint32(cv >> 8)
spp := s + 1
best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
if best.length > 0 {
best = bestOf(best, matchAt(s-offset1+3, s+3, uint32(cv>>24), 1))
best = bestOf(best, matchAt(s-offset2+3, s+3, uint32(cv>>24), 2))
best = bestOf(best, matchAt(s-offset3+3, s+3, uint32(cv>>24), 3))
cv32 = uint32(cv >> 24)
spp += 2
best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
}
}
// Load next and check...
@ -209,22 +254,28 @@ encodeLoop:
}
s++
candidateS = e.table[hash4x64(cv>>8, bestShortTableBits)]
candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)]
cv = load6432(src, s)
cv2 := load6432(src, s+1)
candidateL = e.longTable[hash8(cv, bestLongTableBits)]
candidateL2 := e.longTable[hash8(cv2, bestLongTableBits)]
candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)]
candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]
// Short at s+1
best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
// Long at s+1, s+2
best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
if false {
// Short at s+3.
// Too often worse...
best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1))
}
// See if we can find a better match by checking where the current best ends.
// Use that offset to see if we can find a better full match.
if sAt := best.s + best.length; sAt < sLimit {
nextHashL := hash8(load6432(src, sAt), bestLongTableBits)
nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
candidateEnd := e.longTable[nextHashL]
if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
@ -236,6 +287,12 @@ encodeLoop:
}
}
if debugAsserts {
if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) {
panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]))
}
}
// We have a match, we can store the forward value
if best.rep > 0 {
s = best.s
@ -274,7 +331,7 @@ encodeLoop:
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, best.length)
}
@ -284,8 +341,8 @@ encodeLoop:
off := index0 + e.cur
for index0 < s-1 {
cv0 := load6432(src, index0)
h0 := hash8(cv0, bestLongTableBits)
h1 := hash4x64(cv0, bestShortTableBits)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
off++
@ -311,7 +368,7 @@ encodeLoop:
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && canRepeat && int(offset1) > len(src) {
if debugAsserts && int(offset1) > len(src) {
panic("invalid offset")
}
@ -352,8 +409,8 @@ encodeLoop:
// every entry
for index0 < s-1 {
cv0 := load6432(src, index0)
h0 := hash8(cv0, bestLongTableBits)
h1 := hash4x64(cv0, bestShortTableBits)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
@ -374,8 +431,8 @@ encodeLoop:
}
// Store this, since we have it.
nextHashS := hash4x64(cv, bestShortTableBits)
nextHashL := hash8(cv, bestLongTableBits)
nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
@ -412,7 +469,7 @@ encodeLoop:
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
blk.recentOffsets[2] = uint32(offset3)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
@ -425,7 +482,7 @@ func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
e.Encode(blk, src)
}
// ResetDict will reset and set a dictionary if not nil
// Reset will reset and set a dictionary if not nil
func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
e.resetBase(d, singleBlock)
if d == nil {
@ -441,10 +498,10 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
const hashLog = bestShortTableBits
cv := load6432(d.content, i-e.maxMatchOff)
nextHash := hash4x64(cv, hashLog) // 0 -> 4
nextHash1 := hash4x64(cv>>8, hashLog) // 1 -> 5
nextHash2 := hash4x64(cv>>16, hashLog) // 2 -> 6
nextHash3 := hash4x64(cv>>24, hashLog) // 3 -> 7
nextHash := hashLen(cv, hashLog, bestShortLen) // 0 -> 4
nextHash1 := hashLen(cv>>8, hashLog, bestShortLen) // 1 -> 5
nextHash2 := hashLen(cv>>16, hashLog, bestShortLen) // 2 -> 6
nextHash3 := hashLen(cv>>24, hashLog, bestShortLen) // 3 -> 7
e.dictTable[nextHash] = prevEntry{
prev: e.dictTable[nextHash].offset,
offset: i,
@ -472,7 +529,7 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
}
if len(d.content) >= 8 {
cv := load6432(d.content, 0)
h := hash8(cv, bestLongTableBits)
h := hashLen(cv, bestLongTableBits, bestLongLen)
e.dictLongTable[h] = prevEntry{
offset: e.maxMatchOff,
prev: e.dictLongTable[h].offset,
@ -482,7 +539,7 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
off := 8 // First to read
for i := e.maxMatchOff + 1; i < end; i++ {
cv = cv>>8 | (uint64(d.content[off]) << 56)
h := hash8(cv, bestLongTableBits)
h := hashLen(cv, bestLongTableBits, bestLongLen)
e.dictLongTable[h] = prevEntry{
offset: i,
prev: e.dictLongTable[h].offset,

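The "best" encoder now prices candidate matches: `compress.ShannonEntropyBits` estimates how many bits the block's literals would cost, scaled by 1024 so the per-byte cost stays in integer math, and each candidate match subtracts the literals it would replace from its own predefined-table encoding cost. A small sketch of just the literal-cost side, using the same scaling as the diff (the helper name is illustrative):

```go
import "github.com/klauspost/compress"

// literalCostBits estimates, in bits, the cost of emitting src[from:to] as literals,
// using a whole-block entropy estimate scaled by 1024 (the same scaling the encoder uses).
func literalCostBits(src []byte, from, to int) int {
	bitsPerByte := (compress.ShannonEntropyBits(src) * 1024) / len(src)
	if bitsPerByte < 1024 {
		bitsPerByte = 1024 // entropy coding cannot go below 1 bit per byte here
	}
	return ((to - from) * bitsPerByte) >> 10
}
```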
View file

@ -9,6 +9,7 @@ import "fmt"
const (
betterLongTableBits = 19 // Bits used in the long match table
betterLongTableSize = 1 << betterLongTableBits // Size of the table
betterLongLen = 8 // Bytes used for table hash
// Note: Increasing the short table bits or making the hash shorter
// can actually lead to compression degradation since it will 'steal' more from the
@ -16,6 +17,7 @@ const (
// This greatly depends on the type of input.
betterShortTableBits = 13 // Bits used in the short match table
betterShortTableSize = 1 << betterShortTableBits // Size of the table
betterShortLen = 5 // Bytes used for table hash
betterLongTableShardCnt = 1 << (betterLongTableBits - dictShardBits) // Number of shards in the table
betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard
@ -138,7 +140,7 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@ -154,8 +156,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHashS := hash5(cv, betterShortTableBits)
nextHashL := hash8(cv, betterLongTableBits)
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@ -204,7 +206,7 @@ encodeLoop:
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, lenght)
}
@ -214,10 +216,10 @@ encodeLoop:
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
index0 += 2
}
cv = load6432(src, s)
@ -264,7 +266,7 @@ encodeLoop:
s += lenght + repOff2
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, lenght)
}
@ -275,10 +277,10 @@ encodeLoop:
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
index0 += 2
}
cv = load6432(src, s)
@ -353,7 +355,7 @@ encodeLoop:
// See if we can find a long match at s+1
const checkAt = 1
cv := load6432(src, s+checkAt)
nextHashL = hash8(cv, betterLongTableBits)
nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
candidateL = e.longTable[nextHashL]
coffsetL = candidateL.offset - e.cur
@ -413,8 +415,8 @@ encodeLoop:
}
// Try to find a better match by searching for a long match at the end of the current best match
if true && s+matched < sLimit {
nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
if s+matched < sLimit {
nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
cv := load3232(src, s)
candidateL := e.longTable[nextHashL]
coffsetL := candidateL.offset - e.cur - matched
@ -495,10 +497,10 @@ encodeLoop:
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
index0 += 2
}
@ -516,8 +518,8 @@ encodeLoop:
}
// Store this, since we have it.
nextHashS := hash5(cv, betterShortTableBits)
nextHashL := hash8(cv, betterLongTableBits)
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
@ -553,7 +555,7 @@ encodeLoop:
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
@ -656,7 +658,7 @@ func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@ -672,8 +674,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHashS := hash5(cv, betterShortTableBits)
nextHashL := hash8(cv, betterLongTableBits)
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@ -724,7 +726,7 @@ encodeLoop:
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, lenght)
}
@ -734,11 +736,11 @@ encodeLoop:
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.markLongShardDirty(h0)
h1 := hash5(cv1, betterShortTableBits)
h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.markShortShardDirty(h1)
index0 += 2
@ -787,7 +789,7 @@ encodeLoop:
s += lenght + repOff2
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, lenght)
}
@ -798,11 +800,11 @@ encodeLoop:
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.markLongShardDirty(h0)
h1 := hash5(cv1, betterShortTableBits)
h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.markShortShardDirty(h1)
index0 += 2
@ -879,7 +881,7 @@ encodeLoop:
// See if we can find a long match at s+1
const checkAt = 1
cv := load6432(src, s+checkAt)
nextHashL = hash8(cv, betterLongTableBits)
nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
candidateL = e.longTable[nextHashL]
coffsetL = candidateL.offset - e.cur
@ -940,7 +942,7 @@ encodeLoop:
}
// Try to find a better match by searching for a long match at the end of the current best match
if s+matched < sLimit {
nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
cv := load3232(src, s)
candidateL := e.longTable[nextHashL]
coffsetL := candidateL.offset - e.cur - matched
@ -1021,11 +1023,11 @@ encodeLoop:
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.markLongShardDirty(h0)
h1 := hash5(cv1, betterShortTableBits)
h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.markShortShardDirty(h1)
index0 += 2
@ -1045,8 +1047,8 @@ encodeLoop:
}
// Store this, since we have it.
nextHashS := hash5(cv, betterShortTableBits)
nextHashL := hash8(cv, betterLongTableBits)
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
@ -1084,7 +1086,7 @@ encodeLoop:
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
@ -1113,10 +1115,10 @@ func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
const hashLog = betterShortTableBits
cv := load6432(d.content, i-e.maxMatchOff)
nextHash := hash5(cv, hashLog) // 0 -> 4
nextHash1 := hash5(cv>>8, hashLog) // 1 -> 5
nextHash2 := hash5(cv>>16, hashLog) // 2 -> 6
nextHash3 := hash5(cv>>24, hashLog) // 3 -> 7
nextHash := hashLen(cv, hashLog, betterShortLen) // 0 -> 4
nextHash1 := hashLen(cv>>8, hashLog, betterShortLen) // 1 -> 5
nextHash2 := hashLen(cv>>16, hashLog, betterShortLen) // 2 -> 6
nextHash3 := hashLen(cv>>24, hashLog, betterShortLen) // 3 -> 7
e.dictTable[nextHash] = tableEntry{
val: uint32(cv),
offset: i,
@ -1145,7 +1147,7 @@ func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
}
if len(d.content) >= 8 {
cv := load6432(d.content, 0)
h := hash8(cv, betterLongTableBits)
h := hashLen(cv, betterLongTableBits, betterLongLen)
e.dictLongTable[h] = prevEntry{
offset: e.maxMatchOff,
prev: e.dictLongTable[h].offset,
@ -1155,7 +1157,7 @@ func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
off := 8 // First to read
for i := e.maxMatchOff + 1; i < end; i++ {
cv = cv>>8 | (uint64(d.content[off]) << 56)
h := hash8(cv, betterLongTableBits)
h := hashLen(cv, betterLongTableBits, betterLongLen)
e.dictLongTable[h] = prevEntry{
offset: i,
prev: e.dictLongTable[h].offset,

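Throughout these encoder files the fixed `hash5`/`hash8` helpers are replaced by `hashLen(cv, tableBits, hashLen)`, which hashes only the low `hashLen` bytes of the loaded 64-bit word into a `tableBits`-sized index. A simplified multiplicative-hash sketch of the idea; the prime and function name here are illustrative, not the package's constants:

```go
// hashLow hashes the low n bytes (4..8) of v into an index of hashLog bits.
func hashLow(v uint64, hashLog, n uint8) uint32 {
	const prime uint64 = 0x9E3779B185EBCA87 // illustrative 64-bit mixing constant
	v <<= (8 - n) * 8                       // discard the high bytes, keeping the low n bytes on top
	return uint32((v * prime) >> (64 - hashLog))
}
```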
View file

@ -10,6 +10,7 @@ const (
dFastLongTableBits = 17 // Bits used in the long match table
dFastLongTableSize = 1 << dFastLongTableBits // Size of the table
dFastLongTableMask = dFastLongTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
dFastLongLen = 8 // Bytes used for table hash
dLongTableShardCnt = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table
dLongTableShardSize = dFastLongTableSize / tableShardCnt // Size of an individual shard
@ -17,6 +18,8 @@ const (
dFastShortTableBits = tableBits // Bits used in the short match table
dFastShortTableSize = 1 << dFastShortTableBits // Size of the table
dFastShortTableMask = dFastShortTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
dFastShortLen = 5 // Bytes used for table hash
)
type doubleFastEncoder struct {
@ -109,7 +112,7 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@ -124,8 +127,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@ -170,7 +173,7 @@ encodeLoop:
s += lenght + repOff
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, lenght)
}
@ -208,7 +211,7 @@ encodeLoop:
// See if we can find a long match at s+1
const checkAt = 1
cv := load6432(src, s+checkAt)
nextHashL = hash8(cv, dFastLongTableBits)
nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
candidateL = e.longTable[nextHashL]
coffsetL = s - (candidateL.offset - e.cur) + checkAt
@ -304,16 +307,16 @@ encodeLoop:
cv1 := load6432(src, index1)
te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
e.longTable[hash8(cv0, dFastLongTableBits)] = te0
e.longTable[hash8(cv1, dFastLongTableBits)] = te1
e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0
e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1
cv0 >>= 8
cv1 >>= 8
te0.offset++
te1.offset++
te0.val = uint32(cv0)
te1.val = uint32(cv1)
e.table[hash5(cv0, dFastShortTableBits)] = te0
e.table[hash5(cv1, dFastShortTableBits)] = te1
e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0
e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1
cv = load6432(src, s)
@ -330,8 +333,8 @@ encodeLoop:
}
// Store this, since we have it.
nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
@ -368,7 +371,7 @@ encodeLoop:
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
@ -427,7 +430,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@ -436,8 +439,8 @@ encodeLoop:
var t int32
for {
nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@ -483,7 +486,7 @@ encodeLoop:
s += length + repOff
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, length)
}
@ -521,7 +524,7 @@ encodeLoop:
// See if we can find a long match at s+1
const checkAt = 1
cv := load6432(src, s+checkAt)
nextHashL = hash8(cv, dFastLongTableBits)
nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
candidateL = e.longTable[nextHashL]
coffsetL = s - (candidateL.offset - e.cur) + checkAt
@ -614,16 +617,16 @@ encodeLoop:
cv1 := load6432(src, index1)
te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
e.longTable[hash8(cv0, dFastLongTableBits)] = te0
e.longTable[hash8(cv1, dFastLongTableBits)] = te1
e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0
e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1
cv0 >>= 8
cv1 >>= 8
te0.offset++
te1.offset++
te0.val = uint32(cv0)
te1.val = uint32(cv1)
e.table[hash5(cv0, dFastShortTableBits)] = te0
e.table[hash5(cv1, dFastShortTableBits)] = te1
e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0
e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1
cv = load6432(src, s)
@ -640,8 +643,8 @@ encodeLoop:
}
// Store this, since we have it.
nextHashS := hash5(cv1>>8, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
nextHashS := hashLen(cv1>>8, dFastShortTableBits, dFastShortLen)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
@ -677,7 +680,7 @@ encodeLoop:
blk.literals = append(blk.literals, src[nextEmit:]...)
blk.extraLits = len(src) - int(nextEmit)
}
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
@ -767,7 +770,7 @@ func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@ -782,8 +785,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@ -830,7 +833,7 @@ encodeLoop:
s += lenght + repOff
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, lenght)
}
@ -868,7 +871,7 @@ encodeLoop:
// See if we can find a long match at s+1
const checkAt = 1
cv := load6432(src, s+checkAt)
nextHashL = hash8(cv, dFastLongTableBits)
nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
candidateL = e.longTable[nextHashL]
coffsetL = s - (candidateL.offset - e.cur) + checkAt
@ -965,8 +968,8 @@ encodeLoop:
cv1 := load6432(src, index1)
te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
longHash1 := hash8(cv0, dFastLongTableBits)
longHash2 := hash8(cv0, dFastLongTableBits)
longHash1 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
longHash2 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
e.longTable[longHash1] = te0
e.longTable[longHash2] = te1
e.markLongShardDirty(longHash1)
@ -977,8 +980,8 @@ encodeLoop:
te1.offset++
te0.val = uint32(cv0)
te1.val = uint32(cv1)
hashVal1 := hash5(cv0, dFastShortTableBits)
hashVal2 := hash5(cv1, dFastShortTableBits)
hashVal1 := hashLen(cv0, dFastShortTableBits, dFastShortLen)
hashVal2 := hashLen(cv1, dFastShortTableBits, dFastShortLen)
e.table[hashVal1] = te0
e.markShardDirty(hashVal1)
e.table[hashVal2] = te1
@ -999,8 +1002,8 @@ encodeLoop:
}
// Store this, since we have it.
nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
@ -1039,7 +1042,7 @@ encodeLoop:
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
// If we encoded more than 64K mark all dirty.
@ -1071,14 +1074,14 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
}
if len(d.content) >= 8 {
cv := load6432(d.content, 0)
e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{
e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{
val: uint32(cv),
offset: e.maxMatchOff,
}
end := int32(len(d.content)) - 8 + e.maxMatchOff
for i := e.maxMatchOff + 1; i < end; i++ {
cv = cv>>8 | (uint64(d.content[i-e.maxMatchOff+7]) << 56)
e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{
e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{
val: uint32(cv),
offset: i,
}

View file

@ -6,17 +6,16 @@ package zstd
import (
"fmt"
"math"
"math/bits"
)
const (
tableBits = 15 // Bits used in the table
tableSize = 1 << tableBits // Size of the table
tableShardCnt = 1 << (tableBits - dictShardBits) // Number of shards in the table
tableShardSize = tableSize / tableShardCnt // Size of an individual shard
tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
maxMatchLength = 131074
tableBits = 15 // Bits used in the table
tableSize = 1 << tableBits // Size of the table
tableShardCnt = 1 << (tableBits - dictShardBits) // Number of shards in the table
tableShardSize = tableSize / tableShardCnt // Size of an individual shard
tableFastHashLen = 6
tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
maxMatchLength = 131074
)
type tableEntry struct {
@ -103,7 +102,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@ -122,8 +121,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHash := hash6(cv, hashLog)
nextHash2 := hash6(cv>>8, hashLog)
nextHash := hashLen(cv, hashLog, tableFastHashLen)
nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
candidate := e.table[nextHash]
candidate2 := e.table[nextHash2]
repIndex := s - offset1 + 2
@ -135,20 +134,7 @@ encodeLoop:
// Consider history as well.
var seq seq
var length int32
// length = 4 + e.matchlen(s+6, repIndex+4, src)
{
a := src[s+6:]
b := src[repIndex+4:]
endI := len(a) & (math.MaxInt32 - 7)
length = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
length = 4 + e.matchlen(s+6, repIndex+4, src)
seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
@ -178,7 +164,7 @@ encodeLoop:
s += length + 2
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, length)
}
@ -235,20 +221,7 @@ encodeLoop:
}
// Extend the 4-byte match as long as possible.
//l := e.matchlen(s+4, t+4, src) + 4
var l int32
{
a := src[s+4:]
b := src[t+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := e.matchlen(s+4, t+4, src) + 4
// Extend backwards
tMin := s - e.maxMatchOff
@ -285,23 +258,10 @@ encodeLoop:
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
//l := 4 + e.matchlen(s+4, o2+4, src)
var l int32
{
a := src[s+4:]
b := src[o2+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := 4 + e.matchlen(s+4, o2+4, src)
// Store this, since we have it.
nextHash := hash6(cv, hashLog)
nextHash := hashLen(cv, hashLog, tableFastHashLen)
e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
seq.matchLen = uint32(l) - zstdMinMatch
seq.litLen = 0
@ -330,7 +290,7 @@ encodeLoop:
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
@ -343,7 +303,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
inputMargin = 8
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debug {
if debugEncoder {
if len(src) > maxBlockSize {
panic("src too big")
}
@ -391,7 +351,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@ -405,8 +365,8 @@ encodeLoop:
// By not using them for the first 3 matches
for {
nextHash := hash6(cv, hashLog)
nextHash2 := hash6(cv>>8, hashLog)
nextHash := hashLen(cv, hashLog, tableFastHashLen)
nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
candidate := e.table[nextHash]
candidate2 := e.table[nextHash2]
repIndex := s - offset1 + 2
@ -417,21 +377,7 @@ encodeLoop:
if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well.
var seq seq
// length := 4 + e.matchlen(s+6, repIndex+4, src)
// length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
var length int32
{
a := src[s+6:]
b := src[repIndex+4:]
endI := len(a) & (math.MaxInt32 - 7)
length = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
length := 4 + e.matchlen(s+6, repIndex+4, src)
seq.matchLen = uint32(length - zstdMinMatch)
@ -462,7 +408,7 @@ encodeLoop:
s += length + 2
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, length)
}
@ -521,21 +467,7 @@ encodeLoop:
panic(fmt.Sprintf("t (%d) < 0 ", t))
}
// Extend the 4-byte match as long as possible.
//l := e.matchlenNoHist(s+4, t+4, src) + 4
// l := int32(matchLen(src[s+4:], src[t+4:])) + 4
var l int32
{
a := src[s+4:]
b := src[t+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := e.matchlen(s+4, t+4, src) + 4
// Extend backwards
tMin := s - e.maxMatchOff
@ -572,24 +504,10 @@ encodeLoop:
if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
//l := 4 + e.matchlenNoHist(s+4, o2+4, src)
// l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
var l int32
{
a := src[s+4:]
b := src[o2+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := 4 + e.matchlen(s+4, o2+4, src)
// Store this, since we have it.
nextHash := hash6(cv, hashLog)
nextHash := hashLen(cv, hashLog, tableFastHashLen)
e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
seq.matchLen = uint32(l) - zstdMinMatch
seq.litLen = 0
@ -616,7 +534,7 @@ encodeLoop:
blk.literals = append(blk.literals, src[nextEmit:]...)
blk.extraLits = len(src) - int(nextEmit)
}
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
// We do not store history, so we must offset e.cur to avoid false matches for next user.
@ -696,7 +614,7 @@ func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@ -715,8 +633,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHash := hash6(cv, hashLog)
nextHash2 := hash6(cv>>8, hashLog)
nextHash := hashLen(cv, hashLog, tableFastHashLen)
nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
candidate := e.table[nextHash]
candidate2 := e.table[nextHash2]
repIndex := s - offset1 + 2
@ -730,19 +648,7 @@ encodeLoop:
// Consider history as well.
var seq seq
var length int32
// length = 4 + e.matchlen(s+6, repIndex+4, src)
{
a := src[s+6:]
b := src[repIndex+4:]
endI := len(a) & (math.MaxInt32 - 7)
length = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
length = 4 + e.matchlen(s+6, repIndex+4, src)
seq.matchLen = uint32(length - zstdMinMatch)
@ -773,7 +679,7 @@ encodeLoop:
s += length + 2
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, length)
}
@ -830,20 +736,7 @@ encodeLoop:
}
// Extend the 4-byte match as long as possible.
//l := e.matchlen(s+4, t+4, src) + 4
var l int32
{
a := src[s+4:]
b := src[t+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := e.matchlen(s+4, t+4, src) + 4
// Extend backwards
tMin := s - e.maxMatchOff
@ -880,23 +773,10 @@ encodeLoop:
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
//l := 4 + e.matchlen(s+4, o2+4, src)
var l int32
{
a := src[s+4:]
b := src[o2+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := 4 + e.matchlen(s+4, o2+4, src)
// Store this, since we have it.
nextHash := hash6(cv, hashLog)
nextHash := hashLen(cv, hashLog, tableFastHashLen)
e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
e.markShardDirty(nextHash)
seq.matchLen = uint32(l) - zstdMinMatch
@ -926,7 +806,7 @@ encodeLoop:
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
@ -957,9 +837,9 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
const hashLog = tableBits
cv := load6432(d.content, i-e.maxMatchOff)
nextHash := hash6(cv, hashLog) // 0 -> 5
nextHash1 := hash6(cv>>8, hashLog) // 1 -> 6
nextHash2 := hash6(cv>>16, hashLog) // 2 -> 7
nextHash := hashLen(cv, hashLog, tableFastHashLen) // 0 -> 5
nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen) // 1 -> 6
nextHash2 := hashLen(cv>>16, hashLog, tableFastHashLen) // 2 -> 7
e.dictTable[nextHash] = tableEntry{
val: uint32(cv),
offset: i,
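
The hunks above replace the manually inlined match-length loops with calls to e.matchlen. A standalone sketch of the same eight-bytes-at-a-time technique, reconstructed from the removed loops; it is an illustration rather than the package's e.matchlen, and it uses encoding/binary in place of the package's load64 helper:

package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

// matchLen counts matching leading bytes of a and b, comparing eight bytes
// at a time and using the trailing zero count of the XOR to locate the
// first differing byte, as in the loops removed above. Tail handling is
// simplified for illustration.
func matchLen(a, b []byte) int {
	var n int
	for len(a) >= 8 && len(b) >= 8 {
		if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
			return n + bits.TrailingZeros64(diff)>>3
		}
		a, b, n = a[8:], b[8:], n+8
	}
	for i := range a {
		if i >= len(b) || a[i] != b[i] {
			break
		}
		n++
	}
	return n
}

func main() {
	fmt.Println(matchLen([]byte("zstandard!"), []byte("zstandard?"))) // prints 9
}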

View file

@ -33,7 +33,7 @@ type encoder interface {
Block() *blockEnc
CRC() *xxhash.Digest
AppendCRC([]byte) []byte
WindowSize(size int) int32
WindowSize(size int64) int32
UseBlock(*blockEnc)
Reset(d *dict, singleBlock bool)
}
@ -48,6 +48,8 @@ type encoderState struct {
err error
writeErr error
nWritten int64
nInput int64
frameContentSize int64
headerWritten bool
eofWritten bool
fullFrameWritten bool
@ -120,7 +122,21 @@ func (e *Encoder) Reset(w io.Writer) {
s.w = w
s.err = nil
s.nWritten = 0
s.nInput = 0
s.writeErr = nil
s.frameContentSize = 0
}
// ResetContentSize will reset and set a content size for the next stream.
// If the number of bytes written does not match the given size, an error
// will be returned when calling Close().
// This is removed when Reset is called.
// Sizes <= 0 result in no content size being set.
func (e *Encoder) ResetContentSize(w io.Writer, size int64) {
e.Reset(w)
if size >= 0 {
e.state.frameContentSize = size
}
}
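
A minimal usage sketch of the ResetContentSize API added above, assuming the package's public NewWriter, Write and Close; the buffer and payload are illustrative:

package main

import (
	"bytes"
	"log"

	"github.com/klauspost/compress/zstd"
)

func main() {
	payload := []byte("hello zstd frame with a declared content size")

	var out bytes.Buffer
	enc, err := zstd.NewWriter(nil)
	if err != nil {
		log.Fatal(err)
	}
	// Declare the exact number of bytes that will be written; Close
	// returns an error if the count does not match.
	enc.ResetContentSize(&out, int64(len(payload)))
	if _, err := enc.Write(payload); err != nil {
		log.Fatal(err)
	}
	if err := enc.Close(); err != nil {
		log.Fatal(err)
	}
	log.Printf("compressed %d bytes into %d", len(payload), out.Len())
}
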
// Write data to the encoder.
@ -190,6 +206,7 @@ func (e *Encoder) nextBlock(final bool) error {
return s.err
}
s.nWritten += int64(n2)
s.nInput += int64(len(s.filling))
s.current = s.current[:0]
s.filling = s.filling[:0]
s.headerWritten = true
@ -200,8 +217,8 @@ func (e *Encoder) nextBlock(final bool) error {
var tmp [maxHeaderSize]byte
fh := frameHeader{
ContentSize: 0,
WindowSize: uint32(s.encoder.WindowSize(0)),
ContentSize: uint64(s.frameContentSize),
WindowSize: uint32(s.encoder.WindowSize(s.frameContentSize)),
SingleSegment: false,
Checksum: e.o.crc,
DictID: e.o.dict.ID(),
@ -243,9 +260,10 @@ func (e *Encoder) nextBlock(final bool) error {
// Move blocks forward.
s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current
s.nInput += int64(len(s.current))
s.wg.Add(1)
go func(src []byte) {
if debug {
if debugEncoder {
println("Adding block,", len(src), "bytes, final:", final)
}
defer func() {
@ -290,7 +308,7 @@ func (e *Encoder) nextBlock(final bool) error {
}
switch err {
case errIncompressible:
if debug {
if debugEncoder {
println("Storing incompressible block as raw")
}
blk.encodeRaw(src)
@ -313,7 +331,7 @@ func (e *Encoder) nextBlock(final bool) error {
//
// The Copy function uses ReaderFrom if available.
func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) {
if debug {
if debugEncoder {
println("Using ReadFrom")
}
@ -336,20 +354,20 @@ func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) {
switch err {
case io.EOF:
e.state.filling = e.state.filling[:len(e.state.filling)-len(src)]
if debug {
if debugEncoder {
println("ReadFrom: got EOF final block:", len(e.state.filling))
}
return n, nil
case nil:
default:
if debug {
if debugEncoder {
println("ReadFrom: got error:", err)
}
e.state.err = err
return n, err
}
if len(src) > 0 {
if debug {
if debugEncoder {
println("ReadFrom: got space left in source:", len(src))
}
continue
@ -394,6 +412,11 @@ func (e *Encoder) Close() error {
if err != nil {
return err
}
if s.frameContentSize > 0 {
if s.nInput != s.frameContentSize {
return fmt.Errorf("frame content size %d given, but %d bytes was written", s.frameContentSize, s.nInput)
}
}
if e.state.fullFrameWritten {
return s.err
}
@ -470,7 +493,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
}
fh := frameHeader{
ContentSize: uint64(len(src)),
WindowSize: uint32(enc.WindowSize(len(src))),
WindowSize: uint32(enc.WindowSize(int64(len(src)))),
SingleSegment: single,
Checksum: e.o.crc,
DictID: e.o.dict.ID(),
@ -512,7 +535,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
switch err {
case errIncompressible:
if debug {
if debugEncoder {
println("Storing incompressible block as raw")
}
dst = blk.encodeRawTo(dst, src)
@ -548,7 +571,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
switch err {
case errIncompressible:
if debug {
if debugEncoder {
println("Storing incompressible block as raw")
}
dst = blk.encodeRawTo(dst, todo)

View file

@ -24,6 +24,7 @@ type encoderOptions struct {
allLitEntropy bool
customWindow bool
customALEntropy bool
customBlockSize bool
lowMem bool
dict *dict
}
@ -33,7 +34,7 @@ func (o *encoderOptions) setDefault() {
concurrent: runtime.GOMAXPROCS(0),
crc: true,
single: nil,
blockSize: 1 << 16,
blockSize: maxCompressedBlockSize,
windowSize: 8 << 20,
level: SpeedDefault,
allLitEntropy: true,
@ -106,6 +107,7 @@ func WithWindowSize(n int) EOption {
o.customWindow = true
if o.blockSize > o.windowSize {
o.blockSize = o.windowSize
o.customBlockSize = true
}
return nil
}
@ -188,10 +190,9 @@ func EncoderLevelFromZstd(level int) EncoderLevel {
return SpeedDefault
case level >= 6 && level < 10:
return SpeedBetterCompression
case level >= 10:
return SpeedBetterCompression
default:
return SpeedBestCompression
}
return SpeedDefault
}
// String provides a string representation of the compression level.
@ -222,6 +223,9 @@ func WithEncoderLevel(l EncoderLevel) EOption {
switch o.level {
case SpeedFastest:
o.windowSize = 4 << 20
if !o.customBlockSize {
o.blockSize = 1 << 16
}
case SpeedDefault:
o.windowSize = 8 << 20
case SpeedBetterCompression:
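
The block-size handling above now defaults blockSize to maxCompressedBlockSize and only drops it for SpeedFastest when no custom value was requested. A hedged sketch of how a caller would exercise these options through the exported API; the chosen level and window size are illustrative:

package main

import (
	"log"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Map a zstd CLI-style level onto the package's coarse speed levels,
	// then pair it with an explicit window size; WithWindowSize triggers
	// the customWindow/customBlockSize handling shown in this diff.
	level := zstd.EncoderLevelFromZstd(3)
	enc, err := zstd.NewWriter(nil,
		zstd.WithEncoderLevel(level),
		zstd.WithWindowSize(1<<20), // 1 MiB window, illustrative
	)
	if err != nil {
		log.Fatal(err)
	}
	defer enc.Close()

	compressed := enc.EncodeAll([]byte("example payload"), nil)
	log.Printf("level %s: %d bytes compressed", level, len(compressed))
}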

View file

@ -22,10 +22,6 @@ type frameDec struct {
WindowSize uint64
// maxWindowSize is the maximum windows size to support.
// should never be bigger than max-int.
maxWindowSize uint64
// In order queue of blocks being decoded.
decoding chan *blockDec
@ -50,8 +46,11 @@ type frameDec struct {
}
const (
// The minimum Window_Size is 1 KB.
// MinWindowSize is the minimum Window Size, which is 1 KB.
MinWindowSize = 1 << 10
// MaxWindowSize is the maximum encoder window size
// and the default decoder maximum window size.
MaxWindowSize = 1 << 29
)
@ -61,12 +60,11 @@ var (
)
func newFrameDec(o decoderOptions) *frameDec {
d := frameDec{
o: o,
maxWindowSize: MaxWindowSize,
if o.maxWindowSize > o.maxDecodedSize {
o.maxWindowSize = o.maxDecodedSize
}
if d.maxWindowSize > o.maxDecodedSize {
d.maxWindowSize = o.maxDecodedSize
d := frameDec{
o: o,
}
return &d
}
@ -78,20 +76,33 @@ func newFrameDec(o decoderOptions) *frameDec {
func (d *frameDec) reset(br byteBuffer) error {
d.HasCheckSum = false
d.WindowSize = 0
var b []byte
var signature [4]byte
for {
var err error
b, err = br.readSmall(4)
// Check if we can read more...
b, err := br.readSmall(1)
switch err {
case io.EOF, io.ErrUnexpectedEOF:
return io.EOF
default:
return err
case nil:
signature[0] = b[0]
}
if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 {
if debug {
println("Not skippable", hex.EncodeToString(b), hex.EncodeToString(skippableFrameMagic))
// Read the rest, don't allow io.ErrUnexpectedEOF
b, err = br.readSmall(3)
switch err {
case io.EOF:
return io.EOF
default:
return err
case nil:
copy(signature[1:], b)
}
if !bytes.Equal(signature[1:4], skippableFrameMagic) || signature[0]&0xf0 != 0x50 {
if debugDecoder {
println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString(skippableFrameMagic))
}
// Break if not skippable frame.
break
@ -99,28 +110,34 @@ func (d *frameDec) reset(br byteBuffer) error {
// Read size to skip
b, err = br.readSmall(4)
if err != nil {
println("Reading Frame Size", err)
if debugDecoder {
println("Reading Frame Size", err)
}
return err
}
n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
println("Skipping frame with", n, "bytes.")
err = br.skipN(int(n))
if err != nil {
if debug {
if debugDecoder {
println("Reading discarded frame", err)
}
return err
}
}
if !bytes.Equal(b, frameMagic) {
println("Got magic numbers: ", b, "want:", frameMagic)
if !bytes.Equal(signature[:], frameMagic) {
if debugDecoder {
println("Got magic numbers: ", signature, "want:", frameMagic)
}
return ErrMagicMismatch
}
// Read Frame_Header_Descriptor
fhd, err := br.readByte()
if err != nil {
println("Reading Frame_Header_Descriptor", err)
if debugDecoder {
println("Reading Frame_Header_Descriptor", err)
}
return err
}
d.SingleSegment = fhd&(1<<5) != 0
@ -135,7 +152,9 @@ func (d *frameDec) reset(br byteBuffer) error {
if !d.SingleSegment {
wd, err := br.readByte()
if err != nil {
println("Reading Window_Descriptor", err)
if debugDecoder {
println("Reading Window_Descriptor", err)
}
return err
}
printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3)
@ -153,7 +172,7 @@ func (d *frameDec) reset(br byteBuffer) error {
size = 4
}
b, err = br.readSmall(int(size))
b, err := br.readSmall(int(size))
if err != nil {
println("Reading Dictionary_ID", err)
return err
@ -167,7 +186,7 @@ func (d *frameDec) reset(br byteBuffer) error {
case 4:
id = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
}
if debug {
if debugDecoder {
println("Dict size", size, "ID:", id)
}
if id > 0 {
@ -191,7 +210,7 @@ func (d *frameDec) reset(br byteBuffer) error {
}
d.FrameContentSize = 0
if fcsSize > 0 {
b, err = br.readSmall(fcsSize)
b, err := br.readSmall(fcsSize)
if err != nil {
println("Reading Frame content", err)
return err
@ -209,7 +228,7 @@ func (d *frameDec) reset(br byteBuffer) error {
d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24)
d.FrameContentSize = uint64(d1) | (uint64(d2) << 32)
}
if debug {
if debugDecoder {
println("field size bits:", v, "fcsSize:", fcsSize, "FrameContentSize:", d.FrameContentSize, hex.EncodeToString(b[:fcsSize]), "singleseg:", d.SingleSegment, "window:", d.WindowSize)
}
}
@ -230,13 +249,17 @@ func (d *frameDec) reset(br byteBuffer) error {
}
}
if d.WindowSize > d.maxWindowSize {
printf("window size %d > max %d\n", d.WindowSize, d.maxWindowSize)
if d.WindowSize > uint64(d.o.maxWindowSize) {
if debugDecoder {
printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
}
return ErrWindowSizeExceeded
}
// The minimum Window_Size is 1 KB.
if d.WindowSize < MinWindowSize {
println("got window size: ", d.WindowSize)
if debugDecoder {
println("got window size: ", d.WindowSize)
}
return ErrWindowSizeTooSmall
}
d.history.windowSize = int(d.WindowSize)
@ -252,7 +275,7 @@ func (d *frameDec) reset(br byteBuffer) error {
// next will start decoding the next block from stream.
func (d *frameDec) next(block *blockDec) error {
if debug {
if debugDecoder {
printf("decoding new block %p:%p", block, block.data)
}
err := block.reset(d.rawInput, d.WindowSize)
@ -263,7 +286,7 @@ func (d *frameDec) next(block *blockDec) error {
return err
}
block.input <- struct{}{}
if debug {
if debugDecoder {
println("next block:", block)
}
d.asyncRunningMu.Lock()
@ -318,12 +341,12 @@ func (d *frameDec) checkCRC() error {
}
if !bytes.Equal(tmp[:], want) {
if debug {
if debugDecoder {
println("CRC Check Failed:", tmp[:], "!=", want)
}
return ErrCRCMismatch
}
if debug {
if debugDecoder {
println("CRC ok", tmp[:])
}
return nil
@ -331,8 +354,8 @@ func (d *frameDec) checkCRC() error {
func (d *frameDec) initAsync() {
if !d.o.lowMem && !d.SingleSegment {
// set max extra size history to 10MB.
d.history.maxSize = d.history.windowSize + maxBlockSize*5
// set max extra size history to 2MB.
d.history.maxSize = d.history.windowSize + maxBlockSize
}
// re-alloc if more than one extra block size.
if d.o.lowMem && cap(d.history.b) > d.history.maxSize+maxBlockSize {
@ -344,7 +367,7 @@ func (d *frameDec) initAsync() {
if cap(d.decoding) < d.o.concurrent {
d.decoding = make(chan *blockDec, d.o.concurrent)
}
if debug {
if debugDecoder {
h := d.history
printf("history init. len: %d, cap: %d", len(h.b), cap(h.b))
}
@ -392,7 +415,7 @@ func (d *frameDec) startDecoder(output chan decodeOutput) {
output <- r
return
}
if debug {
if debugDecoder {
println("got result, from ", d.offset, "to", d.offset+int64(len(r.b)))
d.offset += int64(len(r.b))
}
@ -400,7 +423,7 @@ func (d *frameDec) startDecoder(output chan decodeOutput) {
// Send history to next block
select {
case next = <-d.decoding:
if debug {
if debugDecoder {
println("Sending ", len(d.history.b), "bytes as history")
}
next.history <- &d.history
@ -438,7 +461,7 @@ func (d *frameDec) startDecoder(output chan decodeOutput) {
output <- r
if next == nil {
// There was no decoder available, we wait for one now that we have sent to the writer.
if debug {
if debugDecoder {
println("Sending ", len(d.history.b), " bytes as history")
}
next = <-d.decoding
@ -462,7 +485,7 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
if err != nil {
break
}
if debug {
if debugDecoder {
println("next block:", dec)
}
err = dec.decodeBuf(&d.history)
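
The refactored reset above reads one signature byte and then three more, so a bare end of stream is reported as io.EOF while skippable frames (first byte 0x5?, then the skippable magic) are still detected and skipped via the 4-byte little-endian length that follows. A standalone sketch of the same magic-number classification; the byte values follow the zstd frame format and the constants referenced above:

package main

import (
	"bytes"
	"fmt"
)

// Magic numbers as they appear on the wire (little-endian), mirroring the
// frameMagic/skippableFrameMagic checks in the reset method above.
var (
	frameMagic          = []byte{0x28, 0xb5, 0x2f, 0xfd}
	skippableFrameMagic = []byte{0x2a, 0x4d, 0x18}
)

// classify reports what the first four bytes of a stream announce,
// following the same tests as frameDec.reset: a skippable frame has 0x5?
// in its first byte and the skippable magic in bytes 1-3.
func classify(sig []byte) string {
	switch {
	case bytes.Equal(sig, frameMagic):
		return "zstd frame"
	case sig[0]&0xf0 == 0x50 && bytes.Equal(sig[1:4], skippableFrameMagic):
		return "skippable frame"
	default:
		return "not a zstd stream"
	}
}

func main() {
	fmt.Println(classify([]byte{0x28, 0xb5, 0x2f, 0xfd})) // zstd frame
	fmt.Println(classify([]byte{0x5a, 0x2a, 0x4d, 0x18})) // skippable frame
	fmt.Println(classify([]byte{0x1f, 0x8b, 0x08, 0x00})) // not a zstd stream
}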

View file

@ -379,7 +379,7 @@ func (s decSymbol) final() (int, uint8) {
// This can only be used if no symbols are 0 bits.
// At least tablelog bits must be available in the bit reader.
func (s *fseState) nextFast(br *bitReader) (uint32, uint8) {
lowBits := uint16(br.getBitsFast(s.state.nbBits()))
lowBits := br.get16BitsFast(s.state.nbBits())
s.state = s.dt[s.state.newState()+lowBits]
return s.state.baseline(), s.state.addBits()
}

View file

@ -62,9 +62,8 @@ func (s symbolTransform) String() string {
// To indicate that you have populated the histogram call HistogramFinished
// with the value of the highest populated symbol, as well as the number of entries
// in the most populated entry. These are accepted at face value.
// The returned slice will always be length 256.
func (s *fseEncoder) Histogram() []uint32 {
return s.count[:]
func (s *fseEncoder) Histogram() *[256]uint32 {
return &s.count
}
// HistogramFinished can be called to indicate that the histogram has been populated.
@ -229,7 +228,7 @@ func (s *fseEncoder) setRLE(val byte) {
deltaFindState: 0,
deltaNbBits: 0,
}
if debug {
if debugEncoder {
println("setRLE: val", val, "symbolTT", s.ct.symbolTT[val])
}
s.rleVal = val
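
Histogram now returns a *[256]uint32 instead of a slice, so callers index a fixed-size array directly: no slice header is copied and byte-valued indices need no bounds check. A minimal sketch of that shape; the histogrammer type here is hypothetical and only the return type mirrors the change above:

package main

import "fmt"

// histogrammer stands in for fseEncoder; only the Histogram signature
// matches the diff.
type histogrammer struct {
	count [256]uint32
}

func (h *histogrammer) Histogram() *[256]uint32 { return &h.count }

func main() {
	var h histogrammer
	hist := h.Histogram()
	for _, b := range []byte("abracadabra") {
		hist[b]++ // b is a byte, so the index is provably < 256
	}
	fmt.Println("a occurs", hist['a'], "times") // a occurs 5 times
}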

View file

@ -13,24 +13,24 @@ const (
prime8bytes = 0xcf1bbcdcb7a56463
)
// hashLen returns a hash of the lowest l bytes of u for a size size of h bytes.
// l must be >=4 and <=8. Any other value will return hash for 4 bytes.
// h should always be <32.
// Preferably h and l should be a constant.
// FIXME: This does NOT get resolved, if 'mls' is constant,
// so this cannot be used.
func hashLen(u uint64, hashLog, mls uint8) uint32 {
// hashLen returns a hash of the lowest mls bytes of u, with length output bits.
// mls must be >=3 and <=8. Any other value will return hash for 4 bytes.
// length should always be < 32.
// Preferably length and mls should be a constant for inlining.
func hashLen(u uint64, length, mls uint8) uint32 {
switch mls {
case 3:
return (uint32(u<<8) * prime3bytes) >> (32 - length)
case 5:
return hash5(u, hashLog)
return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length))
case 6:
return hash6(u, hashLog)
return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length))
case 7:
return hash7(u, hashLog)
return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length))
case 8:
return hash8(u, hashLog)
return uint32((u * prime8bytes) >> (64 - length))
default:
return hash4x64(u, hashLog)
return (uint32(u) * prime4bytes) >> (32 - length)
}
}
@ -39,39 +39,3 @@ func hashLen(u uint64, hashLog, mls uint8) uint32 {
func hash3(u uint32, h uint8) uint32 {
return ((u << (32 - 24)) * prime3bytes) >> ((32 - h) & 31)
}
// hash4 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4(u uint32, h uint8) uint32 {
return (u * prime4bytes) >> ((32 - h) & 31)
}
// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4x64(u uint64, h uint8) uint32 {
return (uint32(u) * prime4bytes) >> ((32 - h) & 31)
}
// hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash5(u uint64, h uint8) uint32 {
return uint32(((u << (64 - 40)) * prime5bytes) >> ((64 - h) & 63))
}
// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash6(u uint64, h uint8) uint32 {
return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
}
// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash7(u uint64, h uint8) uint32 {
return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
}
// hash8 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash8(u uint64, h uint8) uint32 {
return uint32((u * prime8bytes) >> ((64 - h) & 63))
}
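
hashLen above folds the old hash4..hash8 helpers into a single function whose switch collapses when mls is a constant, which is why call sites now pass constants such as tableFastHashLen and dFastLongLen. A standalone sketch of the mls==6 branch; the prime multiplier is assumed to match the package's prime6bytes constant:

package main

import (
	"encoding/binary"
	"fmt"
)

// Assumed to match the package's prime6bytes.
const prime6bytes = 227718039650203

// hash6 replicates the mls==6 branch of hashLen above: keep the low six
// bytes of u, multiply by a prime and take the top `length` bits. With
// length and mls constant, the switch in hashLen disappears and the call
// inlines.
func hash6(u uint64, length uint8) uint32 {
	return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length))
}

func main() {
	src := []byte("zstandard hashing example")
	cv := binary.LittleEndian.Uint64(src) // equivalent of load6432(src, 0)
	fmt.Printf("table slot: %d\n", hash6(cv, 15)) // 15 matches tableBits above
}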

View file

@ -195,7 +195,6 @@ func (d *Digest) UnmarshalBinary(b []byte) error {
b, d.v4 = consumeUint64(b)
b, d.total = consumeUint64(b)
copy(d.mem[:], b)
b = b[len(d.mem):]
d.n = int(d.total % uint64(len(d.mem)))
return nil
}

View file

@ -1,12 +1,13 @@
// +build !appengine
// +build gc
// +build !purego
// +build !noasm
#include "textflag.h"
// Register allocation:
// AX h
// CX pointer to advance through b
// SI pointer to advance through b
// DX n
// BX loop end
// R8 v1, k1
@ -16,39 +17,39 @@
// R12 tmp
// R13 prime1v
// R14 prime2v
// R15 prime4v
// DI prime4v
// round reads from and advances the buffer pointer in CX.
// round reads from and advances the buffer pointer in SI.
// It assumes that R13 has prime1v and R14 has prime2v.
#define round(r) \
MOVQ (CX), R12 \
ADDQ $8, CX \
MOVQ (SI), R12 \
ADDQ $8, SI \
IMULQ R14, R12 \
ADDQ R12, r \
ROLQ $31, r \
IMULQ R13, r
// mergeRound applies a merge round on the two registers acc and val.
// It assumes that R13 has prime1v, R14 has prime2v, and R15 has prime4v.
// It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v.
#define mergeRound(acc, val) \
IMULQ R14, val \
ROLQ $31, val \
IMULQ R13, val \
XORQ val, acc \
IMULQ R13, acc \
ADDQ R15, acc
ADDQ DI, acc
// func Sum64(b []byte) uint64
TEXT ·Sum64(SB), NOSPLIT, $0-32
// Load fixed primes.
MOVQ ·prime1v(SB), R13
MOVQ ·prime2v(SB), R14
MOVQ ·prime4v(SB), R15
MOVQ ·prime4v(SB), DI
// Load slice.
MOVQ b_base+0(FP), CX
MOVQ b_base+0(FP), SI
MOVQ b_len+8(FP), DX
LEAQ (CX)(DX*1), BX
LEAQ (SI)(DX*1), BX
// The first loop limit will be len(b)-32.
SUBQ $32, BX
@ -65,14 +66,14 @@ TEXT ·Sum64(SB), NOSPLIT, $0-32
XORQ R11, R11
SUBQ R13, R11
// Loop until CX > BX.
// Loop until SI > BX.
blockLoop:
round(R8)
round(R9)
round(R10)
round(R11)
CMPQ CX, BX
CMPQ SI, BX
JLE blockLoop
MOVQ R8, AX
@ -100,16 +101,16 @@ noBlocks:
afterBlocks:
ADDQ DX, AX
// Right now BX has len(b)-32, and we want to loop until CX > len(b)-8.
// Right now BX has len(b)-32, and we want to loop until SI > len(b)-8.
ADDQ $24, BX
CMPQ CX, BX
CMPQ SI, BX
JG fourByte
wordLoop:
// Calculate k1.
MOVQ (CX), R8
ADDQ $8, CX
MOVQ (SI), R8
ADDQ $8, SI
IMULQ R14, R8
ROLQ $31, R8
IMULQ R13, R8
@ -117,18 +118,18 @@ wordLoop:
XORQ R8, AX
ROLQ $27, AX
IMULQ R13, AX
ADDQ R15, AX
ADDQ DI, AX
CMPQ CX, BX
CMPQ SI, BX
JLE wordLoop
fourByte:
ADDQ $4, BX
CMPQ CX, BX
CMPQ SI, BX
JG singles
MOVL (CX), R8
ADDQ $4, CX
MOVL (SI), R8
ADDQ $4, SI
IMULQ R13, R8
XORQ R8, AX
@ -138,19 +139,19 @@ fourByte:
singles:
ADDQ $4, BX
CMPQ CX, BX
CMPQ SI, BX
JGE finalize
singlesLoop:
MOVBQZX (CX), R12
ADDQ $1, CX
MOVBQZX (SI), R12
ADDQ $1, SI
IMULQ ·prime5v(SB), R12
XORQ R12, AX
ROLQ $11, AX
IMULQ R13, AX
CMPQ CX, BX
CMPQ SI, BX
JL singlesLoop
finalize:
@ -179,13 +180,13 @@ TEXT ·writeBlocks(SB), NOSPLIT, $0-40
MOVQ ·prime2v(SB), R14
// Load slice.
MOVQ arg1_base+8(FP), CX
MOVQ arg1_len+16(FP), DX
LEAQ (CX)(DX*1), BX
MOVQ b_base+8(FP), SI
MOVQ b_len+16(FP), DX
LEAQ (SI)(DX*1), BX
SUBQ $32, BX
// Load vN from d.
MOVQ arg+0(FP), AX
MOVQ d+0(FP), AX
MOVQ 0(AX), R8 // v1
MOVQ 8(AX), R9 // v2
MOVQ 16(AX), R10 // v3
@ -199,7 +200,7 @@ blockLoop:
round(R10)
round(R11)
CMPQ CX, BX
CMPQ SI, BX
JLE blockLoop
// Copy vN back to d.
@ -208,8 +209,8 @@ blockLoop:
MOVQ R10, 16(AX)
MOVQ R11, 24(AX)
// The number of bytes written is CX minus the old base pointer.
SUBQ arg1_base+8(FP), CX
MOVQ CX, ret+32(FP)
// The number of bytes written is SI minus the old base pointer.
SUBQ b_base+8(FP), SI
MOVQ SI, ret+32(FP)
RET

View file

@ -0,0 +1,186 @@
// +build gc,!purego,!noasm
#include "textflag.h"
// Register allocation.
#define digest R1
#define h R2 // Return value.
#define p R3 // Input pointer.
#define len R4
#define nblocks R5 // len / 32.
#define prime1 R7
#define prime2 R8
#define prime3 R9
#define prime4 R10
#define prime5 R11
#define v1 R12
#define v2 R13
#define v3 R14
#define v4 R15
#define x1 R20
#define x2 R21
#define x3 R22
#define x4 R23
#define round(acc, x) \
MADD prime2, acc, x, acc \
ROR $64-31, acc \
MUL prime1, acc \
// x = round(0, x).
#define round0(x) \
MUL prime2, x \
ROR $64-31, x \
MUL prime1, x \
#define mergeRound(x) \
round0(x) \
EOR x, h \
MADD h, prime4, prime1, h \
// Update v[1-4] with 32-byte blocks. Assumes len >= 32.
#define blocksLoop() \
LSR $5, len, nblocks \
PCALIGN $16 \
loop: \
LDP.P 32(p), (x1, x2) \
round(v1, x1) \
LDP -16(p), (x3, x4) \
round(v2, x2) \
SUB $1, nblocks \
round(v3, x3) \
round(v4, x4) \
CBNZ nblocks, loop \
// The primes are repeated here to ensure that they're stored
// in a contiguous array, so we can load them with LDP.
DATA primes<> +0(SB)/8, $11400714785074694791
DATA primes<> +8(SB)/8, $14029467366897019727
DATA primes<>+16(SB)/8, $1609587929392839161
DATA primes<>+24(SB)/8, $9650029242287828579
DATA primes<>+32(SB)/8, $2870177450012600261
GLOBL primes<>(SB), NOPTR+RODATA, $40
// func Sum64(b []byte) uint64
TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32
LDP b_base+0(FP), (p, len)
LDP primes<> +0(SB), (prime1, prime2)
LDP primes<>+16(SB), (prime3, prime4)
MOVD primes<>+32(SB), prime5
CMP $32, len
CSEL LO, prime5, ZR, h // if len < 32 { h = prime5 } else { h = 0 }
BLO afterLoop
ADD prime1, prime2, v1
MOVD prime2, v2
MOVD $0, v3
NEG prime1, v4
blocksLoop()
ROR $64-1, v1, x1
ROR $64-7, v2, x2
ADD x1, x2
ROR $64-12, v3, x3
ROR $64-18, v4, x4
ADD x3, x4
ADD x2, x4, h
mergeRound(v1)
mergeRound(v2)
mergeRound(v3)
mergeRound(v4)
afterLoop:
ADD len, h
TBZ $4, len, try8
LDP.P 16(p), (x1, x2)
round0(x1)
ROR $64-27, h
EOR x1 @> 64-27, h, h
MADD h, prime4, prime1, h
round0(x2)
ROR $64-27, h
EOR x2 @> 64-27, h
MADD h, prime4, prime1, h
try8:
TBZ $3, len, try4
MOVD.P 8(p), x1
round0(x1)
ROR $64-27, h
EOR x1 @> 64-27, h
MADD h, prime4, prime1, h
try4:
TBZ $2, len, try2
MOVWU.P 4(p), x2
MUL prime1, x2
ROR $64-23, h
EOR x2 @> 64-23, h
MADD h, prime3, prime2, h
try2:
TBZ $1, len, try1
MOVHU.P 2(p), x3
AND $255, x3, x1
LSR $8, x3, x2
MUL prime5, x1
ROR $64-11, h
EOR x1 @> 64-11, h
MUL prime1, h
MUL prime5, x2
ROR $64-11, h
EOR x2 @> 64-11, h
MUL prime1, h
try1:
TBZ $0, len, end
MOVBU (p), x4
MUL prime5, x4
ROR $64-11, h
EOR x4 @> 64-11, h
MUL prime1, h
end:
EOR h >> 33, h
MUL prime2, h
EOR h >> 29, h
MUL prime3, h
EOR h >> 32, h
MOVD h, ret+24(FP)
RET
// func writeBlocks(d *Digest, b []byte) int
//
// Assumes len(b) >= 32.
TEXT ·writeBlocks(SB), NOFRAME+NOSPLIT, $0-40
LDP primes<>(SB), (prime1, prime2)
// Load state. Assume v[1-4] are stored contiguously.
MOVD d+0(FP), digest
LDP 0(digest), (v1, v2)
LDP 16(digest), (v3, v4)
LDP b_base+8(FP), (p, len)
blocksLoop()
// Store updated state.
STP (v1, v2), 0(digest)
STP (v3, v4), 16(digest)
BIC $31, len
MOVD len, ret+32(FP)
RET

View file

@ -1,6 +1,9 @@
//go:build (amd64 || arm64) && !appengine && gc && !purego && !noasm
// +build amd64 arm64
// +build !appengine
// +build gc
// +build !purego
// +build !noasm
package xxhash
@ -10,4 +13,4 @@ package xxhash
func Sum64(b []byte) uint64
//go:noescape
func writeBlocks(*Digest, []byte) int
func writeBlocks(d *Digest, b []byte) int

View file

@ -1,4 +1,5 @@
// +build !amd64 appengine !gc purego
//go:build (!amd64 && !arm64) || appengine || !gc || purego || noasm
// +build !amd64,!arm64 appengine !gc purego noasm
package xxhash

View file

@ -278,7 +278,7 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
mlState = mlTable[mlState.newState()&maxTableMask]
ofState = ofTable[ofState.newState()&maxTableMask]
} else {
bits := br.getBitsFast(nBits)
bits := br.get32BitsFast(nBits)
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
@ -326,7 +326,7 @@ func (s *sequenceDecs) updateAlt(br *bitReader) {
s.offsets.state.state = s.offsets.state.dt[c.newState()]
return
}
bits := br.getBitsFast(nBits)
bits := br.get32BitsFast(nBits)
lowBits := uint16(bits >> ((c.nbBits() + b.nbBits()) & 31))
s.litLengths.state.state = s.litLengths.state.dt[a.newState()+lowBits]

Some files were not shown because too many files have changed in this diff.