// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import "fmt"

const (
	betterLongTableBits = 19                       // Bits used in the long match table
	betterLongTableSize = 1 << betterLongTableBits // Size of the table

	// Note: Increasing the short table bits or making the hash shorter
	// can actually lead to compression degradation since it will 'steal' more from the
	// long match table and match offsets are quite big.
	// This greatly depends on the type of input.
	betterShortTableBits = 13                        // Bits used in the short match table
	betterShortTableSize = 1 << betterShortTableBits // Size of the table

	betterLongTableShardCnt  = 1 << (betterLongTableBits - dictShardBits)    // Number of shards in the table
	betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard

	betterShortTableShardCnt  = 1 << (betterShortTableBits - dictShardBits)     // Number of shards in the table
	betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard
)

type prevEntry struct {
	offset int32
	prev   int32
}

// betterFastEncoder uses two tables, one for short matches (5 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
// When we find a long match, we choose between the two values and select the longest.
// When we find a short match, after checking the long table, we check if we can find a long match at n+1
// and whether it is longer (lazy matching).
type betterFastEncoder struct {
	fastBase
	table     [betterShortTableSize]tableEntry
	longTable [betterLongTableSize]prevEntry
}
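
// For illustration: a long-table lookup sees both the most recent position
// with a given hash and the position it displaced, giving a chain of length 2:
//
//	cand := e.longTable[hash8(cv, betterLongTableBits)]
//	// cand.offset: newest position stored with this hash
//	// cand.prev:   the position that cand.offset displaced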

type betterFastEncoderDict struct {
	betterFastEncoder
	dictTable            []tableEntry
	dictLongTable        []prevEntry
	shortTableShardDirty [betterShortTableShardCnt]bool
	longTableShardDirty  [betterLongTableShardCnt]bool
	allDirty             bool
}

// Encode encodes the block, trading some speed for better compression
// than the fast encoder.
func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2).
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)
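
	// Table entries store absolute offsets (position + e.cur). When e.cur
	// grows past bufferReset, the loop below either clears the tables (no
	// history to keep) or rebases every stored offset by e.cur-e.maxMatchOff,
	// which preserves all relative distances.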
	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = prevEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin

	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 9

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}
encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks.
		canRepeat := len(blk.sequences) > 2
		var matched int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashS := hash5(cv, betterShortTableBits)
			nextHashL := hash8(cv, betterLongTableBits)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					index0 := s + repOff
					s += length + repOff

					nextEmit = s
					if s >= sLimit {
						if debug {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hash8(cv0, betterLongTableBits)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
					// Consider history as well.
					var seq seq
					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					index0 := s + repOff2
					s += length + repOff2
					nextEmit = s
					if s >= sLimit {
						if debug {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}

					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hash8(cv0, betterLongTableBits)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match on prev as well, at least 8 bytes.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t > e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// Found a regular match.
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1.
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hash8(cv, betterLongTableBits)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check prev long...
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}
				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
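			// The step grows with the distance scanned since the last emit:
			// with kSearchStrength-1 = 8, one extra byte of step is added for
			// every 256 bytes searched without a match, so incompressible
			// input is skipped progressively faster.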
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// Try to find a better match by searching for a long match at the end of the current best match.
		if s+matched < sLimit {
			nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
			cv := load3232(src, s)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check prev long...
			coffsetL = candidateL.prev - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("prev long match at end-of-match")
					}
				}
			}
		}

		// A match has been found. Update recent offsets.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
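		// Offset values 1-3 are reserved for repeat codes in zstd sequences,
		// so a literal match distance of s-t is stored as s-t+3.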
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
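		// Every position in the match goes into exactly one table: even
		// steps into the long table, the following byte into the short
		// table, advancing two positions per 8-byte load to bound the work.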
		index0 := s - l + 1
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hash8(cv0, betterLongTableBits)
			off := index0 + e.cur
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
			index0 += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hash5(cv, betterShortTableBits)
			nextHashL := hash8(cv, betterLongTableBits)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match.
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// The most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	e.ensureHist(len(src))
	e.Encode(blk, src)
}
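
// For context, a minimal usage sketch (not part of this file). This encoder
// backs the SpeedBetterCompression level of the package's public API:
//
//	enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression))
//	if err != nil {
//		panic(err)
//	}
//	compressed := enc.EncodeAll(src, nil)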

// Encode encodes the block, trading some speed for better compression
// than the fast encoder.
func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2).
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = prevEntry{}
			}
			e.cur = e.maxMatchOff
			e.allDirty = true
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.allDirty = true
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin

	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 9

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks.
		canRepeat := len(blk.sequences) > 2
		var matched int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashS := hash5(cv, betterShortTableBits)
			nextHashL := hash8(cv, betterLongTableBits)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)
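			// The dirty-shard marks let Reset restore only the shards this
			// block touched from the dict tables instead of copying both
			// tables in full.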

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					index0 := s + repOff
					s += length + repOff

					nextEmit = s
					if s >= sLimit {
						if debug {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hash8(cv0, betterLongTableBits)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hash5(cv1, betterShortTableBits)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
					// Consider history as well.
					var seq seq
					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					index0 := s + repOff2
					s += length + repOff2
					nextEmit = s
					if s >= sLimit {
						if debug {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}

					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hash8(cv0, betterLongTableBits)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hash5(cv1, betterShortTableBits)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match on prev as well, at least 8 bytes.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t > e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// Found a regular match.
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1.
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hash8(cv, betterLongTableBits)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				e.markLongShardDirty(nextHashL)
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check prev long...
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}
				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// Try to find a better match by searching for a long match at the end of the current best match.
		if s+matched < sLimit {
			nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
			cv := load3232(src, s)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check prev long...
			coffsetL = candidateL.prev - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("prev long match at end-of-match")
					}
				}
			}
		}

		// A match has been found. Update recent offsets.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		index0 := s - l + 1
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hash8(cv0, betterLongTableBits)
			off := index0 + e.cur
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.markLongShardDirty(h0)
			h1 := hash5(cv1, betterShortTableBits)
			e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
			e.markShortShardDirty(h1)
			index0 += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hash5(cv, betterShortTableBits)
			nextHashL := hash8(cv, betterLongTableBits)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match.
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// Reset will reset and set a dictionary if not nil.
func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d != nil {
		panic("betterFastEncoder: Reset with dict")
	}
}

// Reset will reset and set a dictionary if not nil.
func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d == nil {
		return
	}
	// Init or copy dict table
	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
		if len(e.dictTable) != len(e.table) {
			e.dictTable = make([]tableEntry, len(e.table))
		}
		end := int32(len(d.content)) - 8 + e.maxMatchOff
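		// Each 8-byte load covers the four overlapping 5-byte windows
		// starting at i, i+1, i+2 and i+3 (the "0 -> 4" etc. notes below),
		// so the loop fills four table positions per load.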
		for i := e.maxMatchOff; i < end; i += 4 {
			const hashLog = betterShortTableBits

			cv := load6432(d.content, i-e.maxMatchOff)
			nextHash := hash5(cv, hashLog)      // 0 -> 4
			nextHash1 := hash5(cv>>8, hashLog)  // 1 -> 5
			nextHash2 := hash5(cv>>16, hashLog) // 2 -> 6
			nextHash3 := hash5(cv>>24, hashLog) // 3 -> 7
			e.dictTable[nextHash] = tableEntry{
				val:    uint32(cv),
				offset: i,
			}
			e.dictTable[nextHash1] = tableEntry{
				val:    uint32(cv >> 8),
				offset: i + 1,
			}
			e.dictTable[nextHash2] = tableEntry{
				val:    uint32(cv >> 16),
				offset: i + 2,
			}
			e.dictTable[nextHash3] = tableEntry{
				val:    uint32(cv >> 24),
				offset: i + 3,
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	// Init or copy dict long table
	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
		if len(e.dictLongTable) != len(e.longTable) {
			e.dictLongTable = make([]prevEntry, len(e.longTable))
		}
		if len(d.content) >= 8 {
			cv := load6432(d.content, 0)
			h := hash8(cv, betterLongTableBits)
			e.dictLongTable[h] = prevEntry{
				offset: e.maxMatchOff,
				prev:   e.dictLongTable[h].offset,
			}

			end := int32(len(d.content)) - 8 + e.maxMatchOff
			off := 8 // First to read
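			// Roll the 8-byte window forward one byte per iteration
			// instead of reloading it from d.content each time.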
			for i := e.maxMatchOff + 1; i < end; i++ {
				cv = cv>>8 | (uint64(d.content[off]) << 56)
				h := hash8(cv, betterLongTableBits)
				e.dictLongTable[h] = prevEntry{
					offset: i,
					prev:   e.dictLongTable[h].offset,
				}
				off++
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	// Reset table to initial state
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			for i := range e.shortTableShardDirty {
				if e.shortTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterShortTableShardCnt
		const shardSize = betterShortTableShardSize
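		// If roughly two thirds of the shards were touched, one full table
		// copy is presumably cheaper than many per-shard copies.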
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.table[:], e.dictTable)
			for i := range e.shortTableShardDirty {
				e.shortTableShardDirty[i] = false
			}
		} else {
			for i := range e.shortTableShardDirty {
				if !e.shortTableShardDirty[i] {
					continue
				}

				copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
				e.shortTableShardDirty[i] = false
			}
		}
	}
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			// Count dirty shards of the long table (not the short one).
			for i := range e.longTableShardDirty {
				if e.longTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterLongTableShardCnt
		const shardSize = betterLongTableShardSize
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.longTable[:], e.dictLongTable)
			for i := range e.longTableShardDirty {
				e.longTableShardDirty[i] = false
			}
		} else {
			for i := range e.longTableShardDirty {
				if !e.longTableShardDirty[i] {
					continue
				}

				copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize])
				e.longTableShardDirty[i] = false
			}
		}
	}
	e.cur = e.maxMatchOff
	e.allDirty = false
}

func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) {
	e.longTableShardDirty[entryNum/betterLongTableShardSize] = true
}

func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) {
	e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true
}